Pārlūkot izejas kodu

Ajout de plusieurs méthodes pour filtrer les messages

feature/message_filters
Figg 9 mēnešus atpakaļ
vecāks
revīzija
a6aba9102a
2 mainītis faili ar 47 papildinājumiem un 8 dzēšanām
  1. 45
    5
      million/analyze/word_finder.py
  2. 2
    3
      scripts/find_gromots.py

+ 45
- 5
million/analyze/word_finder.py Parādīt failu

@@ -1,11 +1,51 @@
1
+from datetime import date
1 2
 import re
3
+import million.analyze.message_evaluation as msg_val
2 4
 from typing import List
3 5
 from million.model.message import Message
4 6
 
5 7
 
6
-def _wordFilter(msg: Message, words: List[str]) -> bool:
7
-    rgx = r"(\b"+ r"\b|\b".join(words) + r"\b)"
8
-    return msg.content and re.search(rgx, msg.content, re.I)
8
+def filter_words(messages: List[Message], words: List[str]) -> List[Message]:
9
+    """
10
+    Return every message containing any of the given words; you can use regex syntax inside your words
11
+    e.g. filter_words(messages, ["dogs?","m(ous|ic)e"])
12
+    will search for : dog, dogs, mouse, mice
13
+    """
14
+    r_words = [rf"\b{w}\b" for w in words]
15
+    rgx = "(" + "|".join(r_words) + ")"
16
+    return [m for m in messages if m.content and re.search(rgx, m.content, re.I)]
17
+
18
+
19
+def filter_value(messages: List[Message], val: int) -> List[Message]:
20
+    """
21
+    Return every message whose value is evaluated to the given val
22
+    """
23
+    return [m for m in messages if msg_val.get(m) == val]
24
+
25
+
26
+def filter_date(messages: List[Message], other: date) -> List[Message]:
27
+    """
28
+    Return every message posted on the given date
29
+    """
30
+    return [m for m in messages if m.date_time.date() == other]
31
+
32
+
33
+def filter_neighbours(
34
+    messages: List[Message], msg: Message, amplitude: int = 10
35
+) -> List[Message]:
36
+    """
37
+    Return the messages posted just before and after the given one.
38
+    amplitude will indicate how many messages to look for in each direction
39
+    """
40
+    idx = messages.index(msg)
41
+    start_index = max(0, idx - amplitude)
42
+    end_index = min(len(messages), idx + amplitude + 1)
43
+    return messages[start_index:end_index]
44
+
45
+
46
+def retain_counts(messages: List[Message]) -> List[Message]:
47
+    """
48
+    Retain only the messages that are considered to have a counted value
49
+    """
50
+    return [msg for msg in messages if msg_val.get(msg)]
9 51
 
10
-def findWords(messages: List[Message], words: List[str]) -> List[Message]:
11
-    return filter(lambda m: _wordFilter(m, words), messages)

+ 2
- 3
scripts/find_gromots.py Parādīt failu

@@ -1,5 +1,4 @@
1
-from datetime import datetime
2
-from million.analyze.word_finder import find_words
1
+from million.analyze.word_finder import filter_words
3 2
 import million.parse.fb_exports as fb
4 3
 
5 4
 
@@ -23,7 +22,7 @@ gros_mots = [
23 22
     ]
24 23
 
25 24
 export = fb.parse_dirfiles(DATA_PATH)
26
-msg_gros_mots = find_words(export.messages, gros_mots)
25
+msg_gros_mots = filter_words(export.messages, gros_mots)
27 26
 
28 27
 msg_gros_mots_grp = {}
29 28
 

Notiek ielāde…
Atcelt
Saglabāt