|
@@ -1,11 +1,51 @@
|
|
1
|
+from datetime import date
|
1
|
2
|
import re
|
|
3
|
+import million.analyze.message_evaluation as msg_val
|
2
|
4
|
from typing import List
|
3
|
5
|
from million.model.message import Message
|
4
|
6
|
|
5
|
7
|
|
6
|
|
-def _wordFilter(msg: Message, words: List[str]) -> bool:
|
7
|
|
- rgx = r"(\b"+ r"\b|\b".join(words) + r"\b)"
|
8
|
|
- return msg.content and re.search(rgx, msg.content, re.I)
|
|
8
|
def filter_words(messages: List[Message], words: List[str]) -> List[Message]:
    """
    Return every message containing at least one of the given words.

    Each word is matched case-insensitively as a whole word and may itself
    use regex syntax,
    i.e. filter_words(messages, ["dogs?", "m(ous|ic)e"])
    will search for: dog, dogs, mouse, mice

    Messages whose content is empty or None are never returned, and an empty
    `words` list matches nothing.
    """
    if not words:
        # Joining zero words would give an empty pattern, which matches
        # every message that has content.
        return []
    # Wrap each word in a non-capturing group so that a top-level "|" inside
    # a word cannot escape the surrounding \b anchors: "cat|dog" must not
    # turn into r"\bcat" OR r"dog\b".
    rgx = re.compile("|".join(rf"\b(?:{w})\b" for w in words), re.I)
    return [m for m in messages if m.content and rgx.search(m.content)]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
def filter_value(messages: List[Message], val: int) -> List[Message]:
    """
    Keep only the messages that `msg_val.get` evaluates to exactly `val`,
    in their original order.
    """
    matching = []
    for message in messages:
        if msg_val.get(message) == val:
            matching.append(message)
    return matching
|
|
24
|
+
|
|
25
|
+
|
|
26
|
def filter_date(messages: List[Message], other: date) -> List[Message]:
    """
    Return every message posted on the given date.

    `other` may be a plain date or a datetime; only its year, month and day
    are taken into account.
    """
    # A datetime is a valid `date` for the type checker, but comparing a date
    # with a datetime is always False, so reduce `other` to a plain date.
    target = date(other.year, other.month, other.day)
    return [m for m in messages if m.date_time.date() == target]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
def filter_neighbours(
    messages: List[Message], msg: Message, amplitude: int = 10
) -> List[Message]:
    """
    Return the messages posted just before and after the given one,
    the given message included.

    amplitude indicates how many messages to look for in each direction;
    the window is cut short at either end of the list.

    Raises ValueError if msg is not part of messages.
    """
    # Look for the exact object first: list.index() matches by equality and
    # would stop at an earlier message that merely compares equal.
    idx = next((i for i, m in enumerate(messages) if m is msg), None)
    if idx is None:
        # Fall back to equality so an equal copy of a message still works;
        # this raises ValueError when nothing matches.
        idx = messages.index(msg)
    # The lower bound must be clamped by hand: a negative slice start would
    # count from the end of the list.
    start_index = max(0, idx - amplitude)
    end_index = min(len(messages), idx + amplitude + 1)
    return messages[start_index:end_index]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
def retain_counts(messages: List[Message]) -> List[Message]:
    """
    Retain only the messages that are considered to have a counted value,
    i.e. those for which `msg_val.get` returns something truthy.
    """
    # NOTE(review): a value of 0 is falsy and is dropped as well; confirm
    # that this is intended.
    return list(filter(msg_val.get, messages))
|
9
|
51
|
|
10
|
|
def findWords(messages: List[Message], words: List[str]) -> List[Message]:
    """
    Return the messages for which `_wordFilter` accepts the given words,
    in their original order.
    """
    # Build a real list, as annotated: a bare filter() object is lazy, can be
    # consumed only once, and supports neither len() nor indexing.
    return [m for m in messages if _wordFilter(m, words)]
|