Parcourir la source

Des utilitaires pour filtrer les messages et classer les participants

feat/year-wrapped
Elias Sebbar il y a 2 semaines
Parent
révision
7ffa5f8910

+ 0
- 40
million/analyze/filter.py Voir le fichier

@@ -1,40 +0,0 @@
1
-from __future__ import annotations
2
-from datetime import datetime
3
-from typing import List
4
-from million.model.filter.MessageFilter import MessageFilter
5
-from million.model.message import Message
6
-
7
-
8
-def before(message: Message, date: datetime):
9
-    return message.date < date
10
-
11
-
12
-def after(message: Message, date: datetime):
13
-    return message.date > date
14
-
15
-
16
-def contains(message: Message, content: str):
17
-    return message.content is not None and content.lower() in message.content.lower()
18
-
19
-
20
-def is_number(message: Message, is_number: bool):
21
-    return message.content.isdigit() == is_number
22
-
23
-
24
-def filter(messages: List[Message], criterias: MessageFilter):
25
-    print(criterias)
26
-    conditions = [
27
-        [before, criterias.before] if criterias.before else None,
28
-        [after, criterias.after] if criterias.after else None,
29
-        [contains, criterias.contains] if criterias.contains else None,
30
-        [is_number, criterias.is_number] if criterias.is_number else None
31
-    ]
32
-
33
-    return [
34
-        message for message in messages
35
-        if all(
36
-            condition[0](message, condition[1])
37
-            for condition in conditions
38
-            if condition
39
-        )
40
-    ]

+ 93
- 0
million/analyze/filter/message.py Voir le fichier

@@ -0,0 +1,93 @@
1
+from __future__ import annotations
2
+from datetime import datetime
3
+import functools
4
+from typing import Any, Callable, List
5
+from million.analyze.retain_counts import is_count
6
+from million.model.filter.MessageFilter import MessageFilter, MessageOrder
7
+from million.model.message import Message
8
+
9
+
10
def before(message: Message, date: datetime):
    """Return True when the message's date is strictly earlier than `date`."""
    sent_at = message.date
    return sent_at < date
12
+
13
+
14
def after(message: Message, date: datetime):
    """Return True when the message's date is strictly later than `date`."""
    sent_at = message.date
    return sent_at > date
16
+
17
+
18
def contains(message: Message, content: str):
    """
    Case-insensitive substring test: True when `content` occurs in the
    message's content. A message without content (None) never matches.
    """
    text = message.content
    if text is None:
        return False
    needle = content.lower()
    return needle in text.lower()
20
+
21
+
22
def is_number(message: Message, is_number: bool):
    """
    Return True when the verdict of `is_count(message)` equals the requested
    `is_number` flag (so passing False selects the messages that are NOT counts).
    """
    counted = is_count(message)
    return counted == is_number
24
+
25
+
26
def sender_name(message: Message, sender_name: str):
    """Return True when the message's sender name is exactly `sender_name`."""
    author = message.sender_name
    return author == sender_name
28
+
29
+
30
def having_reactions(message: Message, having_reactions: bool):
    """
    Return True when "the message has at least one reaction" matches the
    requested `having_reactions` flag. Empty or missing reactions count as none.
    """
    has_any = True if message.reactions else False
    return has_any == having_reactions
32
+
33
+
34
def list_to_tuple(function: Callable) -> Callable:
    """
    Decorator converting list arguments (and a list result) to tuples.

    Every positional or keyword argument that is a `list` is replaced by a
    `tuple` of the same items before `function` is called, which makes the
    call usable with hash-based caches such as `functools.lru_cache`.
    Only top-level lists are converted; lists nested inside them are left
    untouched. A `list` returned by `function` is converted to a `tuple` too.

    The wrapper keeps the name and docstring of `function` and exposes the
    wrapped callable as `__wrapped__`.
    """

    @functools.wraps(function)
    def wrapper(*args, **kwargs) -> Any:
        hashable_args = tuple(
            tuple(arg) if isinstance(arg, list) else arg for arg in args
        )
        hashable_kwargs = {
            key: tuple(value) if isinstance(value, list) else value
            for key, value in kwargs.items()
        }
        result = function(*hashable_args, **hashable_kwargs)
        return tuple(result) if isinstance(result, list) else result

    return wrapper
46
+
47
+
48
def order_reaction(messages: List[Message]):
    """
    Return a new list of the messages, most-reacted first.
    Messages with empty or missing reactions are ranked as having zero.
    """

    def reaction_total(message):
        reactions = message.reactions
        return len(reactions) if reactions else 0

    ranked = list(messages)
    ranked.sort(key=reaction_total, reverse=True)
    return ranked
53
+
54
+
55
def order_date(messages: List[Message]):
    """Return a new list of the messages, most recent date first."""

    def sent_at(message):
        return message.date

    newest_first = list(messages)
    newest_first.sort(key=sent_at, reverse=True)
    return newest_first
57
+
58
+
59
@list_to_tuple
@functools.lru_cache(maxsize=10_000)
def filter(messages: List[Message], criterias: MessageFilter = None, order: MessageOrder = None, limit: int = None) -> List[Message]:
    """
    Select the messages matching `criterias`, then order and truncate them.

    Calls are memoized with `functools.lru_cache`, so every argument must be
    hashable. The outer `list_to_tuple` decorator turns list arguments into
    tuples before the cache sees them, and turns the list built here into a
    tuple on the way out.
    NOTE(review): this presumes `Message` instances are hashable - confirm.

    :param messages: the messages to inspect.
    :param criterias: optional filter. `None` applies no criterion at all.
        Within a filter, `sender_name`, `after`, `before` and `contains` are
        skipped when unset or empty; `is_number` and `having_reactions` are
        skipped only when they are `None`, because `False` is meaningful.
    :param order: `MessageOrder.reactions` or `MessageOrder.date`; any other
        value keeps the input order.
    :param limit: maximum number of messages kept; a falsy value (`None`
        or 0) disables truncation.
    :return: the selected messages.
    """
    conditions = []
    # A missing filter must not be dereferenced: it simply selects everything.
    if criterias is not None:
        for predicate, expected in (
            (sender_name, criterias.sender_name),
            (after, criterias.after),
            (before, criterias.before),
            (contains, criterias.contains),
        ):
            if expected:
                conditions.append((predicate, expected))
        # Tri-state flags: only None means "do not filter on this".
        for predicate, expected in (
            (is_number, criterias.is_number),
            (having_reactions, criterias.having_reactions),
        ):
            if expected is not None:
                conditions.append((predicate, expected))

    filtered = [
        message for message in messages
        if all(predicate(message, expected) for predicate, expected in conditions)
    ]

    if order == MessageOrder.reactions:
        filtered = order_reaction(filtered)
    elif order == MessageOrder.date:
        filtered = order_date(filtered)

    if limit:
        filtered = filtered[:limit]

    return filtered
90
+
91
+
92
def count(messages: List[Message], criterias: MessageFilter):
    """Return how many of `messages` are selected by `filter` for `criterias`."""
    matching = filter(messages, criterias)
    return len(matching)

+ 46
- 0
million/analyze/filter/participants.py Voir le fichier

@@ -0,0 +1,46 @@
1
+
2
+
3
+from typing import List
4
+
5
+from pydantic import BaseModel
6
+from million.model.filter.MessageFilter import MessageFilter
7
+from million.model.filter.ParticipantFilter import OrderDirection, ParticipantFilter
8
+from million.model.message import Message
9
+from million.model.participant import Participant
10
+from million.analyze.filter.message import filter as message_filter
11
+
12
+
13
# Pairs a participant with a message count; this is the element type
# returned by the participants `filter` in this module.
class ParticipantCount(BaseModel):
    # The participant the count refers to.
    participant: Participant
    # Number of messages attributed to that participant.
    count: int
16
+
17
+
18
def order_count(participants: List[Participant], messages: List[Message], criterias: MessageFilter = None, direction: OrderDirection = OrderDirection.desc) -> List[ParticipantCount]:
    """
    Count, for each participant, the messages they sent that match
    `criterias`, and return the counts sorted.

    :param participants: participants to rank.
    :param messages: messages to count from.
    :param criterias: optional filter applied to the messages; its
        `sender_name` is overridden with each participant's name in turn.
    :param direction: `OrderDirection.asc` sorts by increasing count; any
        other value (including `None`) sorts by decreasing count.
    :return: one `ParticipantCount` per participant, sorted by count.
    """
    # Dump the shared criteria once instead of once per participant.
    base_criterias = criterias.model_dump() if criterias else {}

    result = [
        ParticipantCount(
            participant=participant,
            count=len(
                message_filter(
                    messages,
                    criterias=MessageFilter(**{
                        **base_criterias,
                        "sender_name": participant.name
                    })
                )
            ),
        )
        for participant in participants
    ]
    # Honour the requested direction (it used to be ignored).
    result.sort(
        key=lambda entry: entry.count,
        reverse=direction != OrderDirection.asc,
    )
    return result
36
+
37
+
38
def filter(messages: List[Message], participants: List[Participant], criterias: ParticipantFilter) -> List[ParticipantCount]:
    """
    Rank the participants by their number of matching messages, delegating
    to `order_count` with the filter's `order_dir` as direction.
    """
    candidates = list(participants)
    return order_count(
        candidates,
        messages,
        criterias=criterias,
        direction=criterias.order_dir,
    )

+ 11
- 5
million/analyze/retain_counts.py Voir le fichier

@@ -4,12 +4,18 @@ from typing import List
4 4
 from million.model.message import Message
5 5
 
6 6
 
7
-def retain_counts(messages : List[Message])-> List[Message]:
7
def is_count(message: Message) -> bool:
    """
    Check if the message is a count.

    A message qualifies when its content contains a run of at least two
    digits anywhere, or consists of a single digit. Messages without
    content (None) never qualify.
    """
    content = message.content
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return content is not None and bool(re.search(r'(\d{2,}|^\d$)', content))
12
+
13
+
14
+def retain_counts(messages: List[Message]) -> List[Message]:
8 15
     """
9 16
     Retain only the messages that have a content
10 17
     """
11 18
     return [
12
-        m for m in messages 
13
-        if m.content and
14
-        re.search('(\d{2,}|^\d$)', m.content)
15
-        ]
19
+        m for m in messages
20
+        if is_count(m)
21
+    ]

+ 19
- 4
million/http/app.py Voir le fichier

@@ -1,8 +1,11 @@
1 1
 from typing import Annotated, List
2 2
 from fastapi import FastAPI, Query
3
-from million.model.filter.MessageFilter import MessageFilter
3
+from million.model.filter.MessageFilter import MessageFilter, MessageOrder
4
+from million.model.filter.ParticipantFilter import ParticipantFilter
4 5
 from million.model.message import Message
5
-from million.analyze.filter import filter
6
+from million.analyze.filter.message import filter as message_filter
7
+from million.analyze.filter.participants import ParticipantCount, filter as participants_filter
8
+from million.model.participant import Participant
6 9
 import million.parse.fb_exports as fb
7 10
 
8 11
 
@@ -13,6 +16,18 @@ export = fb.parse_dirfiles(DATA_PATH)
13 16
 app = FastAPI()
14 17
 
15 18
 
19
+class GetMessageForm(MessageFilter):
20
+    order: MessageOrder = None
21
+    limit: int = 100
22
+
23
+
16 24
 @app.get("/messages")
17
-def read_messages(form: Annotated[MessageFilter, Query()]) -> List[Message]:
18
-    return filter(export.messages, form)
25
+def read_messages(
26
+    form: Annotated[GetMessageForm, Query()]
27
+) -> List[Message]:
28
+    return message_filter(export.messages, form, form.order, form.limit)
29
+
30
+
31
+@app.get("/participants")
32
+def read_participants(form: Annotated[ParticipantFilter, Query()]) -> List[ParticipantCount]:
33
+    return participants_filter(export.messages, export.participants, form)

+ 11
- 0
million/model/filter/MessageFilter.py Voir le fichier

@@ -1,9 +1,20 @@
1 1
 from datetime import datetime
2
+from enum import Enum
2 3
 from pydantic import BaseModel
3 4
 
4 5
 
6
+class MessageOrder(str, Enum):
7
+    date = "date"
8
+    reactions = "reactions"
9
+
10
+
5 11
 class MessageFilter(BaseModel):
6 12
     before: datetime | None = None
7 13
     after: datetime | None = None
8 14
     contains: str | None = None
9 15
     is_number: bool | None = None
16
+    sender_name: str | None = None
17
+    having_reactions: bool | None = None
18
+
19
+    class Config:
20
+        frozen = True

+ 23
- 0
million/model/filter/ParticipantFilter.py Voir le fichier

@@ -0,0 +1,23 @@
1
+from datetime import datetime
2
+from pydantic import BaseModel
3
+from enum import Enum
4
+
5
+from million.model.filter.MessageFilter import MessageFilter
6
+
7
+
8
+class OrderBy(str, Enum):
9
+    count = "count"
10
+
11
+
12
+class OrderDirection(str, Enum):
13
+    asc = "asc"
14
+    desc = "desc"
15
+
16
+
17
+class ParticipantFilter(MessageFilter):
18
+
19
+    order_by: OrderBy | None = OrderBy.count
20
+    order_dir: OrderDirection | None = OrderDirection.desc
21
+
22
+    def __hash__(self):
23
+        return hash(self.name)

+ 4
- 2
million/model/message.py Voir le fichier

@@ -1,7 +1,9 @@
1 1
 from datetime import datetime
2
-from math import floor
3 2
 from typing import Any, List
4 3
 from pydantic import BaseModel
4
+import pytz
5
+
6
+utc=pytz.UTC
5 7
 
6 8
 class Reaction(BaseModel):
7 9
     reaction: str
@@ -47,7 +49,7 @@ class Message(BaseModel):
47 49
 
48 50
     @property
49 51
     def date(self) -> datetime:
50
-        return datetime.fromtimestamp(self.timestamp_ms / 1000)
52
+        return utc.localize(datetime.fromtimestamp(self.timestamp_ms / 1000))
51 53
 
52 54
     def __str__(self) -> str:
53 55
         dt = datetime.fromtimestamp(self.timestamp_ms / 1000)

+ 2
- 1
scripts/find_holes.py Voir le fichier

@@ -18,11 +18,12 @@ print(f"Actual counted: {actual_counted}")
18 18
 
19 19
 holes = find_holes(filtered)
20 20
 
21
-print(len(holes))
22 21
 
23 22
 for hole in holes:
24 23
     print(f"{hole.start() + 1} -> {hole.end() - 1} ({hole.length() - 2})")
25 24
 
25
+print(f"Total holes: {len(holes)}")
26
+print(f"Total holes size: {sum([h.length() for h in holes if h.length() < 10_000])}")
26 27
 
27 28
 # lets export a csv file of the holes and the people responsible for them
28 29
 with open('output/holes.csv', 'w') as f:

Chargement…
Annuler
Enregistrer