瀏覽代碼

Added a Members movement module, able to compute

the history of members joining and leaving the group
through time with details about each movement
DEV-Mael
Figg 4 月之前
父節點
當前提交
13af197748
共有 2 個檔案被更改,包括 194 行新增0 行删除
  1. 22
    0
      million/model/member_movement.py
  2. 172
    0
      million/processing/members_movements.py

+ 22
- 0
million/model/member_movement.py 查看文件

@@ -0,0 +1,22 @@
1
+import datetime as dt
2
+from enum import Enum
3
+from typing import Optional
4
+
5
+
6
class MovementType(Enum):
    """Direction of a membership change recorded for the group."""

    # The member entered the group
    IN = 0
    # The member is no longer part of the group
    OUT = 1
9
+
10
class MemberMovement:
    """One arrival in, or departure from, the group.

    All fields are plain public attributes assigned by the constructor.
    """

    # Name of the member who moved; None when the person is not named
    member: Optional[str]
    # Name of whoever triggered the movement
    initiator: str
    # Whether the member came IN or went OUT
    movementType: MovementType
    # Date and time attached to the movement
    date_time: dt.datetime
    # True when the movement is an estimate rather than an observed event
    estimated: bool

    def __init__(self, member: Optional[str], initiator: str, movementType: MovementType, date_time: dt.datetime, estimated: bool = False):
        """Store the given values unchanged; `estimated` defaults to False."""
        self.member = member
        self.initiator = initiator
        self.movementType = movementType
        self.date_time = date_time
        self.estimated = estimated

    def __repr__(self) -> str:
        """Return a constructor-like representation, handy when debugging."""
        return (
            f"{type(self).__name__}("
            f"member={self.member!r}, "
            f"initiator={self.initiator!r}, "
            f"movementType={self.movementType!r}, "
            f"date_time={self.date_time!r}, "
            f"estimated={self.estimated!r})"
        )

+ 172
- 0
million/processing/members_movements.py 查看文件

@@ -0,0 +1,172 @@
1
+import datetime as dt
2
+import re
3
+from typing import List
4
+from million.model.facebook_schema import Message
5
+from million.model.member_movement import MemberMovement, MovementType
6
+
7
+#########################
8
+#       CONSTANTS
9
+#########################
10
+
11
# System messages announcing additions. The captured groups are the
# names group (author form) or the initiator followed by the names group.
_AUTHOR_ADDED_MEMBERS = r'Vous avez ajouté (.+?) au groupe\.'
_MEMBER_ADDED_MEMBERS = r'(.+?) a ajouté (.+?) au groupe\.'

# System messages announcing departures, voluntary or forced.
_MEMBER_LEFT_GROUP = r'(.+) a quitté le groupe\.'
_AUTHOR_FIRED_MEMBERS = r'Vous avez retiré (.+?) du groupe\.'
_MEMBER_FIRED_MEMBERS = r'(.+?) a retiré (.+?) du groupe\.'

# Patterns applied to a names group: "<first> et <rest>", where <rest>
# may itself be a count of unnamed people.
_SEVERAL_PEOPLE = r'(.+?) et (.+?)'
_N_OTHER_PEOPLE = r'(\d+) autres personnes'
18
+
19
+#########################
20
+#       GLOBALS
21
+#########################
22
+
23
# Module state shared by the helpers below. These are declarations only:
# the names receive their values in _initialize.

# Names of the members considered to be in the group at the current
# point of the message history being analyzed
_tracked_members: List[str]
# Datetime of the earliest message, treated as the datetime of the
# group creation
_initial_date: dt.datetime
# Additions of unnamed people (bulk adds) that have not yet been
# attributed to an actual member
_unnamed_adds: List[MemberMovement]
# Movements collected so far while analyzing the group message history
_movements: List[MemberMovement]
36
+
37
+#########################
38
+#      PRIVATE FUNC
39
+#########################
40
+
41
# Registers `times` separate unnamed arrivals, all initiated by
# `member_adding` at `addition_date`
def _add_unnamed_adds(times: int, member_adding: str, addition_date: dt.datetime):
    _unnamed_adds.extend(
        MemberMovement(None, member_adding, MovementType.IN, addition_date)
        for _ in range(times)
    )
45
+
46
# Registers a single unnamed arrival initiated by `member_adding`
def _add_unnamed_add(member_adding: str, addition_date: dt.datetime):
    _unnamed_adds.append(
        MemberMovement(None, member_adding, MovementType.IN, addition_date)
    )
49
+
50
# Records the arrival of `new_member` in the movement history and starts
# tracking that member as present in the group
def _add_member(new_member: str, member_adding: str, addition_date: dt.datetime, estimated: bool = False):
    _movements.append(
        MemberMovement(new_member, member_adding, MovementType.IN, addition_date, estimated)
    )
    _tracked_members.append(new_member)
55
+
56
# Records the departure of `removed_member` and stops tracking that member
def _remove_member(removed_member: str, member_removing: str, deletion_date: dt.datetime):
    # The departure is appended before any estimated arrival that the
    # tracking check below may add
    _movements.append(
        MemberMovement(removed_member, member_removing, MovementType.OUT, deletion_date)
    )

    # Guarantees the name is in the tracked list so that remove() finds it
    _check_member_tracked(removed_member)
    _tracked_members.remove(removed_member)
62
+
63
# Ensures the member is tracked. An untracked member gets an estimated
# arrival: it consumes the most recently recorded unnamed add when one is
# pending, otherwise the member is assumed to have been there since the
# initial date and is recorded as their own initiator
def _check_member_tracked(member_name: str):
    if member_name not in _tracked_members:
        if _unnamed_adds:
            pending = _unnamed_adds.pop()
            origin, when = pending.initiator, pending.date_time
        else:
            origin, when = member_name, _initial_date

        _add_member(member_name, origin, when, estimated=True)
78
+
79
# Splits a names group into the individual people it mentions.
# Unnamed people are represented by None.
# examples :
#  "Jean Dupont" -> ["Jean Dupont"]
#  "Jean Dupont et Marie Buffet" -> ["Jean Dupont", "Marie Buffet"]
#  "Pierre Martin et 3 autres personnes" -> ["Pierre Martin", None, None, None]
def _resolve_names_group(names_group: str) -> List[str | None]:
    pair = re.fullmatch(_SEVERAL_PEOPLE, names_group)
    if pair is None:
        # No "et" separator: the whole text is one name
        return [names_group]

    first, remainder = pair.group(1), pair.group(2)

    bulk = re.fullmatch(_N_OTHER_PEOPLE, remainder)
    if bulk is None:
        return [first, remainder]

    # One None placeholder per unnamed person announced
    return [first] + [None] * int(bulk.group(1))
98
+
99
# Handles "Vous avez ajouté ..." messages: the sender of the message is
# the initiator of every addition found in the names group
def _handle_author_added_members(match, message_date, sender_name):
    adder = sender_name
    for person in _resolve_names_group(match.group(1)):
        if person is not None:
            _add_member(person, adder, message_date)
            continue
        _add_unnamed_add(adder, message_date)
107
+
108
# Handles "<member> a ajouté ..." messages: the first captured group is
# the initiator, the second is the names group of added people
def _handle_member_added_members(match, message_date):
    adder = match.group(1)
    for person in _resolve_names_group(match.group(2)):
        if person is not None:
            _add_member(person, adder, message_date)
            continue
        _add_unnamed_add(adder, message_date)
116
+
117
# Handles "<member> a quitté le groupe." messages: the leaving member is
# recorded as the initiator of their own departure
def _handle_member_left_group(match, message_date):
    leaver = match.group(1)
    _remove_member(leaver, leaver, message_date)
121
+    
122
# Handles "Vous avez retiré ..." messages: the sender of the message is
# the initiator; the captured text is passed on as a single member name
def _handle_author_fired_members(match, message_date, sender_name):
    _remove_member(match.group(1), sender_name, message_date)
126
+
127
# Handles "<member> a retiré ..." messages: the first captured group is
# the initiator, the second is passed on as a single member name
def _handle_member_fired_members(match, message_date):
    remover, removed = match.group(1), match.group(2)
    _remove_member(removed, remover, message_date)
131
+
132
# Detects system-generated messages from their content and turns them
# into group arrivals and departures. Messages without content, or whose
# content matches none of the known patterns, are ignored
def _handle_message_content(message: Message):
    text = message.content
    if text is None:
        return

    author = message.sender_name
    when = message.date_time

    # Patterns are tried in this order; only the first one that matches
    # the whole content is acted upon
    dispatch = (
        (_AUTHOR_ADDED_MEMBERS, lambda found: _handle_author_added_members(found, when, author)),
        (_MEMBER_ADDED_MEMBERS, lambda found: _handle_member_added_members(found, when)),
        (_MEMBER_LEFT_GROUP, lambda found: _handle_member_left_group(found, when)),
        (_AUTHOR_FIRED_MEMBERS, lambda found: _handle_author_fired_members(found, when, author)),
        (_MEMBER_FIRED_MEMBERS, lambda found: _handle_member_fired_members(found, when)),
    )

    for regex, action in dispatch:
        found = re.fullmatch(regex, text)
        if found is None:
            continue
        action(found)
        return
153
+
154
# Puts the module state back to a clean slate: three fresh empty lists
# and the given datetime as the initial date
def _initialize(initial_date: dt.datetime):
    global _tracked_members, _initial_date, _movements, _unnamed_adds
    _initial_date = initial_date
    _tracked_members, _movements, _unnamed_adds = [], [], []
161
+
162
# Computes the history of members joining and leaving the group.
#
# messages: the group messages; the date of the first one is used as the
#   group creation date (presumably the list is in chronological order -
#   to be confirmed against the callers)
# participants: optional names that must end up tracked; those never seen
#   in the messages receive an estimated arrival
#
# Returns the movements sorted by date. With no messages there is no date
# to attach any movement to, so an empty list is returned.
def compute_members_movements(messages: List[Message], participants: List[str] | None = None) -> List[MemberMovement]:
    if not messages:
        return []

    _initialize(messages[0].date_time)

    for message in messages:
        # A sender is necessarily a member at the time of the message
        _check_member_tracked(message.sender_name)
        _handle_message_content(message)

    # None (the default) means no extra participants to account for
    for p in participants or ():
        _check_member_tracked(p)

    return sorted(_movements, key=lambda move: move.date_time)

Loading…
取消
儲存