Graphe des relations d'ajout au groupe entre les membres du Million Project
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. import datetime as dt
  2. import re
  3. from typing import List
  4. from million.model.facebook_schema import Message
  5. from million.model.member_movement import MemberMovement, MovementType
  6. #########################
  7. # CONSTANTS
  8. #########################
  # French system-message templates. Every pattern in this section is applied
  # with re.fullmatch, i.e. it has to cover the whole string it is tested on.

  # "You added NAMES to the group." -- group 1: the added names group.
  _AUTHOR_ADDED_MEMBERS = r'Vous avez ajouté (.+?) au groupe\.'
  # "FIRST and REST" -- used to split a names group. Group 1: first name,
  # group 2: either another name or an "N other people" phrase.
  _SEVERAL_PEOPLE = r'(.+?) et (.+?)'
  # "N other people" -- group 1: the number of unnamed people.
  # NOTE(review): a singular wording ("1 autre personne") would not match this
  # pattern -- confirm whether such messages can occur.
  _N_OTHER_PEOPLE = r'(\d+) autres personnes'
  # "INITIATOR added NAMES to the group." -- group 1: initiator,
  # group 2: the added names group.
  _MEMBER_ADDED_MEMBERS = r'(.+?) a ajouté (.+?) au groupe\.'
  # "NAME left the group." -- group 1: the leaving member.
  _MEMBER_LEFT_GROUP = r'(.+) a quitté le groupe\.'
  # "You removed NAME from the group." -- group 1: the removed member.
  _AUTHOR_FIRED_MEMBERS = r'Vous avez retiré (.+?) du groupe\.'
  # "INITIATOR removed NAME from the group." -- group 1: initiator,
  # group 2: the removed member.
  _MEMBER_FIRED_MEMBERS = r'(.+?) a retiré (.+?) du groupe\.'
  16. #########################
  17. # GLOBALS
  18. #########################
  # NOTE: the four names below are bare annotations -- they declare a type but
  # bind no value. They only exist at runtime once _initialize() has run.

  # The members movements are computed by walking through the message
  # history; this list holds the names of the members currently tracked
  # as being in the group.
  _tracked_members : List[str]

  # Datetime of the earliest message, considered as the datetime of
  # the group creation.
  _initial_date : dt.datetime

  # Pending movements for unnamed people added to the conversation
  # through bulk adds (their `member` is None until attributed).
  _unnamed_adds : List[MemberMovement]

  # History of the movements in the group members list, computed
  # by analyzing the group message history.
  _movements : List[MemberMovement]
  32. #########################
  33. # PRIVATE FUNC
  34. #########################
  def _add_unnamed_adds(times: int, member_adding: str, addition_date: dt.datetime):
      """Queue `times` anonymous arrivals initiated by `member_adding`.

      Each queued entry is an IN movement whose member is None, dated
      `addition_date`, appended to the module-level `_unnamed_adds` list.
      """
      _unnamed_adds.extend(
          MemberMovement(None, member_adding, MovementType.IN, addition_date)
          for _ in range(times)
      )
  def _add_unnamed_add(member_adding: str, addition_date: dt.datetime):
      """Queue one anonymous arrival initiated by `member_adding`.

      The entry is an IN movement whose member is None, dated
      `addition_date`, appended to the module-level `_unnamed_adds` list.
      """
      _unnamed_adds.append(
          MemberMovement(None, member_adding, MovementType.IN, addition_date)
      )
  def _add_member(new_member: str, member_adding: str, addition_date: dt.datetime, estimated: bool = False):
      """Record the arrival of `new_member` and start tracking them.

      An IN movement initiated by `member_adding` at `addition_date` is
      appended to `_movements`; `estimated` is forwarded to the movement
      as-is. The member name is then appended to `_tracked_members`.
      """
      arrival = MemberMovement(
          new_member,
          member_adding,
          MovementType.IN,
          addition_date,
          estimated,
      )
      _movements.append(arrival)
      _tracked_members.append(new_member)
  def _remove_member(removed_member: str, member_removing: str, deletion_date: dt.datetime):
      """Record the departure of `removed_member` and stop tracking them.

      Args:
          removed_member: name of the member leaving the group.
          member_removing: name of whoever initiated the departure.
          deletion_date: datetime of the departure.

      Raises:
          ValueError: propagated from list.remove if the member is still not
              tracked after the tracking check.
      """
      # Check tracking first: for an untracked member this appends an estimated
      # IN movement. The final ordering uses sorted(), which is stable, so the
      # arrival must already be in `_movements` before the departure for it to
      # come first when both carry the same datetime.
      _check_member_tracked(removed_member)

      departure = MemberMovement(
          removed_member, member_removing, MovementType.OUT, deletion_date
      )
      _movements.append(departure)
      _tracked_members.remove(removed_member)
  def _check_member_tracked(member_name: str):
      """Make sure `member_name` is tracked, adding an estimated arrival if not.

      An untracked member is assumed to have joined anonymously through a
      bulk add: the most recently queued unnamed add is consumed and supplies
      the initiator and the date. When no unnamed add is pending, the member
      is considered present since the beginning: they are recorded as their
      own initiator, at `_initial_date`.
      """
      if member_name in _tracked_members:
          return

      if _unnamed_adds:
          bulk_add = _unnamed_adds.pop()
          initiator, joined_at = bulk_add.initiator, bulk_add.date_time
      else:
          initiator, joined_at = member_name, _initial_date

      _add_member(member_name, initiator, joined_at, estimated=True)
  def _resolve_names_group(names_group: str) -> List[str | None]:
      """Split a names group into the individual people it mentions.

      Unnamed people are returned as None. Examples:
          "Jean Dupont et Marie Buffet" -> ["Jean Dupont", "Marie Buffet"]
          "Pierre Martin et 3 autres personnes"
              -> ["Pierre Martin", None, None, None]

      A group that does not have the "X et Y" shape is returned unchanged as
      a single-element list.
      """
      pair = re.fullmatch(_SEVERAL_PEOPLE, names_group)
      if pair is None:
          return [names_group]

      first, remainder = pair.group(1, 2)
      counted = re.fullmatch(_N_OTHER_PEOPLE, remainder)
      if counted is None:
          # The second half is a plain name.
          return [first, remainder]

      # The second half is "N autres personnes": one None per unnamed person.
      return [first] + [None] * int(counted.group(1))
  def _handle_author_added_members(match, message_date, sender_name):
      """Handle a "Vous avez ajouté ..." message: the sender added people.

      `match` is the regex match on the message content (group 1 is the names
      group). Named people are recorded as arrivals initiated by
      `sender_name`; unnamed ones are queued as unnamed adds.
      """
      for person in _resolve_names_group(match.group(1)):
          if person is not None:
              _add_member(person, sender_name, message_date)
          else:
              _add_unnamed_add(sender_name, message_date)
  def _handle_member_added_members(match, message_date):
      """Handle a "X a ajouté ..." message: a named member added people.

      `match` is the regex match on the message content: group 1 is the
      initiator, group 2 the names group. Named people are recorded as
      arrivals; unnamed ones are queued as unnamed adds.
      """
      added_by, added_people = match.group(1), match.group(2)
      for person in _resolve_names_group(added_people):
          if person is not None:
              _add_member(person, added_by, message_date)
          else:
              _add_unnamed_add(added_by, message_date)
  def _handle_member_left_group(match, message_date):
      """Handle a "X a quitté le groupe." message.

      The member captured in group 1 of `match` is recorded as both the
      departing member and the initiator of the departure.
      """
      leaver = match.group(1)
      _remove_member(leaver, leaver, message_date)
  def _handle_author_fired_members(match, message_date, sender_name):
      """Handle a "Vous avez retiré ..." message: the sender removed someone.

      Group 1 of `match` is passed as-is (no names-group splitting) as the
      removed member, with `sender_name` as the initiator.
      """
      _remove_member(match.group(1), sender_name, message_date)
  def _handle_member_fired_members(match, message_date):
      """Handle a "X a retiré ..." message: a named member removed someone.

      Group 1 of `match` is the initiator; group 2 is passed as-is (no
      names-group splitting) as the removed member.
      """
      removed_by, removed = match.group(1), match.group(2)
      _remove_member(removed, removed_by, message_date)
  def _handle_message_content(message: Message):
      """Detect group arrivals and departures from a message's content.

      The content is tested against the system-message patterns in a fixed
      order; the handler of the first pattern that fully matches is invoked
      and the remaining patterns are skipped. Messages without content, or
      matching no pattern, are ignored.
      """
      text = message.content
      if text is None:
          return

      author = message.sender_name
      sent_at = message.date_time

      # (pattern, handler, extra positional arguments for the handler)
      dispatch = (
          (_AUTHOR_ADDED_MEMBERS, _handle_author_added_members, (author,)),
          (_MEMBER_ADDED_MEMBERS, _handle_member_added_members, ()),
          (_MEMBER_LEFT_GROUP, _handle_member_left_group, ()),
          (_AUTHOR_FIRED_MEMBERS, _handle_author_fired_members, (author,)),
          (_MEMBER_FIRED_MEMBERS, _handle_member_fired_members, ()),
      )

      for regex, action, trailing_args in dispatch:
          found = re.fullmatch(regex, text)
          if found is None:
              continue
          action(found, sent_at, *trailing_args)
          return
  def _initialize(initial_date: dt.datetime):
      """Reset every module-level state variable for a fresh computation.

      `_initial_date` is set to `initial_date`; the tracked members, the
      movements and the pending unnamed adds each become a new empty list.
      """
      global _initial_date, _unnamed_adds, _movements, _tracked_members

      _initial_date = initial_date
      _tracked_members, _movements, _unnamed_adds = [], [], []
  def compute_members_movements(messages: List[Message], participants: List[str] | None = None) -> List[MemberMovement]:
      """Compute the arrivals and departures of group members from messages.

      The module state is reset, then every message is processed in the given
      order: its sender is ensured to be tracked and its content is scanned
      for system messages describing adds, removals and departures. Finally,
      every name in `participants` is ensured to be tracked.

      Args:
          messages: the message history. The datetime of the first element is
              used as the group creation date -- presumably the list is
              expected in chronological order; verify against callers.
          participants: optional names that must appear in the result; those
              never seen in the history get an estimated arrival. Defaults
              to no extra participants.

      Returns:
          A new list of the recorded movements sorted by `date_time`, or an
          empty list when `messages` is empty (no creation date can be
          derived, so nothing can be dated).
      """
      # Without any message there is no initial date to anchor movements to.
      if not messages:
          return []

      _initialize(messages[0].date_time)

      for message in messages:
          _check_member_tracked(message.sender_name)
          _handle_message_content(message)

      # `None` replaces the former mutable `[]` default.
      for participant in participants or ():
          _check_member_tracked(participant)

      return sorted(_movements, key=lambda move: move.date_time)