Graphe des relations d'ajout au groupe entre les membres du Million Project
Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

members_movements.py 6.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. import datetime as dt
  2. import re
  3. from typing import List
  4. from million.model.facebook_schema import Message
  5. from million.model.member_movement import MemberMovement, MovementType
  6. #########################
  7. # CONSTANTS
  8. #########################
  9. _AUTHOR_ADDED_MEMBERS = r'Vous avez ajouté (.+?) au groupe\.'
  10. _SEVERAL_PEOPLE = r'(.+?) et (.+?)'
  11. _N_OTHER_PEOPLE = r'(\d+) autres personnes'
  12. _MEMBER_ADDED_MEMBERS = r'(.+?) a ajouté (.+?) au groupe\.'
  13. _MEMBER_LEFT_GROUP = r'(.+) a quitté le groupe\.'
  14. _AUTHOR_FIRED_MEMBERS = r'Vous avez retiré (.+?) du groupe\.'
  15. _MEMBER_FIRED_MEMBERS = r'(.+?) a retiré (.+?) du groupe\.'
  16. #########################
  17. # GLOBALS
  18. #########################
# The movements are computed by replaying the message history. This
# list holds the names of the members considered to be in the group at
# the point reached in the replay. It is only declared here; the value
# is assigned by _initialize().
_tracked_members : List[str]
# Datetime of the earliest message, used as the datetime of the group
# creation (and as the arrival date of members whose arrival was never
# announced). Assigned by _initialize().
_initial_date : dt.datetime
# Pending bulk additions of unnamed people ("... et N autres
# personnes"): one entry per unnamed person, consumed later when an
# untracked member shows up. Assigned by _initialize().
_unnamed_adds : List[MemberMovement]
# History of the arrivals and departures deduced from the group
# message history, in the order they were recorded. Assigned by
# _initialize().
_movements : List[MemberMovement]
  32. #########################
  33. # PRIVATE FUNC
  34. #########################
  35. def _add_unnamed_adds(times: int, member_adding: str, addition_date: dt.datetime):
  36. for _ in range(times):
  37. unnamed_add = MemberMovement(None, member_adding, MovementType.IN, addition_date)
  38. _unnamed_adds.append(unnamed_add)
  39. def _add_unnamed_add(member_adding: str, addition_date: dt.datetime):
  40. unnamed_add = MemberMovement(None, member_adding, MovementType.IN, addition_date)
  41. _unnamed_adds.append(unnamed_add)
  42. def _add_member(new_member: str, member_adding: str, addition_date: dt.datetime, estimated: bool = False):
  43. movement = MemberMovement(new_member, member_adding, MovementType.IN, addition_date, estimated)
  44. _movements.append(movement)
  45. _tracked_members.append(new_member)
  46. def _remove_member(removed_member: str, member_removing: str, deletion_date: dt.datetime):
  47. movement = MemberMovement(removed_member, member_removing, MovementType.OUT, deletion_date)
  48. _movements.append(movement)
  49. _check_member_tracked(removed_member)
  50. _tracked_members.remove(removed_member)
  51. # Checks if the member is tracked by the algorithm. If not, it considers
  52. # he was added anonymously through a bulk add or he has been there since
  53. # the beginning
  54. def _check_member_tracked(member_name: str):
  55. if member_name in _tracked_members: return
  56. if len(_unnamed_adds) > 0:
  57. unnamed_add = _unnamed_adds.pop()
  58. added_by = unnamed_add.initiator
  59. added_time = unnamed_add.date_time
  60. else:
  61. added_by = member_name
  62. added_time = _initial_date
  63. _add_member(member_name, added_by, added_time, estimated=True)
  64. # Returns a list of all people names found inside a group of names
  65. # unnamed people will be returned as None
  66. # examples :
  67. # "Jean Dupont et Marie Buffet" -> ["Jean Dupont", "Marie Buffet"]
  68. # "Pierre Martin et 3 autres personnes" -> ["Pierre Martin", None, None, None]
  69. def _resolve_names_group(names_group: str) -> List[str | None]:
  70. match = re.fullmatch(_SEVERAL_PEOPLE, names_group)
  71. if not match: return [names_group]
  72. names = [match.group(1)]
  73. other_people = match.group(2)
  74. if match := re.fullmatch(_N_OTHER_PEOPLE, other_people):
  75. n = int(match.group(1))
  76. names.extend([None] * n)
  77. else:
  78. names.append(other_people)
  79. return names
  80. def _handle_author_added_members(match, message_date, sender_name):
  81. names_group = match.group(1)
  82. initiator = sender_name
  83. for name in _resolve_names_group(names_group):
  84. if name is None:
  85. _add_unnamed_add(initiator, message_date)
  86. else:
  87. _add_member(name, initiator, message_date)
  88. def _handle_member_added_members(match, message_date):
  89. names_group = match.group(2)
  90. initiator = match.group(1)
  91. for name in _resolve_names_group(names_group):
  92. if name is None:
  93. _add_unnamed_add(initiator, message_date)
  94. else:
  95. _add_member(name, initiator, message_date)
  96. def _handle_member_left_group(match, message_date):
  97. names_group = match.group(1)
  98. initiator = match.group(1)
  99. _remove_member(names_group, initiator, message_date)
  100. def _handle_author_fired_members(match, message_date, sender_name):
  101. names_group = match.group(1)
  102. initiator = sender_name
  103. _remove_member(names_group, initiator, message_date)
  104. def _handle_member_fired_members(match, message_date):
  105. names_group = match.group(2)
  106. initiator = match.group(1)
  107. _remove_member(names_group, initiator, message_date)
  108. # Uses message content to determine group arrivals and departures
  109. # by detecting system-generated messages
  110. def _handle_message_content(message: Message):
  111. content = message.content
  112. if content is None: return
  113. sender_name = message.sender_name
  114. message_date = message.date_time
  115. regex_actions = [
  116. (_AUTHOR_ADDED_MEMBERS, _handle_author_added_members, sender_name),
  117. (_MEMBER_ADDED_MEMBERS, _handle_member_added_members),
  118. (_MEMBER_LEFT_GROUP, _handle_member_left_group),
  119. (_AUTHOR_FIRED_MEMBERS, _handle_author_fired_members, sender_name),
  120. (_MEMBER_FIRED_MEMBERS, _handle_member_fired_members)
  121. ]
  122. for pattern, handler, *extra_args in regex_actions:
  123. if (match := re.fullmatch(pattern, content)):
  124. handler(match, message_date, *extra_args)
  125. break
  126. # Reset every global variables
  127. def _initialize(initial_date: dt.datetime):
  128. global _tracked_members, _initial_date, _movements, _unnamed_adds
  129. _tracked_members = []
  130. _movements = []
  131. _unnamed_adds = []
  132. _initial_date = initial_date
  133. def compute_members_movements(messages: List[Message], participants: List[str] = []) -> List[MemberMovement]:
  134. _initialize(messages[0].date_time)
  135. for message in messages:
  136. _check_member_tracked(message.sender_name)
  137. _handle_message_content(message)
  138. for p in participants:
  139. _check_member_tracked(p)
  140. return sorted(_movements, key = lambda move: move.date_time)