You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

find_holes.py 4.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. from typing import List
  2. from million.model.hole import Hole
  3. from million.model.message import Message
  4. from million.model.sequence import Sequence
  def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
      """
      Group the counted values of a conversation into merged runs of consecutive numbers.

      The messages are first cut, in their given order, into runs where each
      counted value is exactly the previous one plus one. The runs are then
      sorted by their start value, and runs that overlap or touch each other
      are merged. Runs reduced to a single value (start == end) after merging
      are discarded.

      :param messages: messages to scan; each must expose ``get_counted_value()``.
      :param accepted_max: messages (other than the very first one, which always
          seeds the first run) whose counted value is above this bound are ignored.
      :return: the merged sequences, ordered by start value. Empty when
          ``messages`` is empty.
      """
      if not messages:
          return []

      # Pass 1: split the conversation into runs of strictly consecutive values.
      first_message = messages[0]
      first_value = first_message.get_counted_value()
      run = Sequence(
          start=first_value,
          start_message=first_message,
          end=first_value,
          end_message=first_message,
      )
      runs: List[Sequence] = []
      for message in messages[1:]:
          value = message.get_counted_value()
          if value > accepted_max:
              continue

          if value - run.end == 1:
              run.end = value
              run.end_message = message
          else:
              runs.append(run)
              run = Sequence(
                  start=value,
                  start_message=message,
                  end=value,
                  end_message=message,
              )
      # The run still being built when the loop ends must be kept too;
      # otherwise the tail of the conversation is lost (and `runs` may be empty).
      runs.append(run)

      # Pass 2: order the runs by start, then merge the ones that overlap or touch.
      runs.sort(key=lambda s: s.start)

      merged: List[Sequence] = []
      current = runs[0]
      for candidate in runs[1:]:
          starts_inside = current.start <= candidate.start <= current.end
          starts_right_after = candidate.start == current.end + 1

          if starts_inside or starts_right_after:
              # Only extend; a candidate fully contained in `current` changes nothing.
              if candidate.end > current.end:
                  current.end = candidate.end
                  current.end_message = candidate.end_message
          else:
              merged.append(current)
              current = candidate
      # Same as above: flush the sequence that was being merged last.
      merged.append(current)

      # Having merged the sequences once, any sequence having start = end can be removed
      return [s for s in merged if s.start != s.end]
  def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Hole]:
      """
      Find the holes in the conversation.

      The merged sequences returned by ``compute_sequences`` are walked in
      neighbouring pairs; a hole is reported whenever the next sequence starts
      more than one value after the previous one ends.

      :param messages: messages to analyse, forwarded to ``compute_sequences``.
      :param accepted_max: upper bound forwarded to ``compute_sequences``.
      :return: one ``Hole`` per gap. ``start``/``start_message`` are the end of
          the sequence before the gap and ``end``/``end_message`` the start of
          the sequence after it (the bordering values, not the missing ones).
      """
      ordered = compute_sequences(messages, accepted_max)
      return [
          Hole(
              start=before.end,
              end=after.start,
              start_message=before.end_message,
              end_message=after.start_message,
          )
          for before, after in zip(ordered, ordered[1:])
          if after.start - before.end > 1
      ]
  def find_holesV2(messages: List[Message]) -> List[Hole]:
      """
      Find the holes in the conversation by looking for every value 1, 2, 3, ... in turn.

      For each expected value ``current``, the messages around the cursor
      ``msg_idx`` are searched: first up to ``limit_ahead`` messages starting at
      the cursor, then up to ``limit_behind - 1`` messages before it (nearest
      first). When the value is found the cursor advances by one message and
      any open hole is closed; when it is not found a hole is opened (unless
      one is already open). Progress is printed to stdout along the way.

      Both search windows are clamped to the bounds of ``messages``, so a gap
      near the end of the list no longer raises ``IndexError`` and the
      look-behind no longer wraps around to the end of the list.

      :param messages: messages to analyse; each must expose
          ``get_counted_value()``, ``sender_name`` and ``content``.
      :return: the holes found. ``start`` is the first missing value and ``end``
          the last missing value. The search stops once ``current`` exceeds the
          highest counted value present; a hole still open at that point keeps
          the placeholder ``end == 0``.
      """
      limit_ahead = 100
      limit_behind = 20

      # Beyond the highest counted value nothing can ever be found again, so the
      # search must stop there instead of incrementing `current` forever.
      # NOTE(review): assumes counted values are ints (or None) — confirm.
      counted_values = (m.get_counted_value() for m in messages)
      highest_value = max((v for v in counted_values if v is not None), default=0)

      holes = []
      current = 1
      msg_idx = 0

      while msg_idx < len(messages) and current <= highest_value:
          # Search `current` around msg_idx: look-ahead first, then look-behind.
          # Slices never go out of range, and max(0, ...) prevents negative
          # indices from reaching the tail of the list.
          ahead = messages[msg_idx:msg_idx + limit_ahead]
          behind = messages[max(0, msg_idx - limit_behind + 1):msg_idx]
          behind.reverse()  # nearest message first, as the look-behind walks backwards
          found = next(
              (m for m in ahead + behind if m.get_counted_value() == current),
              None,
          )

          if found is not None:
              # The value `current` was found in the search window.
              print(f"{found.sender_name} : {found.content}")

              # If a hole was open, it has to be closed.
              if holes and holes[-1].end == 0:
                  holes[-1].end = current - 1
                  holes[-1].end_message = found
                  print(f"\t{current-1}")

              msg_idx += 1
          elif not holes or holes[-1].end > 0:
              # The value `current` was not found: we are inside a hole.
              # No hole is currently open, so create one (end == 0 marks it as open).
              hole = Hole(
                  start=current,
                  end=0,
                  start_message=messages[msg_idx],
                  end_message=Message(sender_name='', timestamp_ms=0),
              )
              holes.append(hole)
              print(f"\t HOLE : {hole.start}\n\t\t...")

          current += 1

      return holes