You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

find_holes.py 4.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import time
  2. from typing import List
  3. from million.model.hole import Hole
  4. from million.model.message import Message
  5. from million.model.sequence import Sequence
  6. def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
  7. sequences: List[Sequence] = []
  8. current_sequence = Sequence(
  9. start=messages[0].get_counted_value(),
  10. start_message=messages[0],
  11. end=messages[0].get_counted_value(),
  12. end_message=messages[0]
  13. )
  14. for i in range(1, len(messages)):
  15. message = messages[i]
  16. message_value = message.get_counted_value()
  17. if message_value > accepted_max:
  18. continue
  19. if message_value - current_sequence.end == 1:
  20. current_sequence.end = message_value
  21. current_sequence.end_message = message
  22. else:
  23. sequences.append(current_sequence)
  24. current_sequence = Sequence(
  25. start=message_value,
  26. start_message=message,
  27. end=message_value,
  28. end_message=message
  29. )
  30. # order the sequences by start
  31. sequences.sort(key=lambda s: s.start)
  32. merged_sequences: List[Sequence] = []
  33. current_sequence = sequences[0]
  34. for i in range(1, len(sequences)):
  35. sequence = sequences[i]
  36. sequence_start_is_in_current_sequence = current_sequence.start <= sequence.start and current_sequence.end >= sequence.start
  37. sequence_end_is_further = sequence.end > current_sequence.end
  38. sequence_start_is_current_end_or_next = sequence.start == current_sequence.end + 1
  39. if sequence_start_is_in_current_sequence or sequence_start_is_current_end_or_next:
  40. if sequence_end_is_further:
  41. current_sequence.end = sequence.end
  42. current_sequence.end_message = sequence.end_message
  43. else:
  44. merged_sequences.append(current_sequence)
  45. current_sequence = sequence
  46. # Having merged the sequences once, any sequence having start = end can be removed
  47. return [s for s in merged_sequences if s.start != s.end]
  48. def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Hole]:
  49. """
  50. Find the holes in the conversation
  51. """
  52. merged_sequences = compute_sequences(messages, accepted_max)
  53. holes = []
  54. for i in range(1, len(merged_sequences)):
  55. previous_sequence = merged_sequences[i - 1]
  56. sequence = merged_sequences[i]
  57. if sequence.start - previous_sequence.end > 1:
  58. holes.append(Hole(
  59. start=previous_sequence.end,
  60. end=sequence.start,
  61. start_message=previous_sequence.end_message,
  62. end_message=sequence.start_message
  63. ))
  64. return holes
  65. def _find_value_around_index(messages: List[Message], value, idx, amplitude) -> int:
  66. check_value = lambda x: messages[x].get_counted_value() == value
  67. if check_value(idx): return idx
  68. for offset in range(1, amplitude):
  69. o_idx = idx + offset * +1
  70. if check_value(o_idx):
  71. return o_idx
  72. o_idx = idx + offset * -1
  73. if check_value(o_idx):
  74. return o_idx
  75. return -1
  76. def _open_sequence(sequences: List[Sequence], msg: Message):
  77. sequence = Sequence(
  78. start=msg.get_counted_value(),
  79. start_message=msg,
  80. end=-1,
  81. end_message=msg
  82. )
  83. sequences.append(sequence)
  84. def _close_sequence(sequences: List[Sequence]):
  85. if len(sequences) == 0: return
  86. sequences[-1].end = sequences[-1].end_message.get_counted_value()
  87. def _opened_sequence(sequences: List[Sequence]):
  88. return len(sequences) > 0 and sequences[-1].end == -1
  89. def find_sequences_v2(messages: List[Message]) -> List[Sequence]:
  90. current = 1
  91. base_idx = 0
  92. amplitude = 200
  93. sequences = []
  94. while base_idx < len(messages):
  95. curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
  96. print(f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}")
  97. if curr_idx != -1: #trouvé
  98. if not _opened_sequence(sequences):
  99. _open_sequence(sequences, messages[curr_idx])
  100. else:
  101. sequences[-1].end_message = messages[curr_idx]
  102. base_idx = curr_idx + 1
  103. current += 1
  104. else: # pas trouvé
  105. # fermer la sequence si ouverte
  106. if _opened_sequence(sequences):
  107. _close_sequence(sequences)
  108. if messages[base_idx].get_counted_value() < current:
  109. base_idx += 1
  110. else:
  111. current += 1
  112. #time.sleep(.005)
  113. return sequences