You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

find_holes.py 2.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. from typing import List
  2. from million.model.hole import Hole
  3. from million.model.message import Message
  4. from million.model.sequence import Sequence
  5. import million.analyze.message_evaluation as msg_ev
  6. def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
  7. sequences: List[Sequence] = []
  8. current_sequence = Sequence(
  9. start_message=messages[0],
  10. end_message=messages[0]
  11. )
  12. for i in range(1, len(messages)):
  13. message = messages[i]
  14. message_value = msg_ev.compute(message)
  15. if message_value > accepted_max:
  16. continue
  17. if message_value - current_sequence.end() == 1:
  18. current_sequence.end_message = message
  19. else:
  20. sequences.append(current_sequence)
  21. current_sequence = Sequence(
  22. start_message=message,
  23. end_message=message
  24. )
  25. # order the sequences by start
  26. sequences.sort(key=lambda s: s.start())
  27. merged_sequences: List[Sequence] = []
  28. current_sequence = sequences[0]
  29. for i in range(1, len(sequences)):
  30. sequence = sequences[i]
  31. sequence_start_is_in_current_sequence = current_sequence.start() <= sequence.start() and current_sequence.end() >= sequence.start()
  32. sequence_end_is_further = sequence.end() > current_sequence.end()
  33. sequence_start_is_current_end_or_next = sequence.start() == current_sequence.end() + 1
  34. if sequence_start_is_in_current_sequence or sequence_start_is_current_end_or_next:
  35. if sequence_end_is_further:
  36. current_sequence.end_message = sequence.end_message
  37. else:
  38. merged_sequences.append(current_sequence)
  39. current_sequence = sequence
  40. # Having merged the sequences once, any sequence having start = end can be removed
  41. return [s for s in merged_sequences if s.start() != s.end()]
  42. def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Hole]:
  43. """
  44. Find the holes in the conversation
  45. """
  46. merged_sequences = compute_sequences(messages, accepted_max)
  47. holes = []
  48. for i in range(1, len(merged_sequences)):
  49. previous_sequence = merged_sequences[i - 1]
  50. sequence = merged_sequences[i]
  51. if sequence.start() - previous_sequence.end() > 1:
  52. holes.append(Hole(
  53. start=previous_sequence.end(),
  54. end=sequence.start(),
  55. start_message=previous_sequence.end_message,
  56. end_message=sequence.start_message
  57. ))
  58. return holes