Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

find_holes.py 1.6KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. from typing import List
  2. from million.model.message import Message
  3. from million.model.sequence import Sequence
  4. import million.analyze.message_evaluation as msg_val
  5. def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
  6. sequences: List[Sequence] = [Sequence(start_message=messages[0])]
  7. for message in messages[1:]:
  8. if msg_val.get(message) > accepted_max: continue
  9. if msg_val.get(message) == sequences[-1].end() + 1:
  10. sequences[-1].end_message = message
  11. else:
  12. sequences.append(Sequence(start_message=message))
  13. return sequences
  14. def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
  15. o_sequences = sorted(sequences, key= lambda s : s.start())
  16. current = o_sequences[0]
  17. result = []
  18. for sequence in o_sequences[1:]:
  19. if current.overlaps(sequence):
  20. current.merge(sequence)
  21. else:
  22. result.append(current)
  23. current = sequence
  24. return result
  25. def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
  26. result = []
  27. for previous, current in zip(sequences[:-1],sequences[1:]):
  28. result.append(Sequence(
  29. start_message=previous.end_message,
  30. end_message=current.start_message
  31. ))
  32. return result
  33. def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
  34. """
  35. Find the holes in the conversation
  36. """
  37. sequences = compute_sequences(messages, accepted_max)
  38. merged = merge_duplicates(sequences)
  39. merged = [s for s in merged if s.length() > 1]
  40. return invert_sequences(merged)