from typing import List

from million.model.message import Message
from million.model.sequence import Sequence
import million.analyze.message_evaluation as msg_val


def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
    """
    Group messages into runs of consecutive values.

    Messages are walked in the order given. A message extends the current
    sequence when its value (``msg_val.get``) equals the current sequence's
    ``end() + 1``; otherwise it opens a new sequence. Messages whose value is
    greater than ``accepted_max`` are skipped.

    Returns an empty list when ``messages`` is empty.
    """
    if not messages:
        return []

    # NOTE(review): the first message opens the first sequence without being
    # checked against accepted_max - confirm this is intended.
    sequences: List[Sequence] = [Sequence(start_message=messages[0])]

    for message in messages[1:]:
        # Evaluate the message once and reuse the value for both checks.
        value = msg_val.get(message)

        if value > accepted_max:
            continue

        if value == sequences[-1].end() + 1:
            sequences[-1].end_message = message
        else:
            sequences.append(Sequence(start_message=message))

    return sequences


def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
    """
    Merge overlapping sequences.

    The sequences are sorted by ``start()``; each one that ``overlaps`` the
    sequence currently being accumulated is folded into it with ``merge``,
    otherwise the accumulated sequence is emitted and a new one is started.

    Returns an empty list when ``sequences`` is empty.
    """
    if not sequences:
        return []

    ordered = sorted(sequences, key=lambda s: s.start())
    current = ordered[0]

    result = []
    for sequence in ordered[1:]:
        if current.overlaps(sequence):
            # The return value of merge() is not used, so it presumably
            # updates `current` in place - verify against Sequence.merge.
            current.merge(sequence)
        else:
            result.append(current)
            current = sequence

    # The sequence still being accumulated when the loop ends must be emitted
    # too; without this the last merged sequence is silently dropped.
    result.append(current)

    return result


def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
    """
    Build the gaps between each pair of neighbouring sequences.

    Each gap is a Sequence running from the ``end_message`` of one sequence
    to the ``start_message`` of the next. Fewer than two sequences yield an
    empty list.
    """
    return [
        Sequence(
            start_message=previous.end_message,
            end_message=current.start_message,
        )
        for previous, current in zip(sequences[:-1], sequences[1:])
    ]


def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
    """
    Find the holes in the conversation

    Computes the consecutive sequences of ``messages``, merges the
    overlapping ones, keeps only those whose ``length()`` is greater than 1,
    and returns the gaps between the remaining neighbours.
    """
    sequences = compute_sequences(messages, accepted_max)
    merged = merge_duplicates(sequences)
    merged = [s for s in merged if s.length() > 1]
    return invert_sequences(merged)