123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
-
-
- from typing import List
- from million.model.message import Message
- from million.model.sequence import Sequence
- import million.analyze.message_evaluation as msg_val
-
-
def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
    """
    Group messages into sequences of consecutive 'counted values'.

    Messages are visited in the order given. A message extends the latest
    sequence when its counted value (``msg_val.get``) is exactly one more
    than that sequence's ``end()``; otherwise it starts a new sequence.
    Messages whose counted value is greater than ``accepted_max`` are
    ignored, and this now applies to the first message as well.

    Returns an empty list when ``messages`` is empty or when every message
    is ignored.
    """
    # Start empty instead of seeding with messages[0]: this avoids an
    # IndexError on empty input and lets the first message go through the
    # same accepted_max filter as all the others.
    sequences: List[Sequence] = []

    for message in messages:
        # Evaluate the counted value once per message.
        value = msg_val.get(message)
        if value > accepted_max:
            continue

        if sequences and value == sequences[-1].end() + 1:
            sequences[-1].end_message = message
        else:
            sequences.append(Sequence(start_message=message))

    return sequences
-
-
def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
    """
    Take sequences as input and return a list in which every group of
    overlapping input sequences is merged into one.

    The input is first sorted by ``start()``. Each sequence that overlaps
    the one currently being built is folded into it with ``merge``; the
    return value of ``merge`` is ignored, so it is expected to update the
    sequence in place.

    Returns an empty list when ``sequences`` is empty.
    """
    ordered = sorted(sequences, key=lambda s: s.start())
    if not ordered:
        # Nothing to merge; avoids indexing into an empty list.
        return []

    current = ordered[0]
    result = []

    for sequence in ordered[1:]:
        if current.overlaps(sequence):
            current.merge(sequence)
        else:
            result.append(current)
            current = sequence

    # The group still being built when the loop ends has to be emitted too;
    # otherwise the last (or only) merged sequence is silently dropped.
    result.append(current)

    return result
-
-
-
def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
    """
    Build the sequences that represent the gaps between the given ones.

    Each gap runs from the end message of one input sequence to the start
    message of the sequence right after it, so fewer than two input
    sequences produce an empty list.
    """
    # zip stops at the shorter iterable, which pairs each sequence with
    # its immediate successor.
    return [
        Sequence(start_message=left.end_message, end_message=right.start_message)
        for left, right in zip(sequences, sequences[1:])
    ]
-
def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
    """
    Find the holes in the conversation.

    Pipeline: compute the sequences of the messages, merge the overlapping
    ones, drop the merged sequences whose ``length()`` is not greater
    than 1, then invert what is left.

    TODO might need to be moved inside scripts/find_holes
    """
    candidates = merge_duplicates(compute_sequences(messages, accepted_max))

    kept = []
    for candidate in candidates:
        if candidate.length() > 1:
            kept.append(candidate)

    return invert_sequences(kept)
|