|
@@ -7,6 +7,10 @@ import million.analyze.message_evaluation as msg_val
|
7
|
7
|
|
8
|
8
|
|
9
|
9
|
def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
|
|
10
|
+ """
|
|
11
|
+ Takes a list of messages as input and returns a list of sequences
|
|
12
|
+ for every run of consecutive messages with consecutive 'counted values'
|
|
13
|
+ """
|
10
|
14
|
sequences: List[Sequence] = [Sequence(start_message=messages[0])]
|
11
|
15
|
|
12
|
16
|
for message in messages[1:]:
|
|
@@ -21,6 +25,10 @@ def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) ->
|
21
|
25
|
|
22
|
26
|
|
23
|
27
|
def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
|
|
28
|
+ """
|
|
29
|
+ Takes sequences as input and returns a list in which all
|
|
30
|
+ overlapping input sequences are merged into one
|
|
31
|
+ """
|
24
|
32
|
o_sequences = sorted(sequences, key= lambda s : s.start())
|
25
|
33
|
current = o_sequences[0]
|
26
|
34
|
|
|
@@ -38,6 +46,10 @@ def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
|
38
|
46
|
|
39
|
47
|
|
40
|
48
|
def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
|
|
49
|
+ """
|
|
50
|
+ Returns the sequences representing the spaces between
|
|
51
|
+ the ones given as input
|
|
52
|
+ """
|
41
|
53
|
result = []
|
42
|
54
|
|
43
|
55
|
for previous, current in zip(sequences[:-1],sequences[1:]):
|
|
@@ -51,6 +63,7 @@ def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
|
51
|
63
|
def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
|
52
|
64
|
"""
|
53
|
65
|
Find the holes in the conversation
|
|
66
|
+ TODO: might need to be moved into scripts/find_holes
|
54
|
67
|
"""
|
55
|
68
|
sequences = compute_sequences(messages, accepted_max)
|
56
|
69
|
merged = merge_duplicates(sequences)
|