Procházet zdrojové kódy

Atomisation du code

pull/5/head
Figg před 9 měsíci
rodič
revize
30c380838c

+ 33
- 24
million/analyze/find_holes.py Zobrazit soubor

@@ -7,44 +7,53 @@ import million.analyze.message_evaluation as msg_val
7 7
 
8 8
 
9 9
 def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
10
-    sequences: List[Sequence] = []
11
-    current = Sequence(start_message=messages[0])
10
+    sequences: List[Sequence] = [Sequence(start_message=messages[0])]
12 11
     
13 12
     for message in messages[1:]:
14 13
         if msg_val.get(message) > accepted_max: continue
15 14
 
16
-        if msg_val.get(message) == current.end() + 1:
17
-            current.end_message = message
15
+        if msg_val.get(message) == sequences[-1].end() + 1:
16
+            sequences[-1].end_message = message
18 17
         else:
19
-            sequences.append(current)
20
-            current = Sequence(start_message=message)
18
+            sequences.append(Sequence(start_message=message))
21 19
 
22
-    sequences.sort(key=lambda s: s.start())
20
+    return sequences            
23 21
 
24
-    merged_sequences: List[Sequence] = []
25
-    previous = sequences[0]
26
-    
27
-    for sequence in sequences[1:]:
28
-        if previous.overlaps(sequence):
29
-            previous.merge(sequence)
22
+
23
+def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
24
+    o_sequences = sorted(sequences, key= lambda s : s.start())
25
+    current = o_sequences[0]
26
+
27
+    result = []
28
+        
29
+    for sequence in o_sequences[1:]:
30
+        if current.overlaps(sequence):
31
+            current.merge(sequence)
30 32
         else:
31
-            merged_sequences.append(previous)
32
-            previous = sequence
33
+            result.append(current)
34
+            current = sequence
35
+
36
+    return result
33 37
 
34
-    return [s for s in merged_sequences if s.length() > 1]
35 38
 
36 39
 
40
+def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
41
+    result = []
42
+
43
+    for previous, current in zip(sequences[:-1],sequences[1:]):
44
+        result.append(Sequence(
45
+            start_message=previous.end_message,
46
+            end_message=current.start_message
47
+        ))
48
+
49
+    return result
50
+
37 51
 def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
38 52
     """
39 53
     Find the holes in the conversation
40 54
     """
41 55
     sequences = compute_sequences(messages, accepted_max)
42
-    holes = []
43
-    
44
-    for previous, current in zip(sequences[:-1],sequences[1:]):
45
-        holes.append(Sequence(
46
-            start_message=previous.end_message,
47
-            end_message=current.start_message
48
-        ))
56
+    merged = merge_duplicates(sequences)
57
+    merged = [s for s in merged if s.length() > 1]
49 58
 
50
-    return holes
59
+    return invert_sequences(merged)

+ 2
- 2
million/analyze/message_evaluation.py Zobrazit soubor

@@ -7,8 +7,6 @@ memoization: Dict[Message, int] = {}
7 7
 # TODO WIP
8 8
 # - DNS to resolve audio, gif, pictures with counts
9 9
 def __compute__(msg: Message) -> int:
10
-    """ Returns the estimated value counted in this message
11
-    """
12 10
     value = None
13 11
     # Remove any character that is not a digit
14 12
     # TODO parse potential math expressions in content
@@ -23,4 +21,6 @@ def __compute__(msg: Message) -> int:
23 21
     return value
24 22
 
25 23
 def get(msg: Message) -> int:
24
+    """ Returns the estimated value counted in this message
25
+    """
26 26
     return memoization.get(msg, __compute__(msg))

+ 1
- 1
scripts/find_holes.py Zobrazit soubor

@@ -21,7 +21,7 @@ holes = find_holes(filtered)
21 21
 print(len(holes))
22 22
 
23 23
 for hole in holes:
24
-    print(f"{hole.start()} -> {hole.end()} ({hole.length()})")
24
+    print(f"{hole.start() + 1} -> {hole.end() - 1} ({hole.length() - 2})")
25 25
 
26 26
 
27 27
 # let's export a CSV file of the holes and the people responsible for them

Načítá se…
Zrušit
Uložit