Browse Source

deleted useless hole class

pull/5/head
Figg 9 months ago
parent
commit
226cc7add9
3 changed files with 14 additions and 33 deletions
  1. 9
    15
      million/analyze/find_holes.py
  2. 0
    11
      million/model/hole.py
  3. 5
    7
      scripts/find_holes.py

+ 9
- 15
million/analyze/find_holes.py View File

1
 
1
 
2
 
2
 
3
 from typing import List
3
 from typing import List
4
-from million.model.hole import Hole
5
 from million.model.message import Message
4
 from million.model.message import Message
6
 from million.model.sequence import Sequence
5
 from million.model.sequence import Sequence
7
 import million.analyze.message_evaluation as msg_val
6
 import million.analyze.message_evaluation as msg_val
20
             sequences.append(current)
19
             sequences.append(current)
21
             current = Sequence(start_message=message)
20
             current = Sequence(start_message=message)
22
 
21
 
23
-    # order the sequences by start
24
     sequences.sort(key=lambda s: s.start())
22
     sequences.sort(key=lambda s: s.start())
25
 
23
 
26
     merged_sequences: List[Sequence] = []
24
     merged_sequences: List[Sequence] = []
33
             merged_sequences.append(previous)
31
             merged_sequences.append(previous)
34
             previous = sequence
32
             previous = sequence
35
 
33
 
36
-    # Having merged the sequences once, any sequence having start = end can be removed
37
     return [s for s in merged_sequences if s.length() > 1]
34
     return [s for s in merged_sequences if s.length() > 1]
38
 
35
 
39
 
36
 
40
-def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Hole]:
37
+def find_holes(messages: List[Message], accepted_max: int = 1_000_000) -> List[Sequence]:
41
     """
38
     """
42
     Find the holes in the conversation
39
     Find the holes in the conversation
43
     """
40
     """
44
-    merged_sequences = compute_sequences(messages, accepted_max)
41
+    sequences = compute_sequences(messages, accepted_max)
45
     holes = []
42
     holes = []
46
-    for i in range(1, len(merged_sequences)):
47
-        previous_sequence = merged_sequences[i - 1]
48
-        sequence = merged_sequences[i]
49
-        if sequence.start() - previous_sequence.end() > 1:
50
-            holes.append(Hole(
51
-                start=previous_sequence.end(),
52
-                end=sequence.start(),
53
-                start_message=previous_sequence.end_message,
54
-                end_message=sequence.start_message
55
-            ))
43
+    
44
+    for previous, current in zip(sequences[:-1],sequences[1:]):
45
+        holes.append(Sequence(
46
+            start_message=previous.end_message,
47
+            end_message=current.start_message
48
+        ))
49
+
56
     return holes
50
     return holes

+ 0
- 11
million/model/hole.py View File

1
-
2
-from pydantic import BaseModel
3
-
4
-from million.model.message import Message
5
-
6
-
7
-class Hole(BaseModel):
8
-    start: int
9
-    end: int
10
-    start_message: Message
11
-    end_message: Message

+ 5
- 7
scripts/find_holes.py View File

1
 from datetime import datetime
1
 from datetime import datetime
2
 from million.analyze.find_holes import compute_sequences, find_holes
2
 from million.analyze.find_holes import compute_sequences, find_holes
3
-from million.view.bar_chart import plot as bar_chart
4
-from million.analyze.count_participations import count_participations
5
 from million.analyze.retain_counts import retain_counts
3
 from million.analyze.retain_counts import retain_counts
6
 import million.parse.fb_exports as fb
4
 import million.parse.fb_exports as fb
7
 
5
 
14
 
12
 
15
 sequences = compute_sequences(filtered)
13
 sequences = compute_sequences(filtered)
16
 
14
 
17
-actual_counted = sum([s.end() - s.start() for s in sequences])
15
+actual_counted = sum([s.length() for s in sequences])
18
 
16
 
19
 print(f"Actual counted: {actual_counted}")
17
 print(f"Actual counted: {actual_counted}")
20
 
18
 
23
 print(len(holes))
21
 print(len(holes))
24
 
22
 
25
 for hole in holes:
23
 for hole in holes:
26
-    print(f"{hole.start} - {hole.end} ({hole.end - hole.start})")
24
+    print(f"{hole.start()} -> {hole.end()} ({hole.length()})")
27
 
25
 
28
 
26
 
29
 # lets export a csv file of the holes and the people responsible for them
27
 # lets export a csv file of the holes and the people responsible for them
35
         date_end = datetime.utcfromtimestamp(
33
         date_end = datetime.utcfromtimestamp(
36
             hole.end_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
34
             hole.end_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
37
         f.write(
35
         f.write(
38
-            f"{hole.start},"
39
-            f"{hole.end},"
40
-            f"{hole.end - hole.start},"
36
+            f"{hole.start()},"
37
+            f"{hole.end()},"
38
+            f"{hole.length()},"
41
             f"{hole.start_message.sender_name},"
39
             f"{hole.start_message.sender_name},"
42
             f"{hole.end_message.sender_name},"
40
             f"{hole.end_message.sender_name},"
43
             f"{date_start},{date_end}\n"
41
             f"{date_start},{date_end}\n"

Loading…
Cancel
Save