|
@@ -0,0 +1,77 @@
|
|
1
|
+from million.analyze.word_finder import retain_counts
|
|
2
|
+import million.analyze.message_evaluation as msg_val
|
|
3
|
+from million.model.sequence import Sequence
|
|
4
|
+import million.parse.fb_exports as fb
|
|
5
|
+
|
|
6
|
+
|
|
7
|
def _find_value_around_index(messages, value, idx: int, amplitude: int) -> int:
    """Return the index of the message closest to ``idx`` whose value is ``value``.

    The message at ``idx`` is checked first. The search then widens one step
    at a time, probing ``idx + offset`` before ``idx - offset`` for every
    offset in ``1 .. amplitude - 1`` (``amplitude`` itself is excluded).

    A message "has" a value when ``msg_val.get(message) == value``.

    Args:
        messages: Indexable collection of messages to search.
        value: Value to look for.
        idx: Index the search is centred on.
        amplitude: Exclusive upper bound on the distance probed on each side.

    Returns:
        The index of the first matching message in probe order, or ``-1``
        when nothing within reach matches.
    """
    size = len(messages)

    def matches(position: int) -> bool:
        # Positions outside the collection never match. Without this guard a
        # negative position would wrap around to the end of the list and a
        # position past the end would raise IndexError.
        return 0 <= position < size and msg_val.get(messages[position]) == value

    if matches(idx):
        return idx

    for offset in range(1, amplitude):
        # Forward neighbour takes precedence over the backward one.
        for candidate in (idx + offset, idx - offset):
            if matches(candidate):
                return candidate

    return -1
|
|
23
|
+
|
|
24
|
+
|
|
25
|
def _open_sequence(sequences, msg):
    """Append a new sequence beginning at ``msg`` to ``sequences``.

    The new ``Sequence`` uses ``msg`` as both its start and end message, takes
    its ``start`` from ``msg_val.get(msg)`` and gets ``end=-1``, which is the
    marker ``_opened_sequence`` tests for.
    """
    first_value = msg_val.get(msg)
    sequences.append(
        Sequence(start=first_value, start_message=msg, end=-1, end_message=msg)
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
def _close_sequence(sequences):
    """Set the ``end`` of the last sequence from its current end message.

    Does nothing when ``sequences`` is empty. Otherwise the last element's
    ``end`` becomes ``msg_val.get`` of that element's ``end_message``.
    """
    if len(sequences) > 0:
        last = sequences[-1]
        last.end = msg_val.get(last.end_message)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
def _opened_sequence(sequences):
    """Tell whether the last sequence in ``sequences`` is still open.

    A sequence counts as open while its ``end`` equals ``-1``. An empty
    ``sequences`` has no open sequence.
    """
    if len(sequences) == 0:
        return False

    return sequences[-1].end == -1
|
|
42
|
+
|
|
43
|
+
|
|
44
|
# Load the export and keep only the messages selected by retain_counts.
export = fb.parse_dirfiles("./data/")
messages = retain_counts(export.messages)

current = 1  # next value the scan is looking for
base_idx = 0  # index the search is centred on
amplitude = 200  # search reach passed to _find_value_around_index

sequences = []

while base_idx < len(messages):
    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)

    # Human-readable search outcome for the trace line below.
    if curr_idx == -1:
        outcome = "Not found"
    elif curr_idx == base_idx:
        outcome = "Itself"
    else:
        outcome = messages[curr_idx]
    print(f"searching {current} from [{messages[base_idx]}]\t-> {outcome}")

    if curr_idx != -1:  # found
        if not _opened_sequence(sequences):
            _open_sequence(sequences, messages[curr_idx])
        else:
            # Extend the running sequence up to the message just found.
            sequences[-1].end_message = messages[curr_idx]

        # Continue right after the match, looking for the next value.
        base_idx = curr_idx + 1
        current += 1
    else:  # not found
        # Close the sequence if one is open.
        if _opened_sequence(sequences):
            _close_sequence(sequences)

        # Advance whichever side lags behind: skip the message when its value
        # is below the target, otherwise give up on this target value.
        if msg_val.get(messages[base_idx]) < current:
            base_idx += 1
        else:
            current += 1

# The messages can run out while a sequence is still in progress; without this
# the last sequence would keep the "open" marker (end == -1).
if _opened_sequence(sequences):
    _close_sequence(sequences)
|