|
@@ -1,77 +0,0 @@
|
1
|
|
-from million.analyze.word_finder import retain_counts
|
2
|
|
-import million.analyze.message_evaluation as msg_val
|
3
|
|
-from million.model.sequence import Sequence
|
4
|
|
-import million.parse.fb_exports as fb
|
5
|
|
-
|
6
|
|
-
|
7
|
|
def _find_value_around_index(messages, value, idx, amplitude) -> int:
    """Find the position closest to ``idx`` whose message evaluates to ``value``.

    ``idx`` itself is tested first. The search then fans outwards one step
    at a time, testing ``idx + offset`` before ``idx - offset`` for every
    ``offset`` in ``range(1, amplitude)``.

    Args:
        messages: Indexable collection of messages; each one is evaluated
            with ``msg_val.get``.
        value: The value a message must evaluate to in order to match.
        idx: Position the search is centred on.
        amplitude: Exclusive upper bound on the offset explored on each side.

    Returns:
        The index of the first matching message, or ``-1`` when none is found
        within the window.
    """
    total = len(messages)

    def matches(position):
        # Positions outside the list are never a match: running past the end
        # would raise IndexError, and a negative position would silently wrap
        # around to the tail of the list.
        return 0 <= position < total and msg_val.get(messages[position]) == value

    if matches(idx):
        return idx

    for offset in range(1, amplitude):
        # Forward candidate first, then backward, for the same distance.
        for candidate in (idx + offset, idx - offset):
            if matches(candidate):
                return candidate

    return -1
|
23
|
|
-
|
24
|
|
-
|
25
|
|
def _open_sequence(sequences, msg):
    """Append a new sequence beginning at ``msg`` to ``sequences``.

    The new ``Sequence`` uses ``msg`` as both its start and end message,
    takes its ``start`` from ``msg_val.get(msg)`` and has ``end`` set to
    ``-1``.
    """
    opened = Sequence(
        start=msg_val.get(msg),
        start_message=msg,
        end=-1,
        end_message=msg,
    )
    sequences.append(opened)
|
31
|
|
-
|
32
|
|
-
|
33
|
|
-def _close_sequence(sequences):
|
34
|
|
- if len(sequences) == 0:
|
35
|
|
- return
|
36
|
|
-
|
37
|
|
- sequences[-1].end = msg_val.get(sequences[-1].end_message)
|
38
|
|
-
|
39
|
|
-
|
40
|
|
-def _opened_sequence(sequences):
|
41
|
|
- return len(sequences) > 0 and sequences[-1].end == -1
|
42
|
|
-
|
43
|
|
-
|
44
|
|
# Load the export found under ./data/ and pass its messages through
# retain_counts before searching them.
export = fb.parse_dirfiles("./data/")
messages = retain_counts(export.messages)

current = 1  # value being searched for
base_idx = 0  # position the search is centred on
amplitude = 200  # offset bound handed to _find_value_around_index

sequences = []

while base_idx < len(messages):
    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)

    # Describe the outcome of this search step for the trace output.
    if curr_idx == -1:
        outcome = "Not found"
    elif curr_idx == base_idx:
        outcome = "Itself"
    else:
        outcome = messages[curr_idx]
    print(f"searching {current} from [{messages[base_idx]}]\t-> {outcome}")

    if curr_idx != -1:  # found
        # Start a new sequence, or extend the open one up to this message.
        if not _opened_sequence(sequences):
            _open_sequence(sequences, messages[curr_idx])
        else:
            sequences[-1].end_message = messages[curr_idx]

        base_idx = curr_idx + 1
        current += 1
    else:  # not found
        # Close the sequence if one is open.
        if _opened_sequence(sequences):
            _close_sequence(sequences)

        # Advance whichever cursor is lagging so the loop always progresses.
        if msg_val.get(messages[base_idx]) < current:
            base_idx += 1
        else:
            current += 1

# A sequence that was still being extended when the messages ran out would
# otherwise keep the -1 "open" marker as its end value.
if _opened_sequence(sequences):
    _close_sequence(sequences)
|