瀏覽代碼

nouveaux scripts pour chercher les valeurs manquantes

feature/find_holes
Figg 7 月之前
父節點
當前提交
391fae71f3
共有 2 個文件被更改,包括 102 次插入和 0 次删除
  1. 77
    0
      scripts/find_holes_v2.py
  2. 25
    0
      scripts/find_missing.py

+ 77
- 0
scripts/find_holes_v2.py 查看文件

@@ -0,0 +1,77 @@
1
+from million.analyze.word_finder import retain_counts
2
+import million.analyze.message_evaluation as msg_val
3
+from million.model.sequence import Sequence
4
+import million.parse.fb_exports as fb
5
+
6
+
7
def _find_value_around_index(messages, value, idx, amplitude) -> int:
    """Return the index closest to ``idx`` whose message evaluates to ``value``.

    ``idx`` itself is checked first, then the positions at distance
    1, 2, ..., ``amplitude - 1``; at each distance the position after ``idx``
    is tried before the position before it.

    Positions outside ``0 .. len(messages) - 1`` are skipped, so a window that
    overlaps either end of ``messages`` neither raises ``IndexError`` nor wraps
    around to the other end of the list.

    Args:
        messages: Indexable collection of messages accepted by ``msg_val.get``.
        value: Value that ``msg_val.get(message)`` must equal.
        idx: Index the search is centred on.
        amplitude: Exclusive upper bound on the distance from ``idx`` probed.

    Returns:
        The index of the first match in the order described above, or ``-1``
        when no message inside the window matches.
    """
    size = len(messages)

    def check_value(position):
        # Reject out-of-range positions explicitly: a negative index would
        # otherwise silently address the tail of the list (and could be
        # returned as a bogus index, or as the -1 "not found" sentinel), and
        # an index past the end would raise IndexError.
        return 0 <= position < size and msg_val.get(messages[position]) == value

    if check_value(idx):
        return idx

    for offset in range(1, amplitude):
        # Forward neighbour first, then backward, matching the search order
        # callers already rely on.
        for o_idx in (idx + offset, idx - offset):
            if check_value(o_idx):
                return o_idx

    return -1
23
+
24
+
25
def _open_sequence(sequences, msg):
    """Append to ``sequences`` a new, still-open ``Sequence`` starting at ``msg``.

    The new sequence uses ``msg`` as both its start and end message, takes its
    start value from ``msg_val.get(msg)``, and has ``end`` set to ``-1``, the
    marker ``_opened_sequence`` tests for.
    """
    start_value = msg_val.get(msg)
    sequences.append(
        Sequence(start=start_value, start_message=msg, end=-1, end_message=msg)
    )
31
+
32
+
33
+def _close_sequence(sequences):
34
+    if len(sequences) == 0:
35
+        return
36
+
37
+    sequences[-1].end = msg_val.get(sequences[-1].end_message)
38
+
39
+
40
+def _opened_sequence(sequences):
41
+    return len(sequences) > 0 and sequences[-1].end == -1
42
+
43
+
44
# Load the export found under ./data/ and filter its messages through
# `retain_counts` (presumably keeps only messages carrying a count - confirm
# against million.analyze.word_finder).
export = fb.parse_dirfiles("./data/")
messages = retain_counts(export.messages)

current = 1  # value currently being searched for
base_idx = 0  # index the search window is centred on
amplitude = 200  # search radius handed to _find_value_around_index

# Runs of consecutive values found so far; the last one may still be open.
sequences = []

while base_idx < len(messages):
    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
    print(
        f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}"
    )

    if curr_idx != -1:  # found
        # Start a new sequence, or extend the open one up to this message.
        if not _opened_sequence(sequences):
            _open_sequence(sequences, messages[curr_idx])
        else:
            sequences[-1].end_message = messages[curr_idx]

        # Continue right after the match, looking for the next value.
        base_idx = curr_idx + 1
        current += 1
    else:  # not found
        # Close the sequence if one is open.
        if _opened_sequence(sequences):
            _close_sequence(sequences)

        # Either the window is behind the searched value (move the window
        # forward) or the value is missing (search for the next value).
        if msg_val.get(messages[base_idx]) < current:
            base_idx += 1
        else:
            current += 1

# The loop can run out of messages while a sequence is still open; close it so
# the last sequence does not keep the -1 "open" sentinel as its end value.
if _opened_sequence(sequences):
    _close_sequence(sequences)

+ 25
- 0
scripts/find_missing.py 查看文件

@@ -0,0 +1,25 @@
1
+import million.analyze.message_evaluation as msg_val
2
+import million.parse.fb_exports as fb
3
+import time
4
+
5
export = fb.parse_dirfiles("./data")
messages = export.messages

# Distinct truthy values returned by msg_val.get, capped at one million,
# in ascending order.
counts = sorted(
    {val for m in messages if (val := msg_val.get(m)) and val <= 1_000_000}
)

expected_value = 1
intervals = []

for value in counts:
    if value != expected_value:
        # Everything from expected_value up to value - 1 is missing: report a
        # single number for a one-element gap, "first..last" otherwise.
        interval_length = value - expected_value
        intervals.append(
            str(expected_value)
            if interval_length == 1
            else f"{expected_value}..{value - 1}"
        )

    # Whatever happened, the next value we hope to see follows this one.
    expected_value = value + 1

print(intervals)

Loading…
取消
儲存