Ver código fonte

Content value now extracted differently

+ remove useless script
feature/find_holes
Figg 9 meses atrás
pai
commit
fadff8c69a
2 arquivos alterados com 16 adições e 87 exclusões
  1. 16
    10
      million/analyze/message_evaluation.py
  2. 0
    77
      scripts/find_holes_v2.py

+ 16
- 10
million/analyze/message_evaluation.py Ver arquivo

@@ -7,12 +7,14 @@ import million.analyze.dns_solver as dns
7 7
 _memoization: Dict[Message, int] = {}
8 8
 _dns_solver: dns.DNS_solver = dns.DNS_solver()
9 9
 
10
+
10 11
 def get(msg: Message) -> int:
     """
     Return the estimated value counted in this message.

     The value is looked up in the module-level ``_memoization`` cache and
     only computed (through ``_compute``, which also stores it) when the
     message has not been evaluated yet.

     :param msg: the message to evaluate
     :return: the cached or freshly computed value; may be ``None`` when
         ``_compute`` could not extract any value
     """
     # A membership test is used instead of ``dict.get(msg, _compute(msg))``:
     # the default argument of ``dict.get`` is evaluated eagerly, which would
     # recompute the value on every call and make the cache pointless.
     # Checking membership also honours cached ``None`` results.
     if msg in _memoization:
         return _memoization[msg]

     return _compute(msg)
15 16
 
17
+
16 18
 def reset(msg: Message) -> None:
17 19
     """
18 20
     Drop memorized value of this Message
@@ -20,6 +22,7 @@ def reset(msg: Message) -> None:
20 22
     if msg in _memoization:
21 23
         _memoization.pop(msg)
22 24
 
25
+
23 26
 def reset() -> None:
24 27
     """
25 28
     Drop every memorized message value
@@ -28,20 +31,23 @@ def reset() -> None:
28 31
 
29 32
 
30 33
 def _compute(msg: Message) -> int:
     """
     Evaluate ``msg`` and remember the result in ``_memoization``.

     ``_dns_solver.solve`` is tried first; when it yields a falsy result the
     evaluation falls back to ``_computeContent``. Any falsy outcome is
     normalised to ``None`` before being cached and returned.
     """
     value = _dns_solver.solve(msg)

     if not value:
         # Only parse the content when the solver produced nothing usable.
         value = _computeContent(msg)

     if not value:
         value = None

     _memoization[msg] = value
     return value
37 38
 
39
+
40
+# 1🍁 pour 1420 ?
41
+# @Elias Cheddar pour 69 ?
42
+
38 43
 def _computeContent(msg: Message) -> int:
     """
     Extract a number from the textual content of ``msg``.

     Every character that is neither whitespace nor a digit is removed
     first, so digits separated only by other characters are merged
     (``"1,420"`` becomes ``"1420"``, ``"a1b2 3"`` becomes ``"12 3"``).
     The first remaining run of digits is then converted to ``int``.

     Returns ``None`` when the message has no content or no digit is found.
     """
     content = msg.content
     if not content:
         return None

     digits_and_spaces = re.sub(r"[^\s\d]", "", content)
     found = re.search(r"\d+", digits_and_spaces)

     return int(found.group()) if found else None

+ 0
- 77
scripts/find_holes_v2.py Ver arquivo

@@ -1,77 +0,0 @@
1
-from million.analyze.word_finder import retain_counts
2
-import million.analyze.message_evaluation as msg_val
3
-from million.model.sequence import Sequence
4
-import million.parse.fb_exports as fb
5
-
6
-
7
-def _find_value_around_index(messages, value, idx, amplitude) -> int:
8
-    check_value = lambda x: msg_val.get(messages[x]) == value
9
-
10
-    if check_value(idx):
11
-        return idx
12
-
13
-    for offset in range(1, amplitude):
14
-        o_idx = idx + offset * +1
15
-        if check_value(o_idx):
16
-            return o_idx
17
-
18
-        o_idx = idx + offset * -1
19
-        if check_value(o_idx):
20
-            return o_idx
21
-
22
-    return -1
23
-
24
-
25
-def _open_sequence(sequences, msg):
26
-    sequence = Sequence(
27
-        start=msg_val.get(msg), start_message=msg, end=-1, end_message=msg
28
-    )
29
-
30
-    sequences.append(sequence)
31
-
32
-
33
-def _close_sequence(sequences):
34
-    if len(sequences) == 0:
35
-        return
36
-
37
-    sequences[-1].end = msg_val.get(sequences[-1].end_message)
38
-
39
-
40
-def _opened_sequence(sequences):
41
-    return len(sequences) > 0 and sequences[-1].end == -1
42
-
43
-
44
-export = fb.parse_dirfiles("./data/")
45
-messages = retain_counts(export.messages)
46
-
47
-current = 1
48
-base_idx = 0
49
-amplitude = 200
50
-
51
-sequences = []
52
-
53
-while base_idx < len(messages):
54
-    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
55
-    print(
56
-        f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}"
57
-    )
58
-
59
-    if curr_idx != -1:  # trouvé
60
-
61
-        if not _opened_sequence(sequences):
62
-            _open_sequence(sequences, messages[curr_idx])
63
-        else:
64
-            sequences[-1].end_message = messages[curr_idx]
65
-
66
-        base_idx = curr_idx + 1
67
-        current += 1
68
-    else:  # pas trouvé
69
-
70
-        # fermer la sequence si ouverte
71
-        if _opened_sequence(sequences):
72
-            _close_sequence(sequences)
73
-
74
-        if msg_val.get(messages[base_idx]) < current:
75
-            base_idx += 1
76
-        else:
77
-            current += 1

Carregando…
Cancelar
Salvar