- from million.analyze.word_finder import retain_counts
- import million.analyze.message_evaluation as msg_val
- from million.model.sequence import Sequence
- import million.parse.fb_exports as fb
-
-
def _find_value_around_index(messages, value, idx, amplitude) -> int:
    """Search for a message whose evaluated value equals ``value``.

    Starts at ``idx`` and widens the window symmetrically, one position at a
    time, up to ``amplitude - 1`` positions in each direction. The forward
    candidate is tried before the backward one at each distance.

    Returns the index of the closest match, or -1 when nothing is found
    inside the window.
    """

    def check_value(i: int) -> bool:
        # Guard against falling off either end of the list: without this,
        # a negative index silently wraps to the END of `messages` (wrong
        # match), and an index >= len(messages) raises IndexError.
        return 0 <= i < len(messages) and msg_val.get(messages[i]) == value

    if check_value(idx):
        return idx

    for offset in range(1, amplitude):
        # Look ahead first, then behind, at the same distance.
        if check_value(idx + offset):
            return idx + offset
        if check_value(idx - offset):
            return idx - offset

    return -1
-
-
def _open_sequence(sequences, msg):
    """Append a new, still-open sequence anchored at `msg`.

    The sequence starts at the message's evaluated value; `end` is left at
    the -1 sentinel until `_close_sequence` resolves it.
    """
    value = msg_val.get(msg)
    sequences.append(
        Sequence(start=value, start_message=msg, end=-1, end_message=msg)
    )
-
-
def _close_sequence(sequences):
    """Finalize the most recent sequence by resolving its end value."""
    if not sequences:
        return
    last = sequences[-1]
    last.end = msg_val.get(last.end_message)
-
-
- def _opened_sequence(sequences):
- return len(sequences) > 0 and sequences[-1].end == -1
-
-
# --- Script: scan parsed messages for runs of consecutive count values. ---
export = fb.parse_dirfiles("./data/")
messages = retain_counts(export.messages)

current = 1     # the count value currently being searched for
base_idx = 0    # position in `messages` where the search resumes
amplitude = 200  # how far around base_idx _find_value_around_index may look

sequences = []

while base_idx < len(messages):
    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
    print(
        f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}"
    )

    if curr_idx != -1:  # found
        # Extend the open sequence, or start one at the matched message.
        if not _opened_sequence(sequences):
            _open_sequence(sequences, messages[curr_idx])
        else:
            sequences[-1].end_message = messages[curr_idx]

        base_idx = curr_idx + 1
        current += 1
    else:  # not found
        # Close the sequence if one is open.
        if _opened_sequence(sequences):
            _close_sequence(sequences)

        # Skip past messages holding stale, smaller values; otherwise
        # move on to the next target value.
        if msg_val.get(messages[base_idx]) < current:
            base_idx += 1
        else:
            current += 1

# BUG FIX: the loop can exhaust `messages` while a sequence is still open,
# leaving its `end` at the -1 sentinel forever. Close it so the final
# sequence carries its real end value.
if _opened_sequence(sequences):
    _close_sequence(sequences)