from million.analyze.word_finder import retain_counts
import million.analyze.message_evaluation as msg_val
from million.model.sequence import Sequence
import million.parse.fb_exports as fb


def _find_value_around_index(messages, value, idx, amplitude) -> int:
    """Find the message whose value equals *value*, nearest to *idx*.

    Fans out from *idx* up to ``amplitude - 1`` positions in both
    directions (forward probed before backward at equal distance).

    Returns the matching index, or -1 when no match is within range.
    """
    def _matches(i: int) -> bool:
        # Bounds check first: the original code could raise IndexError past
        # the end of the list, and silently wrapped around via Python's
        # negative indexing when probing below index 0 — both fixed here.
        return 0 <= i < len(messages) and msg_val.get(messages[i]) == value

    if _matches(idx):
        return idx
    for offset in range(1, amplitude):
        if _matches(idx + offset):
            return idx + offset
        if _matches(idx - offset):
            return idx - offset
    return -1


def _open_sequence(sequences, msg) -> None:
    """Append a new open sequence starting at *msg*.

    ``end`` is set to the sentinel -1 until the sequence is closed.
    """
    sequences.append(
        Sequence(
            start=msg_val.get(msg),
            start_message=msg,
            end=-1,  # sentinel: sequence still open
            end_message=msg,
        )
    )


def _close_sequence(sequences) -> None:
    """Finalize the last sequence by resolving its end value from its end message."""
    if not sequences:
        return
    sequences[-1].end = msg_val.get(sequences[-1].end_message)


def _opened_sequence(sequences) -> bool:
    """Return True when the most recent sequence exists and is still open."""
    return len(sequences) > 0 and sequences[-1].end == -1


# --- Script body: scan exported messages for consecutive count sequences ---
export = fb.parse_dirfiles("./data/")
messages = retain_counts(export.messages)

current = 1      # next count value we are looking for
base_idx = 0     # index the search fans out from
amplitude = 200  # how far around base_idx to look
sequences = []

while base_idx < len(messages):
    curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
    print(
        f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}"
    )
    if curr_idx != -1:  # found
        if not _opened_sequence(sequences):
            _open_sequence(sequences, messages[curr_idx])
        else:
            # Extend the open sequence to this newly-found message.
            sequences[-1].end_message = messages[curr_idx]
        base_idx = curr_idx + 1
        current += 1
    else:  # not found
        # Close the sequence if one is open.
        if _opened_sequence(sequences):
            _close_sequence(sequences)
        # Advance past stale messages, or move on to the next value.
        if msg_val.get(messages[base_idx]) < current:
            base_idx += 1
        else:
            current += 1