Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

find_holes_v2.py 1.9KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. from million.analyze.word_finder import retain_counts
  2. import million.analyze.message_evaluation as msg_val
  3. from million.model.sequence import Sequence
  4. import million.parse.fb_exports as fb
  5. def _find_value_around_index(messages, value, idx, amplitude) -> int:
  6. check_value = lambda x: msg_val.get(messages[x]) == value
  7. if check_value(idx):
  8. return idx
  9. for offset in range(1, amplitude):
  10. o_idx = idx + offset * +1
  11. if check_value(o_idx):
  12. return o_idx
  13. o_idx = idx + offset * -1
  14. if check_value(o_idx):
  15. return o_idx
  16. return -1
  17. def _open_sequence(sequences, msg):
  18. sequence = Sequence(
  19. start=msg_val.get(msg), start_message=msg, end=-1, end_message=msg
  20. )
  21. sequences.append(sequence)
  22. def _close_sequence(sequences):
  23. if len(sequences) == 0:
  24. return
  25. sequences[-1].end = msg_val.get(sequences[-1].end_message)
  26. def _opened_sequence(sequences):
  27. return len(sequences) > 0 and sequences[-1].end == -1
  28. export = fb.parse_dirfiles("./data/")
  29. messages = retain_counts(export.messages)
  30. current = 1
  31. base_idx = 0
  32. amplitude = 200
  33. sequences = []
  34. while base_idx < len(messages):
  35. curr_idx = _find_value_around_index(messages, current, base_idx, amplitude)
  36. print(
  37. f"searching {current} from [{messages[base_idx]}]\t-> {'Not found' if curr_idx == -1 else 'Itself' if curr_idx == base_idx else messages[curr_idx]}"
  38. )
  39. if curr_idx != -1: # trouvé
  40. if not _opened_sequence(sequences):
  41. _open_sequence(sequences, messages[curr_idx])
  42. else:
  43. sequences[-1].end_message = messages[curr_idx]
  44. base_idx = curr_idx + 1
  45. current += 1
  46. else: # pas trouvé
  47. # fermer la sequence si ouverte
  48. if _opened_sequence(sequences):
  49. _close_sequence(sequences)
  50. if msg_val.get(messages[base_idx]) < current:
  51. base_idx += 1
  52. else:
  53. current += 1