find_holes.py 1.4KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
import csv
from datetime import datetime, timezone

from million.analyze.find_holes import compute_sequences, find_holes
from million.analyze.retain_counts import retain_counts
import million.parse.fb_exports as fb
  5. DATA_PATH = './data/'
  6. export = fb.parse_dirfiles(DATA_PATH)
  7. filtered = retain_counts(export.messages)
  8. sequences = compute_sequences(filtered)
  9. actual_counted = sum([s.length() for s in sequences])
  10. print(f"Actual counted: {actual_counted}")
  11. holes = find_holes(filtered)
  12. for hole in holes:
  13. print(f"{hole.start() + 1} -> {hole.end() - 1} ({hole.length() - 2})")
  14. print(f"Total holes: {len(holes)}")
  15. print(f"Total holes size: {sum([h.length() for h in holes if h.length() < 10_000])}")
  16. # lets export a csv file of the holes and the people responsible for them
  17. with open('output/holes.csv', 'w') as f:
  18. f.write('début,fin,taille,responsable1,responsable2,date1,date2\n')
  19. for hole in holes:
  20. date_start = datetime.utcfromtimestamp(
  21. hole.start_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
  22. date_end = datetime.utcfromtimestamp(
  23. hole.end_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
  24. f.write(
  25. f"{hole.start()},"
  26. f"{hole.end()},"
  27. f"{hole.length()},"
  28. f"{hole.start_message.sender_name},"
  29. f"{hole.end_message.sender_name},"
  30. f"{date_start},{date_end}\n"
  31. )