Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

find_holes.py 1.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142
import csv
from datetime import datetime
from pathlib import Path

import million.analyze.find_holes as fh
import million.parse.fb_exports as fb
from million.analyze.word_finder import retain_counts
  5. DATA_PATH = "./data/"
  6. export = fb.parse_dirfiles(DATA_PATH)
  7. filtered = retain_counts(export.messages)
  8. sequences = fh.compute_sequences(filtered)
  9. actual_counted = sum([s.length() for s in sequences])
  10. print(f"Actual counted: {actual_counted}")
  11. merged = fh.merge_duplicates(sequences)
  12. merged = [s for s in merged if s.length() > 1]
  13. holes = fh.find_holes(filtered)
  14. print(len(holes))
  15. for hole in holes:
  16. print(f"{hole.start() + 1} -> {hole.end() - 1} ({hole.length() - 2})")
  17. # lets export a csv file of the holes and the people responsible for them
  18. with open("output/holes.csv", "w") as f:
  19. f.write("début,fin,taille,responsable1,responsable2,date1,date2\n")
  20. for hole in holes:
  21. date_start = hole.start_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
  22. date_end = hole.end_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
  23. f.write(
  24. f"{hole.start()},"
  25. f"{hole.end()},"
  26. f"{hole.length()},"
  27. f"{hole.start_message.sender_name},"
  28. f"{hole.end_message.sender_name},"
  29. f"{date_start},{date_end}\n"
  30. )