
find_gromots.py 1.0KB

from datetime import datetime

from million.analyze.word_finder import findWords
from million.parse.fb_exports import FacebookExportParser

DATA_PATH = './data/'

# Parse the Facebook export located in DATA_PATH
parser = FacebookExportParser()
export = parser.parse(DATA_PATH)

# Regex patterns for the swear words ("gros mots") to search for
gros_mots = [
    '.*merde.*',
    'sexe',
    'pute',
    'pé?dé?',
    'putain',
    'bite',
    'encul.*',
    'cul',
    'nichon',
    'gueule',
    'con(ne)?',
    'chatte',
    'niqu.*',
    'chi(é|e).*',
    'bais.*',
    'couill.*'
]

# Keep only the messages matching at least one pattern
msg_gros_mots = findWords(export.messages, gros_mots)

# Group the matching messages by sender
msg_gros_mots_grp = {}
for msg in msg_gros_mots:
    if msg.sender_name not in msg_gros_mots_grp:
        msg_gros_mots_grp[msg.sender_name] = []
    msg_gros_mots_grp[msg.sender_name].append(msg)

# Print each sender, sorted by number of matches, followed by their messages
for name in sorted(msg_gros_mots_grp, key=lambda k: len(msg_gros_mots_grp[k])):
    print(name)
    for msg in msg_gros_mots_grp[name]:
        time = datetime.fromtimestamp(msg.timestamp_ms / 1000)
        time_str = time.strftime("%d/%m/%Y %H:%M:%S")
        print(f"\t{time_str} : {msg.content}")