您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

find_gromots.py 910B

123456789101112131415161718192021222324252627282930313233343536373839
  1. from datetime import datetime
  2. from million.analyze.word_finder import find_words
  3. import million.parse.fb_exports as fb
  4. DATA_PATH = './data/'
  5. gros_mots = [
  6. '.*merde.*',
  7. 'sexe',
  8. 'pute',
  9. 'pé?dé?',
  10. 'putain',
  11. 'bite',
  12. 'encul.*',
  13. 'cul',
  14. 'nichon',
  15. 'gueule',
  16. 'con(ne)?',
  17. 'chatte',
  18. 'niqu.*',
  19. 'chi(é|e).*',
  20. 'bais.*'
  21. ]
  22. export = fb.parse_dirfiles(DATA_PATH)
  23. msg_gros_mots = find_words(export.messages, gros_mots)
  24. msg_gros_mots_grp = {}
  25. for msg in msg_gros_mots:
  26. if msg.sender_name not in msg_gros_mots_grp: msg_gros_mots_grp[msg.sender_name] = []
  27. msg_gros_mots_grp[msg.sender_name].append(msg)
  28. for name in sorted(msg_gros_mots_grp, key = lambda k: len(msg_gros_mots_grp[k])):
  29. print(name)
  30. for msg in msg_gros_mots_grp[name]:
  31. time_str = msg.date_time.strftime("%d/%m/%Y %H:%M:%S")
  32. print(f"\t{time_str} : {msg.content}")