You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

fb_exports.py 1.1KB

12345678910111213141516171819202122232425262728293031323334353637
  1. import json
  2. import os
  3. import re
  4. from typing import List
  5. from million.model.fb_export import FacebookExport
  6. from million.model.message import Message
  7. class FacebookExportParser:
  8. def __init__(self):
  9. pass
  10. def parse(self, file_dir) -> FacebookExport:
  11. files = [file_dir +
  12. f for f in os.listdir(file_dir) if f.endswith('.json')]
  13. messages = []
  14. participants = []
  15. for file in files:
  16. print(file)
  17. with open(file, 'rb') as f:
  18. json_data = self.__read_broken_fb_json(f.read())
  19. messages += [Message(**m) for m in json_data['messages']]
  20. participants += json_data['participants']
  21. messages.sort(key=lambda m: m.timestamp_ms)
  22. return FacebookExport(messages=messages, participants=participants)
  23. def __read_broken_fb_json(self, binary_data):
  24. repaired = re.sub(
  25. rb'\\u00([\da-f]{2})',
  26. lambda m: bytes.fromhex(m.group(1).decode()),
  27. binary_data
  28. )
  29. return json.loads(repaired.decode('utf8'))