You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fb_exports.py 1.3KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. import json, os, re
  2. from typing import List
  3. from million.model.fb_export import FacebookExport
  4. def is_file_valid(file_name: str) -> bool:
  5. return os.path.splitext(file_name)[-1].lower() == '.json'
  6. def valid_dirfiles(file_dir: str) -> List[str]:
  7. return [os.path.join(file_dir, file_name)
  8. for file_name in os.listdir(file_dir)
  9. if is_file_valid(file_name)]
  10. def parse_file(file_name: str) -> FacebookExport:
  11. if not is_file_valid(file_name): return None
  12. with open(file_name, 'rb') as f:
  13. fixed_json = __read_broken_fb_json(f.read())
  14. json_data = json.loads(fixed_json)
  15. return (FacebookExport(**json_data))
  16. def parse_dirfiles(file_dir: str) -> FacebookExport:
  17. exports = [parse_file(f) for f in valid_dirfiles(file_dir)]
  18. if len(exports) == 0: return
  19. for other in exports[1:]:
  20. exports[0].messages.extend(other.messages)
  21. exports[0].participants.extend(other.participants)
  22. exports[0].messages.sort(key = lambda m: m.timestamp_ms)
  23. exports[0].participants = set(exports[0].participants)
  24. return exports[0]
  25. def __read_broken_fb_json(binary_data):
  26. repaired = re.sub(
  27. rb'\\u00([\da-f]{2})',
  28. lambda m: bytes.fromhex(m.group(1).decode()),
  29. binary_data
  30. )
  31. return repaired.decode('utf8')