You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fb_exports.py 1.2KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. import os, re
  2. from typing import List
  3. from million.model.fb_export import FacebookExport
  4. def is_file_valid(file_name: str) -> bool:
  5. # NOTE is there a way to peek inside a json file to
  6. # check its internal structure ?
  7. return os.path.splitext(file_name)[-1].lower() == '.json'
  8. def valid_dirfiles(file_dir: str) -> List[str]:
  9. return [os.path.join(file_dir, file_name)
  10. for file_name in os.listdir(file_dir)
  11. if is_file_valid(file_name)]
  12. def parse_file(file_name: str) -> FacebookExport:
  13. if not is_file_valid(file_name): return None
  14. with open(file_name, 'rb') as f:
  15. json_data = __read_broken_fb_json(f.read())
  16. return FacebookExport.model_validate_json(json_data)
  17. def parse_dirfiles(file_dir: str) -> FacebookExport:
  18. exports = [parse_file(f) for f in valid_dirfiles(file_dir)]
  19. result = exports[0]
  20. for ex in exports[1:]:
  21. result.merge(ex)
  22. result.sort()
  23. return result
  24. def __read_broken_fb_json(binary_data):
  25. # https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
  26. repaired = re.sub(
  27. rb'\\u00([\da-f]{2})',
  28. lambda m: bytes.fromhex(m.group(1).decode()),
  29. binary_data
  30. )
  31. return repaired.decode('utf8')