You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

fb_exports.py 1.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import os, re
  2. from typing import List
  3. from million.model.fb_export import FacebookExport
  4. def is_file_valid(file_name: str) -> bool:
  5. """
  6. Check if this file can be parsed into a FacebookExport
  7. (Actually only check if its a json file atm)
  8. """
  9. # NOTE is there a way to peek inside a json file to
  10. # check its internal structure ?
  11. return os.path.splitext(file_name)[-1].lower() == '.json'
  12. def valid_dirfiles(file_dir: str) -> List[str]:
  13. """
  14. Returns a list of parsable files contained
  15. in this directory
  16. """
  17. return [os.path.join(file_dir, file_name)
  18. for file_name in os.listdir(file_dir)
  19. if is_file_valid(file_name)]
  20. def parse_file(file_name: str) -> FacebookExport:
  21. """
  22. Parses a single parsable file into a FacebookExport Object
  23. """
  24. if not is_file_valid(file_name): return None
  25. with open(file_name, 'rb') as f:
  26. json_data = __read_broken_fb_json(f.read())
  27. return FacebookExport.model_validate_json(json_data)
  28. def parse_dirfiles(file_dir: str) -> FacebookExport:
  29. """
  30. Parses every parsable files inside this directory
  31. into a single FacebookExport Object
  32. """
  33. exports = [parse_file(f) for f in valid_dirfiles(file_dir)]
  34. result = exports[0]
  35. for ex in exports[1:]:
  36. result.merge(ex)
  37. result.sort()
  38. return result
  39. def __read_broken_fb_json(binary_data):
  40. # https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
  41. repaired = re.sub(
  42. rb'\\u00([\da-f]{2})',
  43. lambda m: bytes.fromhex(m.group(1).decode()),
  44. binary_data
  45. )
  46. return repaired.decode('utf8')