1234567891011121314151617181920212223242526272829303132333435363738394041424344 |
-
import os
import re
from typing import List, Optional

from million.model.fb_export import FacebookExport
-
-
def is_file_valid(file_name: str) -> bool:
    """Return True when *file_name* has a ``.json`` extension.

    Only the name is inspected (case-insensitively); the file is never
    opened, so this says nothing about its contents.
    """
    # NOTE is there a way to peek inside a json file to
    # check its internal structure ?
    _, extension = os.path.splitext(file_name)
    return extension.lower() == '.json'
-
def valid_dirfiles(file_dir: str) -> List[str]:
    """List the paths of the valid export files directly inside *file_dir*.

    Args:
        file_dir: Directory to scan (not recursive).

    Returns:
        Paths (``file_dir`` joined with each entry name) of the regular
        files accepted by ``is_file_valid``, sorted by entry name.

    Raises:
        OSError: If *file_dir* cannot be listed (propagated from
            ``os.listdir``).
    """
    paths = []
    # os.listdir() yields entries in arbitrary order; sort them so the
    # result (and whatever callers build from it) is deterministic.
    for file_name in sorted(os.listdir(file_dir)):
        if not is_file_valid(file_name):
            continue
        path = os.path.join(file_dir, file_name)
        # A directory that merely happens to be named "*.json" cannot be
        # opened as a file, so keep regular files only.
        if os.path.isfile(path):
            paths.append(path)
    return paths
-
def parse_file(file_name: str) -> Optional[FacebookExport]:
    """Load a single Facebook export file.

    Args:
        file_name: Path of the file to load.

    Returns:
        The object produced by ``FacebookExport.model_validate_json`` from
        the repaired file contents, or ``None`` when *file_name* is rejected
        by ``is_file_valid``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    if not is_file_valid(file_name):
        return None

    # Read as bytes: the escape repair works on the raw bytes and performs
    # the UTF-8 decoding itself.
    with open(file_name, 'rb') as f:
        raw = f.read()

    json_data = __read_broken_fb_json(raw)
    return FacebookExport.model_validate_json(json_data)
-
def parse_dirfiles(file_dir: str) -> FacebookExport:
    """Load every valid export file in *file_dir* and combine them.

    The first parsed export is used as the base; every other export is
    passed to its ``merge`` method, then ``sort`` is called on the base
    before it is returned.

    Args:
        file_dir: Directory containing the export files.

    Returns:
        The first parsed export, after merging the others into it and
        sorting it.

    Raises:
        IndexError: If *file_dir* contains no parsable export file.
    """
    parsed = (parse_file(f) for f in valid_dirfiles(file_dir))
    # parse_file() returns None for files it rejects; never try to merge those.
    exports = [ex for ex in parsed if ex is not None]

    if not exports:
        # IndexError is kept (instead of e.g. ValueError) so callers that
        # caught the former implicit ``exports[0]`` failure keep working.
        raise IndexError(f"no export file found in {file_dir!r}")

    result, *others = exports

    for ex in others:
        result.merge(ex)

    result.sort()
    return result
-
def __read_broken_fb_json(binary_data: bytes) -> str:
    r"""Repair the text encoding of a raw Facebook JSON export.

    Facebook writes every byte of a UTF-8 encoded character as its own
    ``\u00XX`` escape, so a plain JSON parse yields mojibake. This turns
    those escapes back into the raw bytes and decodes the whole document
    as UTF-8.
    See https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded

    Only escapes in the 0x80-0xff range are rewritten. ASCII escapes such as
    ``\u0022`` (quote), ``\u005c`` (backslash) or control characters are
    already correct JSON and would corrupt the document if replaced by the
    raw character.

    Args:
        binary_data: Raw bytes of the export file.

    Returns:
        The repaired JSON document as text.

    Raises:
        UnicodeDecodeError: If the repaired bytes are not valid UTF-8.
    """
    # https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
    repaired = re.sub(
        rb'\\u00([89a-f][\da-f])',
        lambda m: bytes.fromhex(m.group(1).decode()),
        binary_data
    )

    return repaired.decode('utf8')
|