import os
import re
from typing import List, Optional

from million.model.fb_export import FacebookExport

# Facebook exports write each byte of a UTF-8 sequence as its own ``\u00XX``
# escape.  Only bytes >= 0x80 need repairing: ASCII escapes (``\u0022``,
# ``\u005c``, control characters, ...) are already valid JSON and would
# corrupt the document if turned back into raw bytes.
# Group 1 captures a run of *escaped* backslashes (pairs) directly in front of
# the escape so that literal text such as ``\\u00e9`` is left untouched; the
# lookbehind prevents the match from starting in the middle of such a run.
_FB_ESCAPED_BYTE = re.compile(rb'(?<!\\)((?:\\\\)*)\\u00([89a-f][\da-f])')


def is_file_valid(file_name: str) -> bool:
    """Return True when *file_name* has a ``.json`` extension (any case).

    Only the name is inspected; the file is not opened.
    """
    # NOTE is there a way to peek inside a json file to
    # check its internal structure ?
    return os.path.splitext(file_name)[-1].lower() == '.json'


def valid_dirfiles(file_dir: str) -> List[str]:
    """List the paths of the JSON files located directly in *file_dir*.

    Entries are returned in ``os.listdir`` order, joined with *file_dir*.
    Sub-directories are skipped even if their name ends with ``.json``,
    since they cannot be opened as files.
    """
    paths = (
        os.path.join(file_dir, file_name)
        for file_name in os.listdir(file_dir)
        if is_file_valid(file_name)
    )
    return [path for path in paths if os.path.isfile(path)]


def parse_file(file_name: str) -> Optional[FacebookExport]:
    """Parse one export file into a ``FacebookExport``.

    Returns ``None`` when *file_name* does not have a ``.json`` extension.
    The raw bytes are repaired with ``__read_broken_fb_json`` before being
    handed to ``FacebookExport.model_validate_json``.
    """
    if not is_file_valid(file_name):
        return None

    with open(file_name, 'rb') as f:
        json_data = __read_broken_fb_json(f.read())
        return FacebookExport.model_validate_json(json_data)


def parse_dirfiles(file_dir: str) -> FacebookExport:
    """Parse every JSON file of *file_dir* and combine them into one export.

    The first parsed export receives the others through its ``merge``
    method, then its ``sort`` method is called once before it is returned.

    Raises:
        IndexError: if *file_dir* contains no JSON file.
    """
    exports = [parse_file(f) for f in valid_dirfiles(file_dir)]

    if not exports:
        # Same exception type as the former bare ``exports[0]`` lookup,
        # but with a message that points at the actual problem.
        raise IndexError(f"no .json export file found in {file_dir!r}")

    result = exports[0]

    for ex in exports[1:]:
        result.merge(ex)

    result.sort()
    return result


def __read_broken_fb_json(binary_data: bytes) -> str:
    """Repair the mis-encoded ``\\u00XX`` escapes of a Facebook export.

    Each ``\\u00XX`` escape whose value is >= 0x80 is replaced by the raw
    byte ``0xXX``; the result is then decoded as UTF-8.

    Raises:
        UnicodeDecodeError: if the repaired bytes are not valid UTF-8.
    """
    # https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
    repaired = _FB_ESCAPED_BYTE.sub(
        # Keep any preceding escaped backslashes, swap the escape for its byte.
        lambda m: m.group(1) + bytes.fromhex(m.group(2).decode()),
        binary_data,
    )
    return repaired.decode('utf8')