import os
import re
from typing import List, Optional

from million.model.fb_export import FacebookExport

# Matches a single byte that the export wrote as a ``\u00XX`` escape.
#   * ``(?<!\\)((?:\\\\)*)`` keeps an even run of preceding backslashes
#     (escaped backslashes) intact, so the literal text ``\\u00e9`` is not
#     mistaken for an escape sequence.
#   * Only bytes 0x80-0xff are matched: turning ASCII escapes such as
#     ``\u0022`` (quote), ``\u005c`` (backslash) or control characters into
#     raw bytes would produce invalid JSON.
_FB_ESCAPED_BYTE = re.compile(
    rb'(?<!\\)((?:\\\\)*)\\u00([89a-fA-F][\da-fA-F])'
)


def is_file_valid(file_name: str) -> bool:
    """
    Check if this file can be parsed into a FacebookExport.

    Currently this only checks that the name has a ``.json`` extension
    (case-insensitive); the file content is not inspected.
    """
    # NOTE: is there a way to peek inside a json file to
    # check its internal structure?
    extension = os.path.splitext(file_name)[-1]
    return extension.lower() == '.json'


def valid_dirfiles(file_dir: str) -> List[str]:
    """
    Return the paths of the parsable files contained in this directory.

    Each path is ``file_dir`` joined with the entry name. Entries are
    listed in sorted order so the result does not depend on the
    arbitrary order returned by ``os.listdir``.
    """
    return [
        os.path.join(file_dir, entry)
        for entry in sorted(os.listdir(file_dir))
        if is_file_valid(entry)
    ]


def parse_file(file_name: str) -> Optional[FacebookExport]:
    """
    Parse a single parsable file into a FacebookExport object.

    Returns ``None`` when ``file_name`` is not considered parsable by
    ``is_file_valid``.
    """
    if not is_file_valid(file_name):
        return None

    # Read raw bytes: the mis-encoded escapes must be repaired before the
    # content is decoded as text.
    with open(file_name, 'rb') as f:
        json_data = __read_broken_fb_json(f.read())

    return FacebookExport.model_validate_json(json_data)


def parse_dirfiles(file_dir: str) -> FacebookExport:
    """
    Parse every parsable file inside this directory into a single
    FacebookExport object.

    The first export is used as the base; every other export is merged
    into it and the result is sorted before being returned.

    Raises ``IndexError`` if the directory contains no parsable file.
    """
    exports = [parse_file(path) for path in valid_dirfiles(file_dir)]
    if not exports:
        # Same exception type as the implicit ``exports[0]`` failure, but
        # with a message that says what actually went wrong.
        raise IndexError(f"no parsable file found in {file_dir!r}")

    merged, *others = exports
    for export in others:
        merged.merge(export)

    merged.sort()
    return merged


def __read_broken_fb_json(binary_data):
    """
    Repair the badly encoded JSON of a Facebook export and decode it.

    See https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
    Every ``\\u00XX`` escape holding a byte in the 0x80-0xff range is
    replaced by that raw byte, then the whole payload is decoded as UTF-8.
    ASCII escapes are left untouched so the JSON stays well-formed.

    Raises ``UnicodeDecodeError`` if the repaired bytes are not valid UTF-8.
    """
    repaired = _FB_ESCAPED_BYTE.sub(
        lambda m: m.group(1) + bytes.fromhex(m.group(2).decode('ascii')),
        binary_data,
    )
    return repaired.decode('utf8')