import json
import os
import re
from typing import List

from million.model.fb_export import FacebookExport
from million.model.message import Message


class FacebookExportParser:
    """Load every ``.json`` file of an export directory into a FacebookExport."""

    # Matches a ``\u00XX`` escape whose value is a non-ASCII byte (0x80-0xff).
    #   * ``(?<!\\)((?:\\\\)*)`` consumes any complete ``\\`` pairs in front of
    #     the escape, so a ``\u`` whose backslash is itself escaped (the JSON
    #     text ``\\u00e9``, i.e. the literal characters "\u00e9") is skipped.
    #   * ASCII escapes (``\u0022``, ``\u005c``, control characters, ...) are
    #     deliberately not matched: turning them into raw bytes would corrupt
    #     the JSON syntax, and ``json.loads`` already decodes them correctly.
    _ESCAPED_HIGH_BYTE = re.compile(rb'(?<!\\)((?:\\\\)*)\\u00([89a-f][\da-f])')

    def __init__(self):
        pass

    def parse(self, file_dir) -> FacebookExport:
        """Parse all ``.json`` files found directly inside ``file_dir``.

        Args:
            file_dir: Directory containing the export files. It may be given
                with or without a trailing path separator.

        Returns:
            A ``FacebookExport`` holding the messages of every file, sorted by
            ``timestamp_ms``, and the concatenated participants of every file.

        Raises:
            OSError: If the directory or one of its files cannot be read.
            KeyError: If a file lacks the ``messages`` or ``participants`` key.
            UnicodeDecodeError: If a repaired file is not valid UTF-8.
            json.JSONDecodeError: If a repaired file is not valid JSON.
        """
        # os.path.join inserts the separator only when needed; sorting makes
        # the processing order independent of the file system's listing order.
        files = sorted(
            os.path.join(file_dir, name)
            for name in os.listdir(file_dir)
            if name.endswith('.json')
        )

        messages: List[Message] = []
        participants: list = []

        for file in files:
            print(file)
            with open(file, 'rb') as f:
                json_data = self.__read_broken_fb_json(f.read())
            messages.extend(Message(**m) for m in json_data['messages'])
            # NOTE(review): participants are appended once per file without
            # de-duplication; if every file repeats the same list the result
            # contains duplicates - confirm whether callers expect that.
            participants.extend(json_data['participants'])

        messages.sort(key=lambda m: m.timestamp_ms)
        return FacebookExport(messages=messages, participants=participants)

    def __read_broken_fb_json(self, binary_data):
        """Decode JSON whose non-ASCII text is written as ``\\u00XX`` byte escapes.

        Each ``\\u00XX`` escape in the range 0x80-0xff is replaced by the raw
        byte ``XX``; the resulting buffer is then decoded as UTF-8 and parsed.

        Args:
            binary_data: Raw bytes of one export file.

        Returns:
            The parsed JSON document.
        """
        repaired = self._ESCAPED_HIGH_BYTE.sub(
            # group(1) holds the escaped backslashes preceding the escape and
            # must be kept verbatim; group(2) is the two hex digits.
            lambda m: m.group(1) + bytes.fromhex(m.group(2).decode()),
            binary_data,
        )
        return json.loads(repaired.decode('utf8'))