12345678910111213141516171819202122232425262728293031323334353637 |
-
- import json
- import os
- import re
- from typing import List
-
- from million.model.fb_export import FacebookExport
- from million.model.message import Message
-
-
class FacebookExportParser:
    """Build a ``FacebookExport`` from a directory of exported JSON files."""

    # Matches either an escaped backslash (group 1 is None; left untouched so
    # that a literal ``\\u00e9`` in the JSON text is never mistaken for an
    # escape) or a ``\u00XX`` escape whose value is a non-ASCII byte
    # (0x80-0xff, hex digits captured in group 1).
    _BYTE_ESCAPE = re.compile(rb'\\\\|\\u00([89a-fA-F][\da-fA-F])')

    def __init__(self):
        pass

    def parse(self, file_dir: str) -> "FacebookExport":
        """Read every ``.json`` file in *file_dir* and merge their contents.

        Args:
            file_dir: Directory to scan (non-recursive). A trailing path
                separator is optional.

        Returns:
            A ``FacebookExport`` holding all messages, sorted by
            ``timestamp_ms``, and the participants of every file
            concatenated in file-name order (duplicates are kept).

        Raises:
            OSError: If the directory or one of its files cannot be read.
            KeyError: If a file lacks the ``messages`` or ``participants`` key.
            ValueError: If a file is not valid JSON / UTF-8 after repair.
        """
        # Sort the listing so the participant order does not depend on the
        # arbitrary order returned by os.listdir.
        file_names = sorted(
            name for name in os.listdir(file_dir) if name.endswith('.json')
        )

        messages = []
        participants = []
        for name in file_names:
            # os.path.join works whether or not file_dir ends with a separator.
            path = os.path.join(file_dir, name)
            print(path)
            with open(path, 'rb') as f:
                json_data = self.__read_broken_fb_json(f.read())
            messages.extend(Message(**m) for m in json_data['messages'])
            participants.extend(json_data['participants'])

        messages.sort(key=lambda m: m.timestamp_ms)
        return FacebookExport(messages=messages, participants=participants)

    def __read_broken_fb_json(self, binary_data):
        """Decode JSON bytes whose non-ASCII text is written as ``\\u00XX``.

        Each ``\\u00XX`` escape in the 0x80-0xff range is replaced by the raw
        byte ``XX`` so that consecutive escapes form a UTF-8 sequence again;
        the result is then decoded as UTF-8 and parsed.

        Escapes below 0x80 are deliberately left alone: the JSON parser
        decodes them to the same character, whereas substituting the raw byte
        would corrupt the document for ``\\u0022`` (quote), ``\\u005c``
        (backslash) and control characters.

        Args:
            binary_data: Raw bytes of one JSON file.

        Returns:
            The parsed JSON value.
        """
        def to_raw_byte(match):
            hex_pair = match.group(1)
            if hex_pair is None:
                # Escaped backslash: keep it exactly as written.
                return match.group(0)
            return bytes.fromhex(hex_pair.decode())

        repaired = self._BYTE_ESCAPE.sub(to_raw_byte, binary_data)
        return json.loads(repaired.decode('utf8'))
|