Browse Source

Auto-parsing timestamp_ms in JSON Message

from int to datetime object
feature/message_filters
Figg 9 months ago
parent
commit
9a171af035
4 changed files with 28 additions and 19 deletions
  1. 1
    1
      million/model/fb_export.py
  2. 19
    7
      million/model/message.py
  3. 3
    4
      scripts/find_gromots.py
  4. 5
    7
      scripts/find_holes.py

+ 1
- 1
million/model/fb_export.py View File

35
             self.magic_words.update(other.magic_words)
35
             self.magic_words.update(other.magic_words)
36
 
36
 
37
     def sort(self) -> None:
37
     def sort(self) -> None:
38
-        self.messages.sort(key = lambda m: m.timestamp_ms)
38
+        self.messages.sort(key = lambda m: m.date_time)
39
 
39
 
40
 
40
 
41
     def __eq__(self, other: FacebookExport) -> bool:
41
     def __eq__(self, other: FacebookExport) -> bool:

+ 19
- 7
million/model/message.py View File

1
 from datetime import datetime
1
 from datetime import datetime
2
 from typing import Any, List
2
 from typing import Any, List
3
 from uuid import uuid4
3
 from uuid import uuid4
4
-from pydantic import BaseModel, PrivateAttr, computed_field
4
+from pydantic import BaseModel, Field, PrivateAttr, computed_field, validator
5
+
5
 
6
 
6
 class Reaction(BaseModel):
7
 class Reaction(BaseModel):
7
     reaction: str
8
     reaction: str
8
     actor: str
9
     actor: str
9
 
10
 
11
+
10
 class AudioFile(BaseModel):
12
 class AudioFile(BaseModel):
11
     uri: str
13
     uri: str
12
     creation_timestamp: int
14
     creation_timestamp: int
13
 
15
 
16
+
14
 class Video(BaseModel):
17
 class Video(BaseModel):
15
     uri: str
18
     uri: str
16
     creation_timestamp: int
19
     creation_timestamp: int
17
 
20
 
21
+
18
 class Photo(BaseModel):
22
 class Photo(BaseModel):
19
     uri: str
23
     uri: str
20
     creation_timestamp: int
24
     creation_timestamp: int
21
 
25
 
26
+
22
 class Gif(BaseModel):
27
 class Gif(BaseModel):
23
     uri: str
28
     uri: str
24
 
29
 
30
+
25
 class Share(BaseModel):
31
 class Share(BaseModel):
26
     link: str
32
     link: str
27
     share_text: str
33
     share_text: str
28
 
34
 
35
+
29
 class Sticker(BaseModel):
36
 class Sticker(BaseModel):
30
     uri: str
37
     uri: str
31
     ai_stickers: List[Any]
38
     ai_stickers: List[Any]
32
 
39
 
40
+
33
 class Message(BaseModel):
41
 class Message(BaseModel):
34
     sender_name: str
42
     sender_name: str
35
-    timestamp_ms: int
43
+    date_time: datetime = Field(alias="timestamp_ms")
36
     content: str | None = None
44
     content: str | None = None
37
     sticker: Sticker | None = None
45
     sticker: Sticker | None = None
38
     share: Share | None = None
46
     share: Share | None = None
47
 
55
 
48
     _id: str = PrivateAttr(default_factory=lambda: str(uuid4()))
56
     _id: str = PrivateAttr(default_factory=lambda: str(uuid4()))
49
 
57
 
50
-
51
     def __str__(self) -> str:
58
     def __str__(self) -> str:
52
-        dt = datetime.fromtimestamp(self.timestamp_ms / 1000)
53
-        dt_str = dt.strftime("%d/%m/%Y, %H:%M:%S")
59
+        dt_str = self.date_time.strftime("%d/%m/%Y, %H:%M:%S")
54
         return f"{self.sender_name}({dt_str}) : {self.content}"
60
         return f"{self.sender_name}({dt_str}) : {self.content}"
55
 
61
 
56
     def __hash__(self) -> int:
62
     def __hash__(self) -> int:
57
         return hash(self.item_id)
63
         return hash(self.item_id)
58
-    
64
+
59
     @computed_field
65
     @computed_field
60
     @property
66
     @property
61
     def item_id(self) -> str:
67
     def item_id(self) -> str:
62
-        return self._id
68
+        return self._id
69
+
70
+    @validator("date_time", pre=True, always=True)
71
+    def parse_timestamp(cls, v):
72
+        if isinstance(v, int):
73
+            return datetime.fromtimestamp(v / 1000)
74
+        return v

+ 3
- 4
scripts/find_gromots.py View File

1
 from datetime import datetime
1
 from datetime import datetime
2
-from million.analyze.word_finder import findWords
2
+from million.analyze.word_finder import find_words
3
 import million.parse.fb_exports as fb
3
 import million.parse.fb_exports as fb
4
 
4
 
5
 
5
 
23
     ]
23
     ]
24
 
24
 
25
 export = fb.parse_dirfiles(DATA_PATH)
25
 export = fb.parse_dirfiles(DATA_PATH)
26
-msg_gros_mots = findWords(export.messages, gros_mots)
26
+msg_gros_mots = find_words(export.messages, gros_mots)
27
 
27
 
28
 msg_gros_mots_grp = {}
28
 msg_gros_mots_grp = {}
29
 
29
 
35
     print(name)
35
     print(name)
36
 
36
 
37
     for msg in msg_gros_mots_grp[name]:
37
     for msg in msg_gros_mots_grp[name]:
38
-        time = datetime.fromtimestamp(msg.timestamp_ms / 1000)
39
-        time_str = time.strftime("%d/%m/%Y %H:%M:%S")
38
+        time_str = msg.date_time.strftime("%d/%m/%Y %H:%M:%S")
40
         print(f"\t{time_str} : {msg.content}")
39
         print(f"\t{time_str} : {msg.content}")

+ 5
- 7
scripts/find_holes.py View File

4
 import million.parse.fb_exports as fb
4
 import million.parse.fb_exports as fb
5
 
5
 
6
 
6
 
7
-DATA_PATH = './data/'
7
+DATA_PATH = "./data/"
8
 
8
 
9
 export = fb.parse_dirfiles(DATA_PATH)
9
 export = fb.parse_dirfiles(DATA_PATH)
10
 
10
 
27
 
27
 
28
 
28
 
29
 # lets export a csv file of the holes and the people responsible for them
29
 # lets export a csv file of the holes and the people responsible for them
30
-with open('output/holes.csv', 'w') as f:
31
-    f.write('début,fin,taille,responsable1,responsable2,date1,date2\n')
30
+with open("output/holes.csv", "w") as f:
31
+    f.write("début,fin,taille,responsable1,responsable2,date1,date2\n")
32
     for hole in holes:
32
     for hole in holes:
33
-        date_start = datetime.utcfromtimestamp(
34
-            hole.start_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
35
-        date_end = datetime.utcfromtimestamp(
36
-            hole.end_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
33
+        date_start = hole.start_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
34
+        date_end = hole.end_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
37
         f.write(
35
         f.write(
38
             f"{hole.start()},"
36
             f"{hole.start()},"
39
             f"{hole.end()},"
37
             f"{hole.end()},"

Loading…
Cancel
Save