Browse Source

Auto-parsing timestamp_ms in JSON Message

from int to datetime object
feature/message_filters
Figg 9 months ago
parent
commit
9a171af035
4 changed files with 28 additions and 19 deletions
  1. 1
    1
      million/model/fb_export.py
  2. 19
    7
      million/model/message.py
  3. 3
    4
      scripts/find_gromots.py
  4. 5
    7
      scripts/find_holes.py

+ 1
- 1
million/model/fb_export.py View File

@@ -35,7 +35,7 @@ class FacebookExport(BaseModel):
35 35
             self.magic_words.update(other.magic_words)
36 36
 
37 37
     def sort(self) -> None:
38
-        self.messages.sort(key = lambda m: m.timestamp_ms)
38
+        self.messages.sort(key = lambda m: m.date_time)
39 39
 
40 40
 
41 41
     def __eq__(self, other: FacebookExport) -> bool:

+ 19
- 7
million/model/message.py View File

@@ -1,38 +1,46 @@
1 1
 from datetime import datetime
2 2
 from typing import Any, List
3 3
 from uuid import uuid4
4
-from pydantic import BaseModel, PrivateAttr, computed_field
4
+from pydantic import BaseModel, Field, PrivateAttr, computed_field, validator
5
+
5 6
 
6 7
 class Reaction(BaseModel):
7 8
     reaction: str
8 9
     actor: str
9 10
 
11
+
10 12
 class AudioFile(BaseModel):
11 13
     uri: str
12 14
     creation_timestamp: int
13 15
 
16
+
14 17
 class Video(BaseModel):
15 18
     uri: str
16 19
     creation_timestamp: int
17 20
 
21
+
18 22
 class Photo(BaseModel):
19 23
     uri: str
20 24
     creation_timestamp: int
21 25
 
26
+
22 27
 class Gif(BaseModel):
23 28
     uri: str
24 29
 
30
+
25 31
 class Share(BaseModel):
26 32
     link: str
27 33
     share_text: str
28 34
 
35
+
29 36
 class Sticker(BaseModel):
30 37
     uri: str
31 38
     ai_stickers: List[Any]
32 39
 
40
+
33 41
 class Message(BaseModel):
34 42
     sender_name: str
35
-    timestamp_ms: int
43
+    date_time: datetime = Field(alias="timestamp_ms")
36 44
     content: str | None = None
37 45
     sticker: Sticker | None = None
38 46
     share: Share | None = None
@@ -47,16 +55,20 @@ class Message(BaseModel):
47 55
 
48 56
     _id: str = PrivateAttr(default_factory=lambda: str(uuid4()))
49 57
 
50
-
51 58
     def __str__(self) -> str:
52
-        dt = datetime.fromtimestamp(self.timestamp_ms / 1000)
53
-        dt_str = dt.strftime("%d/%m/%Y, %H:%M:%S")
59
+        dt_str = self.date_time.strftime("%d/%m/%Y, %H:%M:%S")
54 60
         return f"{self.sender_name}({dt_str}) : {self.content}"
55 61
 
56 62
     def __hash__(self) -> int:
57 63
         return hash(self.item_id)
58
-    
64
+
59 65
     @computed_field
60 66
     @property
61 67
     def item_id(self) -> str:
62
-        return self._id
68
+        return self._id
69
+
70
+    @validator("date_time", pre=True, always=True)
71
+    def parse_timestamp(cls, v):
72
+        if isinstance(v, int):
73
+            return datetime.fromtimestamp(v / 1000)
74
+        return v

+ 3
- 4
scripts/find_gromots.py View File

@@ -1,5 +1,5 @@
1 1
 from datetime import datetime
2
-from million.analyze.word_finder import findWords
2
+from million.analyze.word_finder import find_words
3 3
 import million.parse.fb_exports as fb
4 4
 
5 5
 
@@ -23,7 +23,7 @@ gros_mots = [
23 23
     ]
24 24
 
25 25
 export = fb.parse_dirfiles(DATA_PATH)
26
-msg_gros_mots = findWords(export.messages, gros_mots)
26
+msg_gros_mots = find_words(export.messages, gros_mots)
27 27
 
28 28
 msg_gros_mots_grp = {}
29 29
 
@@ -35,6 +35,5 @@ for name in sorted(msg_gros_mots_grp, key = lambda k: len(msg_gros_mots_grp[k]))
35 35
     print(name)
36 36
 
37 37
     for msg in msg_gros_mots_grp[name]:
38
-        time = datetime.fromtimestamp(msg.timestamp_ms / 1000)
39
-        time_str = time.strftime("%d/%m/%Y %H:%M:%S")
38
+        time_str = msg.date_time.strftime("%d/%m/%Y %H:%M:%S")
40 39
         print(f"\t{time_str} : {msg.content}")

+ 5
- 7
scripts/find_holes.py View File

@@ -4,7 +4,7 @@ from million.analyze.retain_counts import retain_counts
4 4
 import million.parse.fb_exports as fb
5 5
 
6 6
 
7
-DATA_PATH = './data/'
7
+DATA_PATH = "./data/"
8 8
 
9 9
 export = fb.parse_dirfiles(DATA_PATH)
10 10
 
@@ -27,13 +27,11 @@ for hole in holes:
27 27
 
28 28
 
29 29
 # let's export a csv file of the holes and the people responsible for them
30
-with open('output/holes.csv', 'w') as f:
31
-    f.write('début,fin,taille,responsable1,responsable2,date1,date2\n')
30
+with open("output/holes.csv", "w") as f:
31
+    f.write("début,fin,taille,responsable1,responsable2,date1,date2\n")
32 32
     for hole in holes:
33
-        date_start = datetime.utcfromtimestamp(
34
-            hole.start_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
35
-        date_end = datetime.utcfromtimestamp(
36
-            hole.end_message.timestamp_ms / 1000.0).strftime('%Y-%m-%d %H:%M:%S')
33
+        date_start = hole.start_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
34
+        date_end = hole.end_message.date_time.strftime("%Y-%m-%d %H:%M:%S")
37 35
         f.write(
38 36
             f"{hole.start()},"
39 37
             f"{hole.end()},"

Loading…
Cancel
Save