petites modifs de syntaxe ailleurs

--- a/DNS
+++ b/DNS
@@ -0,0 +1 @@
 
				
				+https://www.youtube.com/watch?v=mC9yute2k_Q 3000
			
--- a/million/analyze/count_participations.py
+++ b/million/analyze/count_participations.py
@@ -1,23 +1,30 @@
 
				
				-
			
 
				
				-from typing import List
			
 
				
				+from collections import Counter
			
 
				
				+from typing import Dict, List
			
 
				
				 from million.model.message import Message
			
 
				
				 from million.model.participant import Participant
			
 
				
				 
			
 
				
				 
			
 
				
				-def count_participations(messages: List[Message], participants: List[Participant]):
			
 
				
				+def count_participations(
			
 
				
				+        messages: List[Message],
			
 
				
				+        participants: List[Participant] | None = [],
			
 
				
				+        threshold: int | None = 0
			
 
				
				+        ) -> Dict[str, int]:
			
 
				
				     """
			
 
				
				-    Count the number of messages sent by each participant
			
 
				
				+    Count the number of messages sent by each participant,\n
			
 
				
				+    you can specify a threshold to return only people having reached that many counts
			
 
				
				     """
			
 
				
				-    participations = {}
			
 
				
				-    for participant in participants:
			
 
				
				-        participations[participant.name] = 0
			
 
				
				-
			
 
				
				-    for message in messages:
			
 
				
				-        if message.sender_name not in participations:
			
 
				
				-            participations[message.sender_name] = 1
			
 
				
				-        else:
			
 
				
				-            participations[message.sender_name] += 1
			
 
				
				+    participations = dict.fromkeys([p.name for p in participants], 0)
			
 
				
				+    participations.update(Counter([m.sender_name for m in messages]))
			
 
				
				+    
			
 
				
				+    return {k: v for k,v in sorted(participations.items(), key=lambda x: -x[1]) if v >= threshold}
			
 
				
				 
			
 
				
				-    ordered_participations = sorted(
			
 
				
				-        participations.items(), key=lambda x: x[1], reverse=True)
			
 
				
				-    return [{"name": v[0], "participations": v[1]} for v in ordered_participations]
			
 
				
				+def podium(
			
 
				
				+        messages: List[Message],
			
 
				
				+        top: int,
			
 
				
				+        participants: List[Participant] | None = [],
			
 
				
				+        ) -> Dict[str, int]:
			
 
				
				+    """
			
 
				
				+    Returns the N biggest counters
			
 
				
				+    """
			
 
				
				+    cp = count_participations(messages, participants)
			
 
				
				+    return {k: cp[k] for idx, k in enumerate(cp) if idx < top}
			
--- a/million/analyze/dns_solver.py
+++ b/million/analyze/dns_solver.py
@@ -0,0 +1,36 @@
 
				
				+from typing import Dict
			
 
				
				+
			
 
				
				+from pydantic import BaseModel, PrivateAttr
			
 
				
				+from million.model.message import Message
			
 
				
				+
			
 
				
				+_default_file_path = './DNS'
			
 
				
				+
			
 
				
				+class DNS_solver(BaseModel):
			
 
				
				+    file_path:str = _default_file_path
			
 
				
				+
			
 
				
				+    _bank: Dict[str, int] | None = PrivateAttr(None)
			
 
				
				+
			
 
				
				+    def solve(self, msg: Message) -> int:
			
 
				
				+        if self._bank == None: 
			
 
				
				+            self._bank = self.load(_default_file_path)
			
 
				
				+
			
 
				
				+        k = self._get_key(msg)
			
 
				
				+        if k and k in self._bank: return self._bank[k]
			
 
				
				+
			
 
				
				+        return None
			
 
				
				+
			
 
				
				+    def load(self, file_name: str) -> Dict[str, int]:
			
 
				
				+        result = {}
			
 
				
				+        with open(file_name, 'r') as f:
			
 
				
				+            for line in f:
			
 
				
				+                a,b = line.split()
			
 
				
				+                result[a] = int(b)
			
 
				
				+
			
 
				
				+        return result
			
 
				
				+
			
 
				
				+    def _get_key(self, msg: Message) -> str:
			
 
				
				+
			
 
				
				+        # look into msg attributes
			
 
				
				+        # find uri
			
 
				
				+        return (msg.share or None) and msg.share.link or \
			
 
				
				+            (msg.gifs or None) and msg.gifs[0].uri
			
--- a/million/analyze/find_holes.py
+++ b/million/analyze/find_holes.py
@@ -1,5 +1,3 @@
 
				
				-
			
 
				
				-
			
 
				
				 from typing import List
			
 
				
				 from million.model.message import Message
			
 
				
				 from million.model.sequence import Sequence
			
@@ -23,7 +21,6 @@ def compute_sequences(messages: List[Message], accepted_max: int = 1_000_000) ->
 
				
				 
			
 
				
				     return sequences            
			
 
				
				 
			
 
				
				-
			
 
				
				 def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
			
 
				
				     """ 
			
 
				
				     Take sequences as an input and returns a list with every
			
@@ -43,8 +40,6 @@ def merge_duplicates(sequences: List[Sequence]) -> List[Sequence]:
 
				
				 
			
 
				
				     return result
			
 
				
				 
			
 
				
				-
			
 
				
				-
			
 
				
				 def invert_sequences(sequences: List[Sequence]) -> List[Sequence]:
			
 
				
				     """ 
			
 
				
				     Returns the sequences representing the spaces between
			
--- a/million/analyze/message_evaluation.py
+++ b/million/analyze/message_evaluation.py
@@ -1,38 +1,47 @@
 
				
				-from math import floor
			
 
				
				 import re
			
 
				
				 from typing import Dict
			
 
				
				 from million.model.message import Message
			
 
				
				+import million.analyze.dns_solver as dns
			
 
				
				 
			
 
				
				-memoization: Dict[Message, int] = {}
			
 
				
				 
			
 
				
				-# TODO WIP
			
 
				
				-# - DNS to resolve audio, gif, pictures with counts
			
 
				
				-def __compute__(msg: Message) -> int:
			
 
				
				-    value = __computeContent(msg)
			
 
				
				+_memoization: Dict[Message, int] = {}
			
 
				
				+_dns_solver: dns.DNS_solver = dns.DNS_solver()
			
 
				
				 
			
 
				
				-    memoization[msg] = value
			
 
				
				+def get(msg: Message) -> int:
			
 
				
				+    """
			
 
				
				+    Returns the estimated value counted in this message
			
 
				
				+    """
			
 
				
				+    return _memoization.get(msg, _compute(msg))
			
 
				
				+
			
 
				
				+def reset(msg: Message) -> None:
			
 
				
				+    """
			
 
				
				+    Drop memorized value of this Message
			
 
				
				+    """
			
 
				
				+    if msg in _memoization:
			
 
				
				+        _memoization.pop(msg)
			
 
				
				+
			
 
				
				+def reset() -> None:
			
 
				
				+    """
			
 
				
				+    Drop every memorized message value
			
 
				
				+    """
			
 
				
				+    _memoization.clear()
			
 
				
				+
			
 
				
				+
			
 
				
				+def _compute(msg: Message) -> int:
			
 
				
				+    value = _dns_solver.solve(msg) or \
			
 
				
				+        _computeContent(msg) or \
			
 
				
				+        None
			
 
				
				+
			
 
				
				+    _memoization[msg] = value
			
 
				
				     return value
			
 
				
				 
			
 
				
-def __computeContent(msg: Message) -> int:
+def _computeContent(msg: Message) -> int:
+    """
+    Return the first run of digits found in the message content as an
+    int, or None when the content is empty/missing or holds no digit.
+    """
     # TODO parse potential math expressions in content
-    match = re.search(r"\d+", msg.content)
+    # Short-circuit on falsy content so re.search never receives None.
+    match = msg.content and re.search(r"\d+", msg.content)
     
     if match:
-        value = int(match[0])
+        value = int(match.group())
     else:
         value = None
     
-    return value
-
-def reset(msg: Message) -> None:
-    if msg in memoization:
-        memoization.pop(msg)
-
-def reset() -> None:
-    memoization.clear()
-
-def get(msg: Message) -> int:
-    """
-    Returns the estimated value counted in this message
-    """
-    return memoization.get(msg, __compute__(msg))
+    return value
			
--- a/million/analyze/retain_counts.py
+++ b/million/analyze/retain_counts.py
@@ -1,15 +1,10 @@
 
				
				-
			
 
				
				-import re
			
 
				
				 from typing import List
			
 
				
				 from million.model.message import Message
			
 
				
				+import million.analyze.message_evaluation as msg_val
			
 
				
				 
			
 
				
				 
			
 
				
 def retain_counts(messages : List[Message])-> List[Message]:
     """
-    Retain only the messages that have a content
+    Retain only the messages that have a counted value, i.e. those for
+    which msg_val.get returns a truthy result (a message evaluated to
+    None or 0 is dropped). The input order is preserved.
     """
-    return [
-        m for m in messages 
-        if m.content and
-        re.search('(\d{2,}|^\d$)', m.content)
-        ]
+    return [msg for msg in messages if msg_val.get(msg)]
			
--- a/million/analyze/word_finder.py
+++ b/million/analyze/word_finder.py
@@ -4,7 +4,7 @@ from million.model.message import Message
 
				
				 
			
 
				
				 
			
 
				
 def _wordFilter(msg: Message, words: List[str]) -> bool:
+    # Each entry of `words` is spliced into the pattern unescaped, so it
+    # acts as a regex fragment enclosed in word boundaries.
-    rgx = r"(\b"+ r'\b|\b'.join(words) + r"\b)"
+    rgx = r"(\b"+ r"\b|\b".join(words) + r"\b)"
+    # Yields the falsy content or the re.search result (a match object or
+    # None), case-insensitively; callers get truthiness, not a strict bool.
     return msg.content and re.search(rgx, msg.content, re.I)
			
 
				
				 
			
 
				
				 def findWords(messages: List[Message], words: List[str]) -> List[Message]:
			
--- a/million/model/fb_export.py
+++ b/million/model/fb_export.py
@@ -1,11 +1,10 @@
 
				
				 from __future__ import annotations
			
 
				
				-
			
 
				
				 from typing import Any, List, Set
			
 
				
				 from pydantic import BaseModel
			
 
				
				-
			
 
				
				 from million.model.message import Message
			
 
				
				 from million.model.participant import Participant
			
 
				
				 
			
 
				
				+
			
 
				
				 class Image(BaseModel):
			
 
				
				     creation_timestamp: int
			
 
				
				     uri: str
			
@@ -28,6 +27,7 @@ class FacebookExport(BaseModel):
 
				
				     image: Image
			
 
				
				     joinable_mode: JoinableMode
			
 
				
				 
			
 
				
				+
			
 
				
				     def merge(self, other: FacebookExport) -> None:
			
 
				
				         if self == other:
			
 
				
				             self.messages.extend(other.messages)
			
@@ -37,7 +37,8 @@ class FacebookExport(BaseModel):
 
				
				     def sort(self) -> None:
			
 
				
				         self.messages.sort(key = lambda m: m.timestamp_ms)
			
 
				
				 
			
 
				
				-    # NOTE Toughen equality conditions ?
			
 
				
				+
			
 
				
				     def __eq__(self, other: FacebookExport) -> bool:
			
 
				
				+        # NOTE Toughen equality conditions ?
			
 
				
				         return self.title == other.title \
			
 
				
				             and self.image == other.image
			
--- a/million/model/message.py
+++ b/million/model/message.py
@@ -1,7 +1,7 @@
 
				
				 from datetime import datetime
			
 
				
				-from math import floor
			
 
				
				 from typing import Any, List
			
 
				
				-from pydantic import BaseModel
			
 
				
				+from uuid import uuid4
			
 
				
				+from pydantic import BaseModel, PrivateAttr, computed_field
			
 
				
				 
			
 
				
				 class Reaction(BaseModel):
			
 
				
				     reaction: str
			
@@ -45,10 +45,18 @@ class Message(BaseModel):
 
				
				     is_unsent: bool | None = None
			
 
				
				     is_geoblocked_for_viewer: bool
			
 
				
				 
			
 
				
				+    _id: str = PrivateAttr(default_factory=lambda: str(uuid4()))
			
 
				
				+
			
 
				
				+
			
 
				
				     def __str__(self) -> str:
			
 
				
				         dt = datetime.fromtimestamp(self.timestamp_ms / 1000)
			
 
				
				         dt_str = dt.strftime("%d/%m/%Y, %H:%M:%S")
			
 
				
				         return f"{self.sender_name}({dt_str}) : {self.content}"
			
 
				
				 
			
 
				
				     def __hash__(self) -> int:
			
 
				
				-        return hash(self.sender_name + str(self.timestamp_ms))
			
 
				
				+        return hash(self.item_id)
			
 
				
				+    
			
 
				
				+    @computed_field
			
 
				
				+    @property
			
 
				
				+    def item_id(self) -> str:
			
 
				
				+        return self._id
			
--- a/million/model/sequence.py
+++ b/million/model/sequence.py
@@ -1,8 +1,5 @@
 
				
				 from __future__ import annotations
			
 
				
				-
			
 
				
				-from pydantic import BaseModel
			
 
				
				-import pydantic
			
 
				
				-
			
 
				
				+from pydantic import validator, BaseModel
			
 
				
				 from million.model.message import Message
			
 
				
				 import million.analyze.message_evaluation as msg_val
			
 
				
				 
			
@@ -11,7 +8,7 @@ class Sequence(BaseModel):
 
				
				     start_message: Message
			
 
				
				     end_message: Message | None = None
			
 
				
				 
			
 
				
				-    @pydantic.validator('end_message', pre=True, always=True)
			
 
				
				+    @validator('end_message', pre=True, always=True)
			
 
				
				     def default_end_message(cls, v, *, values):
			
 
				
				         return v or values['start_message'] 
			
 
				
				 
			
--- a/million/parse/fb_exports.py
+++ b/million/parse/fb_exports.py
@@ -48,6 +48,7 @@ def parse_dirfiles(file_dir: str) -> FacebookExport:
 
				
				     result.sort()
			
 
				
				     return result
			
 
				
				 
			
 
				
				+
			
 
				
				 def __read_broken_fb_json(binary_data):
			
 
				
				     # https://stackoverflow.com/questions/50008296/facebook-json-badly-encoded
			
 
				
				     repaired = re.sub(
			
--- a/scripts/find_gromots.py
+++ b/scripts/find_gromots.py
@@ -1,14 +1,9 @@
 
				
				 from datetime import datetime
			
 
				
				 from million.analyze.word_finder import findWords
			
 
				
				-from million.parse.fb_exports import FacebookExportParser
			
 
				
				+import million.parse.fb_exports as fb
			
 
				
				 
			
 
				
				 
			
 
				
				 DATA_PATH = './data/'
			
 
				
				-
			
 
				
				-parser = FacebookExportParser()
			
 
				
				-
			
 
				
				-export = parser.parse(DATA_PATH)
			
 
				
				-
			
 
				
				 gros_mots = [
			
 
				
				     '.*merde.*',
			
 
				
				     'sexe',
			
@@ -27,6 +22,7 @@ gros_mots = [
 
				
				     'bais.*'
			
 
				
				     ]
			
 
				
				 
			
 
				
				+export = fb.parse_dirfiles(DATA_PATH)
			
 
				
				 msg_gros_mots = findWords(export.messages, gros_mots)
			
 
				
				 
			
 
				
				 msg_gros_mots_grp = {}
			
--- a/scripts/find_holes.py
+++ b/scripts/find_holes.py
@@ -1,5 +1,5 @@
 
				
				 from datetime import datetime
			
 
				
				-from million.analyze.find_holes import compute_sequences, find_holes
			
 
				
				+import million.analyze.find_holes as fh
			
 
				
				 from million.analyze.retain_counts import retain_counts
			
 
				
				 import million.parse.fb_exports as fb
			
 
				
				 
			
@@ -10,13 +10,15 @@ export = fb.parse_dirfiles(DATA_PATH)
 
				
				 
			
 
				
				 filtered = retain_counts(export.messages)
			
 
				
				 
			
 
				
				-sequences = compute_sequences(filtered)
			
 
				
				+sequences = fh.compute_sequences(filtered)
			
 
				
				 
			
 
				
				 actual_counted = sum([s.length() for s in sequences])
			
 
				
				 
			
 
				
				 print(f"Actual counted: {actual_counted}")
			
 
				
				 
			
 
				
				-holes = find_holes(filtered)
			
 
				
				+merged = fh.merge_duplicates(sequences)
			
 
				
				+merged = [s for s in merged if s.length() > 1]
			
 
				
				+holes = fh.find_holes(filtered)
			
 
				
				 
			
 
				
				 print(len(holes))
			
 
				
				 
			
--- a/scripts/read_top.py
+++ b/scripts/read_top.py
@@ -1,4 +1,3 @@
 
				
				-from million.view.bar_chart import plot as bar_chart
			
 
				
				 from million.analyze.count_participations import count_participations
			
 
				
				 from million.analyze.retain_counts import retain_counts
			
 
				
				 import million.parse.fb_exports as fb
			
@@ -7,15 +6,11 @@ import million.parse.fb_exports as fb
 
				
				 DATA_PATH = './data/'
			
 
				
				 
			
 
				
				 export = fb.parse_dirfiles(DATA_PATH)
			
 
				
				-
			
 
				
				 filtered = retain_counts(export.messages)
			
 
				
				 
			
 
				
				 print(len(filtered))
			
 
				
				 
			
 
				
				-counted_participations = count_participations(filtered, export.participants)
			
 
				
				-
			
 
				
				-kept_participations = [
			
 
				
				-    p for p in counted_participations if p['participations'] > 100]
			
 
				
				+participations = count_participations(filtered, export.participants, 100)
			
 
				
				 
			
 
				
				-print("\n".join(
			
 
				
				-    [f"{p['name']}: {p['participations']}" for p in kept_participations]))
			
 
				
				+for name, count in participations.items():
			
 
				
				+    print(f"{name}: {count}")
作成者	SHA1	メッセージ	日付
Figg	183acd4e97	DNS solver en place, envoie tes photos elias ptn	6ヶ月前
Figg	1c6f0a2c9d	Refacto dans la partie analyze petites modifs de syntaxe ailleurs	6ヶ月前
Figg	27a7516a57	message implémente un identifiant généré automatiquement	6ヶ月前
		`@@ -0,0 +1 @@`
	1	`+https://www.youtube.com/watch?v=mC9yute2k_Q 3000`