Przeglądaj źródła

Merge branch 'master' of https://git.eseb.ovh/DemiSel/Messenger_1Million_Stats into DEV-Mael

pull/5/head
Figg 9 miesięcy temu
rodzic
commit
90d098391b
10 zmienionych plików z 212 dodań i 1 usunięć
  1. 20
    0
      .drone.yml
  2. 11
    0
      Dockerfile
  3. 11
    0
      million/analyze/word_finder.py
  4. 85
    1
      poetry.lock
  5. 3
    0
      pyproject.toml
  6. 2
    0
      pytest.ini
  7. 44
    0
      scripts/find_gromots.py
  8. 9
    0
      test/TestCase.py
  9. 0
    0
      test/__init__.py
  10. 27
    0
      test/model/message_test.py

+ 20
- 0
.drone.yml Wyświetl plik

@@ -0,0 +1,20 @@
1
+kind: pipeline
2
+type: docker
3
+name: run_tests
4
+
5
+volumes:
6
+- name: docker_sock
7
+  host:
8
+    path: /var/run/docker.sock
9
+
10
+steps:
11
+- name: build and test in docker image
12
+  image: docker
13
+  volumes:
14
+  - name: docker_sock
15
+    path: /var/run/docker.sock
16
+  commands:
17
+  - docker build --build-arg STAGE=test -t mytestimage .
18
+  - docker run mytestimage python -m pytest test
19
+  when:
20
+    event: [push]

+ 11
- 0
Dockerfile Wyświetl plik

@@ -0,0 +1,11 @@
1
+FROM python:3.11-buster
2
+
3
+
4
+RUN pip install poetry
5
+
6
+COPY pyproject.toml poetry.lock ./
7
+COPY ./million ./million
8
+COPY ./test ./test
9
+
10
+RUN poetry config virtualenvs.create false \
11
+    && poetry install --no-interaction --no-ansi

+ 11
- 0
million/analyze/word_finder.py Wyświetl plik

@@ -0,0 +1,11 @@
1
+import re
2
+from typing import List
3
+from million.model.message import Message
4
+
5
+
6
def _build_word_pattern(words: List[str]) -> "re.Pattern[str] | None":
    """Compile a single case-insensitive pattern matching any entry of ``words``.

    Every entry is used as a regular-expression fragment (it is NOT escaped)
    and has to match between two word boundaries. Each fragment is wrapped in
    its own non-capturing group so that an entry containing ``|`` keeps both
    of its ``\\b`` anchors.

    Returns ``None`` when ``words`` is empty: joining zero fragments would
    otherwise produce a pattern that matches almost every message.
    """
    fragments = list(words)
    if not fragments:
        return None
    alternatives = "|".join(rf"\b(?:{fragment})\b" for fragment in fragments)
    return re.compile(alternatives, re.I)


def _wordFilter(msg: Message, words: List[str]) -> bool:
    """Tell whether ``msg.content`` contains at least one of ``words``.

    Args:
        msg: message to inspect; a missing or empty ``content`` never matches.
        words: regular-expression fragments, matched case-insensitively as
            whole words.

    Returns:
        ``True`` if any fragment matches, ``False`` otherwise (always a real
        ``bool``, never a match object or ``None``).
    """
    pattern = _build_word_pattern(words)
    if pattern is None or not msg.content:
        return False
    return pattern.search(msg.content) is not None


def findWords(messages: List[Message], words: List[str]) -> List[Message]:
    """Return the messages whose content contains at least one of ``words``.

    Args:
        messages: messages to scan; their order is preserved in the result.
        words: regular-expression fragments, matched case-insensitively as
            whole words.

    Returns:
        A new list (not a one-shot iterator) holding the matching messages.
        It is empty when ``words`` is empty.
    """
    # Compile once for the whole scan instead of once per message.
    pattern = _build_word_pattern(words)
    if pattern is None:
        return []
    return [m for m in messages if m.content and pattern.search(m.content)]

+ 85
- 1
poetry.lock Wyświetl plik

@@ -12,6 +12,17 @@ files = [
12 12
 ]
13 13
 
14 14
 [[package]]
15
+name = "colorama"
16
+version = "0.4.6"
17
+description = "Cross-platform colored terminal text."
18
+optional = false
19
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
20
+files = [
21
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
22
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
23
+]
24
+
25
+[[package]]
15 26
 name = "contourpy"
16 27
 version = "1.2.0"
17 28
 description = "Python library for calculating contours of 2D quadrilateral grids"
@@ -90,6 +101,20 @@ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"]
90 101
 tests = ["pytest", "pytest-cov", "pytest-xdist"]
91 102
 
92 103
 [[package]]
104
+name = "exceptiongroup"
105
+version = "1.2.0"
106
+description = "Backport of PEP 654 (exception groups)"
107
+optional = false
108
+python-versions = ">=3.7"
109
+files = [
110
+    {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
111
+    {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
112
+]
113
+
114
+[package.extras]
115
+test = ["pytest (>=6)"]
116
+
117
+[[package]]
93 118
 name = "fonttools"
94 119
 version = "4.49.0"
95 120
 description = "Tools to manipulate font files"
@@ -155,6 +180,17 @@ unicode = ["unicodedata2 (>=15.1.0)"]
155 180
 woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
156 181
 
157 182
 [[package]]
183
+name = "iniconfig"
184
+version = "2.0.0"
185
+description = "brain-dead simple config-ini parsing"
186
+optional = false
187
+python-versions = ">=3.7"
188
+files = [
189
+    {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
190
+    {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
191
+]
192
+
193
+[[package]]
158 194
 name = "kiwisolver"
159 195
 version = "1.4.5"
160 196
 description = "A fast implementation of the Cassowary constraint solver"
@@ -529,6 +565,21 @@ typing = ["typing-extensions"]
529 565
 xmp = ["defusedxml"]
530 566
 
531 567
 [[package]]
568
+name = "pluggy"
569
+version = "1.4.0"
570
+description = "plugin and hook calling mechanisms for python"
571
+optional = false
572
+python-versions = ">=3.8"
573
+files = [
574
+    {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"},
575
+    {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"},
576
+]
577
+
578
+[package.extras]
579
+dev = ["pre-commit", "tox"]
580
+testing = ["pytest", "pytest-benchmark"]
581
+
582
+[[package]]
532 583
 name = "pydantic"
533 584
 version = "2.6.1"
534 585
 description = "Data validation using Python type hints"
@@ -653,6 +704,28 @@ files = [
653 704
 diagrams = ["jinja2", "railroad-diagrams"]
654 705
 
655 706
 [[package]]
707
+name = "pytest"
708
+version = "8.0.2"
709
+description = "pytest: simple powerful testing with Python"
710
+optional = false
711
+python-versions = ">=3.8"
712
+files = [
713
+    {file = "pytest-8.0.2-py3-none-any.whl", hash = "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096"},
714
+    {file = "pytest-8.0.2.tar.gz", hash = "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd"},
715
+]
716
+
717
+[package.dependencies]
718
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
719
+exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
720
+iniconfig = "*"
721
+packaging = "*"
722
+pluggy = ">=1.3.0,<2.0"
723
+tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
724
+
725
+[package.extras]
726
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
727
+
728
+[[package]]
656 729
 name = "python-dateutil"
657 730
 version = "2.8.2"
658 731
 description = "Extensions to the standard Python datetime module"
@@ -689,6 +762,17 @@ files = [
689 762
 ]
690 763
 
691 764
 [[package]]
765
+name = "tomli"
766
+version = "2.0.1"
767
+description = "A lil' TOML parser"
768
+optional = false
769
+python-versions = ">=3.7"
770
+files = [
771
+    {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
772
+    {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
773
+]
774
+
775
+[[package]]
692 776
 name = "typing-extensions"
693 777
 version = "4.9.0"
694 778
 description = "Backported and Experimental Type Hints for Python 3.8+"
@@ -713,4 +797,4 @@ files = [
713 797
 [metadata]
714 798
 lock-version = "2.0"
715 799
 python-versions = "^3.10"
716
-content-hash = "2f3cf452fb668f0f7dc134643e52cee57a754cb616cd4525addbb07d985a24d8"
800
+content-hash = "6cc1be4150b37f0fea2143e00fd6378e4db08d286a551fb6d6981593638cba5a"

+ 3
- 0
pyproject.toml Wyświetl plik

@@ -13,6 +13,9 @@ pydantic = "^2.6.1"
13 13
 pandas = "^2.2.0"
14 14
 
15 15
 
16
+[tool.poetry.group.test.dependencies]
17
+pytest = "^8.0.2"
18
+
16 19
 [build-system]
17 20
 requires = ["poetry-core"]
18 21
 build-backend = "poetry.core.masonry.api"

+ 2
- 0
pytest.ini Wyświetl plik

@@ -0,0 +1,2 @@
1
+[pytest]
2
+python_files = *_test.py

+ 44
- 0
scripts/find_gromots.py Wyświetl plik

@@ -0,0 +1,44 @@
1
+from datetime import datetime
2
+from million.analyze.word_finder import findWords
3
+from million.parse.fb_exports import FacebookExportParser
4
+
5
+
6
# Directory handed to the Facebook export parser.
DATA_PATH = './data/'

parser = FacebookExportParser()
export = parser.parse(DATA_PATH)

# Regular-expression fragments passed to findWords.
gros_mots = [
    '.*merde.*',
    'sexe',
    'pute',
    'pé?dé?',
    'putain',
    'bite',
    'encul.*',
    'cul',
    'nichon',
    'gueule',
    'con(ne)?',
    'chatte',
    'niqu.*',
    'chi(é|e).*',
    'bais.*'
    ]

msg_gros_mots = findWords(export.messages, gros_mots)

# Bucket the matching messages by the name of their sender.
msg_gros_mots_grp = {}
for hit in msg_gros_mots:
    msg_gros_mots_grp.setdefault(hit.sender_name, []).append(hit)

# Print senders from the fewest to the most matching messages; the sort is
# stable, so ties keep the order in which senders were first encountered.
for sender, sender_hits in sorted(msg_gros_mots_grp.items(),
                                  key=lambda entry: len(entry[1])):
    print(sender)

    for hit in sender_hits:
        # timestamp_ms is divided by 1000 to get the seconds fromtimestamp expects.
        sent_at = datetime.fromtimestamp(hit.timestamp_ms / 1000)
        stamp = sent_at.strftime("%d/%m/%Y %H:%M:%S")
        print(f"\t{stamp} : {hit.content}")

+ 9
- 0
test/TestCase.py Wyświetl plik

@@ -0,0 +1,9 @@
1
+
2
+import unittest
3
+
4
+from million.model.message import Message
5
+
6
+
7
class TestCase(unittest.TestCase):
    """Shared base class for the project's tests, providing message fixtures."""

    def _message_with_text(self, text: str):
        """Build a ``Message`` whose content is ``text``.

        The sender name is fixed to ``"test"`` and the timestamp to ``0``.
        """
        fields = {"content": text, "sender_name": "test", "timestamp_ms": 0}
        return Message(**fields)

+ 0
- 0
test/__init__.py Wyświetl plik


+ 27
- 0
test/model/message_test.py Wyświetl plik

@@ -0,0 +1,27 @@
1
+
2
+
3
+from million.model.message import Message
4
+from test.TestCase import TestCase
5
+
6
+
7
class MessageTest(TestCase):
    """Checks what ``get_counted_value`` reports for a few message texts."""

    def test_message_nominal(self, overrides=None, exclude=None):
        """A message whose text is just "1" has a counted value of 1."""
        counted = self._message_with_text("1").get_counted_value()

        assert 1 == counted

    def test_message_with_text(self, overrides=None, exclude=None):
        """Text following the number ("1 text") still gives a counted value of 1."""
        counted = self._message_with_text("1 text").get_counted_value()

        assert 1 == counted

    def test_message_floored_dot(self, overrides=None, exclude=None):
        """A dot-separated decimal ("1.5") gives a counted value of 1."""
        counted = self._message_with_text("1.5").get_counted_value()

        assert 1 == counted

    def test_message_floored_comma(self, overrides=None, exclude=None):
        """A comma-separated decimal ("1,5") gives a counted value of 1."""
        counted = self._message_with_text("1,5").get_counted_value()

        assert 1 == counted

Ładowanie…
Anuluj
Zapisz