Bladeren bron

update data & invites graph script

master
Elias Sebbar 2 maanden geleden
bovenliggende
commit
2d97828ca0
14 gewijzigde bestanden met 305107 toevoegingen en 252717 verwijderingen
  1. 30205
    30474
      data/message_1.json
  2. 3320
    0
      data/message_10.json
  3. 30461
    30554
      data/message_2.json
  4. 30782
    30734
      data/message_3.json
  5. 30764
    30699
      data/message_4.json
  6. 30602
    30855
      data/message_5.json
  7. 31026
    31002
      data/message_6.json
  8. 31240
    31426
      data/message_7.json
  9. 31196
    31086
      data/message_8.json
  10. 55315
    5883
      data/message_9.json
  11. 117
    2
      poetry.lock
  12. 2
    0
      pyproject.toml
  13. 71
    0
      scripts/graph_invites.py
  14. 6
    2
      scripts/read_top.py

+ 30205
- 30474
data/message_1.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 3320
- 0
data/message_10.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 30461
- 30554
data/message_2.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 30782
- 30734
data/message_3.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 30764
- 30699
data/message_4.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 30602
- 30855
data/message_5.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 31026
- 31002
data/message_6.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 31240
- 31426
data/message_7.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 31196
- 31086
data/message_8.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 55315
- 5883
data/message_9.json
Diff onderdrukt omdat het bestand te groot is
Bestand weergeven


+ 117
- 2
poetry.lock Bestand weergeven

@@ -1,4 +1,4 @@
1
-# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
1
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
2 2
 
3 3
 [[package]]
4 4
 name = "annotated-types"
@@ -180,6 +180,66 @@ unicode = ["unicodedata2 (>=15.1.0)"]
180 180
 woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
181 181
 
182 182
 [[package]]
183
+name = "igraph"
184
+version = "0.11.6"
185
+description = "High performance graph data structures and algorithms"
186
+optional = false
187
+python-versions = ">=3.8"
188
+files = [
189
+    {file = "igraph-0.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3f8b837181e8e87676be3873ce87cc92cc234efd58a2da2f6b4e050db150fcf4"},
190
+    {file = "igraph-0.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:245c4b7d7657849eff80416f5df4525c8fc44c74a981ee4d44f0ef2612c3bada"},
191
+    {file = "igraph-0.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdb7be3d165073c0136295c0808e9edc57ba096cdb26e94086abb04561f7a292"},
192
+    {file = "igraph-0.11.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58974e20df2986a1ae52a16e51ecb387cc0cbeb41c5c0ddff4d373a1bbf1d9c5"},
193
+    {file = "igraph-0.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bef14de5e8ab70724a43808b1ed14aaa6fe1002f87e592289027a3827a8f44a"},
194
+    {file = "igraph-0.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:86c1e98de2e32d074df8510bf18abfa1f4c5fda4cb28a009985a5d746b0c0125"},
195
+    {file = "igraph-0.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ebc5b3d702158abeb2e4d2414374586a2b932e1a07e48352b470600e1733d528"},
196
+    {file = "igraph-0.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0efe6d0fb22d3987a800eb3857ed04df9eb4c5dddd0998be05232cb646f1c337"},
197
+    {file = "igraph-0.11.6-cp38-cp38-win32.whl", hash = "sha256:f4e68b27497b1c8ada2fb2bc35ef3fa7b0d72e84306b3d648d3de240fc618c32"},
198
+    {file = "igraph-0.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:5665b33dfbfca5f54ce9b4fea6b97903bd0e99fb1b02acf5e57e600bdfa5a355"},
199
+    {file = "igraph-0.11.6-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:8aabef03d787b519d1075dfc0da4a1109fb113b941334883e3e7947ac30a459e"},
200
+    {file = "igraph-0.11.6-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1f2cc4a518d99cdf6cae514f85e93e56852bc8c325b3abb96037d1d690b5975f"},
201
+    {file = "igraph-0.11.6-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e859238be52ab8ccc614d18f9362942bc88ce543afc12548f81ae99b10801d"},
202
+    {file = "igraph-0.11.6-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d61fbe5e85eb4ae9efe08c461f9bdeedb02a2b5739fbc223d324a71f40a28be2"},
203
+    {file = "igraph-0.11.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6620ba39df29fd42151becf82309b54e57148233c9c3ef890eed62e25eed8a5"},
204
+    {file = "igraph-0.11.6-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:59666589bb3d07f310cda2c5106a8adeeb77c2ef27fecf1c6438b6091f4ca69d"},
205
+    {file = "igraph-0.11.6-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:8750b6d6caebf199cf7dc41c931f58e330153779707391e30f0a29f02666fb6e"},
206
+    {file = "igraph-0.11.6-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:967d6f2c30fe94317da15e459374d0fb8ca3e56020412f201ecd07dd5b5352f2"},
207
+    {file = "igraph-0.11.6-cp39-abi3-win32.whl", hash = "sha256:9744f95a67319eb6cb487ceabf30f5d7940de34bada51f0ba63adbd23e0f94ad"},
208
+    {file = "igraph-0.11.6-cp39-abi3-win_amd64.whl", hash = "sha256:b80e69eb11faa9c57330a9ffebdde5808966efe1c1f638d4d4827ea04df7aca8"},
209
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0329c16092e2ea7930d5f8368666ce7cb704900cc0ea04e4afe9ea1dd46e44af"},
210
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:21752313f449bd8688e5688e95ea7231cea5e9199c7162535029be0d9af848ac"},
211
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea25e136c6c4161f53ff58868b23ff6c845193050ab0e502236d68e5d4174e32"},
212
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac84433a03aef15e4b810010b08882b09854a3669450ccf31e392dbe295d2a66"},
213
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac697a44e3573169fa2b28c9c37dcf9cf01e0f558b845dd7123860d4c7c8fb89"},
214
+    {file = "igraph-0.11.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bdeae8bf35316eb1fb27bf667dcf5ecf5fcfb0b8f51831bc1b00c39c09c2d73b"},
215
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ad7e4aa442935de72554b96733bf6d7f09eac5cee97988a2562bdd3ca173cfa3"},
216
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:8d2818780358a686178866d01568b9df1f29678581734ad7a78882bab54df004"},
217
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2352276a20d979f1dea360af4202bb9f0c9a7d2c77f51815c0e625165e82013d"},
218
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:687fdab543b507d622fa3043f4227e5b26dc61dcf8ff8c0919fccddcc655f8b8"},
219
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57f7f8214cd48c9a4d97f7346a4152ba2d4ac95fb5ee0df4ecf224fce4ba3d14"},
220
+    {file = "igraph-0.11.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2b9cc69ede53f76ffae03b066609aa90184dd68ef15da8c104a97cebb9210838"},
221
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:591e1e447c3f0092daf7613a3eaedab83f9a0b0adbaf7702724c5117ded038a5"},
222
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ca558eb331bc687bc33e5cd23717e22676e9412f8cda3a31d30c996a0487610d"},
223
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf43c30e08debb087c9e3da69aa5cf1b6732968da34d55a614e3421b9a452146"},
224
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d38e8d7db72b187d9d2211d0d06b3271fa9f32b04d49d789e2859b5480db0d0"},
225
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a318b059051ff78144a1c3cb880f4d933c812bcdb3d833a49cd7168d0427672"},
226
+    {file = "igraph-0.11.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2c54027add809b3c5b6685b8deca4ea4763fd000b9ea45c7ee46b7c9d61ff15e"},
227
+    {file = "igraph-0.11.6.tar.gz", hash = "sha256:837f233256c3319f2a35a6a80d94eafe47b43791ef4c6f9e9871061341ac8e28"},
228
+]
229
+
230
+[package.dependencies]
231
+texttable = ">=1.6.2"
232
+
233
+[package.extras]
234
+cairo = ["cairocffi (>=1.2.0)"]
235
+doc = ["Sphinx (>=7.0.0)", "pydoctor (>=23.4.0)", "sphinx-gallery (>=0.14.0)", "sphinx-rtd-theme (>=1.3.0)"]
236
+matplotlib = ["matplotlib (>=3.6.0)"]
237
+plotly = ["plotly (>=5.3.0)"]
238
+plotting = ["cairocffi (>=1.2.0)"]
239
+test = ["Pillow (>=9)", "cairocffi (>=1.2.0)", "matplotlib (>=3.6.0)", "networkx (>=2.5)", "numpy (>=1.19.0)", "pandas (>=1.1.0)", "plotly (>=5.3.0)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)", "scipy (>=1.5.0)"]
240
+test-musl = ["cairocffi (>=1.2.0)", "networkx (>=2.5)", "pytest (>=7.0.1)", "pytest-timeout (>=2.1.0)"]
241
+
242
+[[package]]
183 243
 name = "iniconfig"
184 244
 version = "2.0.0"
185 245
 description = "brain-dead simple config-ini parsing"
@@ -580,6 +640,30 @@ dev = ["pre-commit", "tox"]
580 640
 testing = ["pytest", "pytest-benchmark"]
581 641
 
582 642
 [[package]]
643
+name = "pycairo"
644
+version = "1.26.1"
645
+description = "Python interface for cairo"
646
+optional = false
647
+python-versions = ">=3.8"
648
+files = [
649
+    {file = "pycairo-1.26.1-cp310-cp310-win32.whl", hash = "sha256:b93b9e3072826a346f1f79cb1becc403d1ba4a3971cad61d144db0fe6dcb6be8"},
650
+    {file = "pycairo-1.26.1-cp310-cp310-win_amd64.whl", hash = "sha256:acfc76924ed668d8fea50f6cc6097b9a57ef6cd3dc3f2fa20814380d639a6dd2"},
651
+    {file = "pycairo-1.26.1-cp310-cp310-win_arm64.whl", hash = "sha256:067191315c3b4d09cad1ec57cdb8fc1d72e2574e89389c268a94f22d4fa98b5f"},
652
+    {file = "pycairo-1.26.1-cp311-cp311-win32.whl", hash = "sha256:56a29623aa7b4adbde5024c61ff001455b5a3def79e512742ea45ab36c3fe24b"},
653
+    {file = "pycairo-1.26.1-cp311-cp311-win_amd64.whl", hash = "sha256:8d2889e03a095de5da9e68a589b691a3ada09d60ef18b5fc1b1b99f2a7794297"},
654
+    {file = "pycairo-1.26.1-cp311-cp311-win_arm64.whl", hash = "sha256:7a307111de345304ed8eadd7f81ebd7fb1fc711224aa314a4e8e33af7dfa3d27"},
655
+    {file = "pycairo-1.26.1-cp312-cp312-win32.whl", hash = "sha256:5cc1808e9e30ccd0f4d84ba7700db5aab5673f8b6b901760369ebb88a0823436"},
656
+    {file = "pycairo-1.26.1-cp312-cp312-win_amd64.whl", hash = "sha256:36131a726f568b2dbc5e78ff50fbaa379e69db00614d46d66b1e4289caf9b1ce"},
657
+    {file = "pycairo-1.26.1-cp312-cp312-win_arm64.whl", hash = "sha256:5577b51543ea4c283c15f436d891e9eaf6fd43fe74882adb032fba2c271f3fe9"},
658
+    {file = "pycairo-1.26.1-cp38-cp38-win32.whl", hash = "sha256:27ec7b42c58af35dc11352881262dce4254378b0f11be0959d1c13edb4539d2c"},
659
+    {file = "pycairo-1.26.1-cp38-cp38-win_amd64.whl", hash = "sha256:27357994d277b3fd10a45e9ef58f80a4cb5e3291fe76c5edd58d2d06335eb8e7"},
660
+    {file = "pycairo-1.26.1-cp39-cp39-win32.whl", hash = "sha256:e68300d1c2196d1d34de3432885ae9ff78e10426fa16f765742a11c6f8fe0a71"},
661
+    {file = "pycairo-1.26.1-cp39-cp39-win_amd64.whl", hash = "sha256:ce049930e294c29b53c68dcaab3df97cc5de7eb1d3d8e8a9f5c77e7164cd6e85"},
662
+    {file = "pycairo-1.26.1-cp39-cp39-win_arm64.whl", hash = "sha256:22e1db531d4ed3167a98f0ea165bfa2a30df9d6eb22361c38158c031065999a4"},
663
+    {file = "pycairo-1.26.1.tar.gz", hash = "sha256:a11b999ce55b798dbf13516ab038e0ce8b6ec299b208d7c4e767a6f7e68e8430"},
664
+]
665
+
666
+[[package]]
583 667
 name = "pydantic"
584 668
 version = "2.6.1"
585 669
 description = "Data validation using Python type hints"
@@ -740,6 +824,26 @@ files = [
740 824
 six = ">=1.5"
741 825
 
742 826
 [[package]]
827
+name = "python-igraph"
828
+version = "0.11.6"
829
+description = "High performance graph data structures and algorithms (legacy package)"
830
+optional = false
831
+python-versions = ">=3.6"
832
+files = [
833
+    {file = "python_igraph-0.11.6-py3-none-any.whl", hash = "sha256:39b2157eb819cea2b9ec18975c443f0ac36c001ec7a89e145c56a8bdf28a6bf1"},
834
+    {file = "python_igraph-0.11.6.tar.gz", hash = "sha256:8cdda8bc36be9498614f4a7ccb1ebc0b83d8e24069abef8e932a979476e7930b"},
835
+]
836
+
837
+[package.dependencies]
838
+igraph = "0.11.6"
839
+
840
+[package.extras]
841
+cairo = ["cairocffi (>=1.2.0)"]
842
+matplotlib = ["matplotlib (>=3.3.0)"]
843
+plotly = ["plotly (>=5.3.0)"]
844
+plotting = ["cairocffi (>=1.2.0)"]
845
+
846
+[[package]]
743 847
 name = "pytz"
744 848
 version = "2024.1"
745 849
 description = "World timezone definitions, modern and historical"
@@ -762,6 +866,17 @@ files = [
762 866
 ]
763 867
 
764 868
 [[package]]
869
+name = "texttable"
870
+version = "1.7.0"
871
+description = "module to create simple ASCII tables"
872
+optional = false
873
+python-versions = "*"
874
+files = [
875
+    {file = "texttable-1.7.0-py2.py3-none-any.whl", hash = "sha256:72227d592c82b3d7f672731ae73e4d1f88cd8e2ef5b075a7a7f01a23a3743917"},
876
+    {file = "texttable-1.7.0.tar.gz", hash = "sha256:2d2068fb55115807d3ac77a4ca68fa48803e84ebb0ee2340f858107a36522638"},
877
+]
878
+
879
+[[package]]
765 880
 name = "tomli"
766 881
 version = "2.0.1"
767 882
 description = "A lil' TOML parser"
@@ -797,4 +912,4 @@ files = [
797 912
 [metadata]
798 913
 lock-version = "2.0"
799 914
 python-versions = "^3.10"
800
-content-hash = "6cc1be4150b37f0fea2143e00fd6378e4db08d286a551fb6d6981593638cba5a"
915
+content-hash = "cfd60af98dcef95ed9f592cd7c4d616a3763174bffb33a612bb537bbb89d928f"

+ 2
- 0
pyproject.toml Bestand weergeven

@@ -11,6 +11,8 @@ python = "^3.10"
11 11
 matplotlib = "^3.8.3"
12 12
 pydantic = "^2.6.1"
13 13
 pandas = "^2.2.0"
14
+python-igraph = "^0.11.6"
15
+pycairo = "^1.26.1"
14 16
 
15 17
 
16 18
 [tool.poetry.group.test.dependencies]

+ 71
- 0
scripts/graph_invites.py Bestand weergeven

@@ -0,0 +1,71 @@
1
+
2
+from million.model.message import Message
3
+import million.parse.fb_exports as fb
4
+import re
5
+import math
6
+import igraph as ig
7
+
8
+DATA_PATH = './data/'
9
+
10
+export = fb.parse_dirfiles(DATA_PATH)
11
+
12
+
13
+def extraire_nom(entree: Message):
14
+    if entree.content is None:
15
+        return None
16
+
17
+    regex = r"(.+) a(?:vez)? ajouté (.+)(?: et (.+))? au groupe."
18
+
19
+    match = re.match(regex, entree.content)
20
+    if match is not None:
21
+        return (
22
+            match.group(1) if match.group(1) != "Vous" else entree.sender_name, 
23
+            *tuple(match.group(2).split(" et "))
24
+        )
25
+    else:
26
+        return None
27
+
28
+
29
+invites = []
30
+for message in export.messages:
31
+    noms = extraire_nom(message)
32
+    if noms is None:
33
+        continue
34
+    
35
+    invites.append((message, noms))
36
+
37
+
38
+print(f"Total invites: {len(invites)}")
39
+
40
+edges = []
41
+for invite in invites:
42
+    for invitee in invite[1]:
43
+        edges.append((invite[0].sender_name, invitee))
44
+
45
+vertices_as_names = list(set([edge[0] for edge in edges] + [edge[1] for edge in edges]))
46
+
47
+# Create a graph
48
+nb_vertices = len(vertices_as_names)
49
+vertices_as_indexes = {name: i for i, name in enumerate(vertices_as_names)}
50
+edges_as_indexes = [(vertices_as_indexes[edge[0]], vertices_as_indexes[edge[1]]) for edge in edges]
51
+
52
+g = ig.Graph(nb_vertices, edges_as_indexes)
53
+g.vs["name"] = vertices_as_names
54
+
55
+# Plot the graph
56
+
57
+visual_style = {
58
+    "vertex_size": 2,
59
+    "vertex_label": g.vs["name"],
60
+    "vertex_label_size": 10,
61
+    "vertex_color": "blue",
62
+    "edge_width": 0.5,
63
+    "edges_arrow_width": 20,
64
+    "layout": g.layout("rt_circular"),
65
+    "bbox": (2160, 1920),
66
+    "margin": 100,
67
+    "vertex_label_angle": math.pi * 1.5,
68
+    "vertex_label_dist": 10,
69
+}
70
+
71
+ig.plot(g, "output/invite_graph.png", **visual_style)

+ 6
- 2
scripts/read_top.py Bestand weergeven

@@ -17,5 +17,9 @@ counted_participations = count_participations(filtered, export.participants)
17 17
 kept_participations = [
18 18
     p for p in counted_participations if p['participations'] > 100]
19 19
 
20
-print("\n".join(
21
-    [f"{p['name']}: {p['participations']}" for p in kept_participations]))
20
+print(
21
+    "\n".join([
22
+        f"{k}:{p['name']}: {p['participations']}"
23
+        for (k, p) in enumerate(kept_participations)
24
+     ])
25
+     )

Laden…
Annuleren
Opslaan