|
@@ -3,6 +3,7 @@
|
3
|
3
|
# ------------------------------------------------
|
4
|
4
|
|
5
|
5
|
from matplotlib import pyplot as plt
|
|
6
|
+from datetime import datetime
|
6
|
7
|
import os
|
7
|
8
|
import json
|
8
|
9
|
import re
|
|
@@ -15,6 +16,9 @@ import sys, getopt
|
15
|
16
|
|
16
|
17
|
DATA_PATH = './data/'
|
17
|
18
|
|
|
19
|
+MONTH_MODE = False
|
|
20
|
+MONTH = None
|
|
21
|
+
|
18
|
22
|
OTHER_LABEL = 'Les Autres'
|
19
|
23
|
|
20
|
24
|
# JSON tags
|
|
@@ -28,19 +32,26 @@ SENDER = 'sender_name'
|
28
|
32
|
HELP = """General options :
|
29
|
33
|
-h, --help Consulter l'aide
|
30
|
34
|
--path=<path> Redéfinir le chemin d'accès aux données (par défaut ./data)
|
|
35
|
+ --month <mm/yyyy>
|
31
|
36
|
"""
|
32
|
37
|
|
33
|
38
|
# ------------------------------------------------
|
34
|
39
|
# Functions
|
35
|
40
|
# ------------------------------------------------
|
36
|
41
|
|
|
42
|
def printHelp():
    """Print the usage banner followed by the option summary, then exit.

    The process always terminates with exit status 2 after printing.
    """
    # The script name shown in the banner is derived from this file's path.
    scriptName = os.path.basename(__file__)
    usageBanner = 'Usage:\n {} <command> [option]\n'.format(scriptName)

    for section in (usageBanner, HELP):
        print(section)

    sys.exit(2)
|
|
46
|
+
|
37
|
47
|
def handleArguments(argv):
|
|
48
|
+ global MONTH_MODE
|
|
49
|
+ global MONTH
|
|
50
|
+
|
38
|
51
|
try:
|
39
|
|
- opts, args = getopt.getopt(argv, 'h',['help','path='])
|
|
52
|
+ opts, args = getopt.getopt(argv, 'h',['help','path=', 'month='])
|
40
|
53
|
except getopt.GetoptError:
|
41
|
|
- print('Usage:\n '+os.path.basename(__file__)+' <command> [option]\n')
|
42
|
|
- print(HELP)
|
43
|
|
- sys.exit(2)
|
|
54
|
+ printHelp()
|
44
|
55
|
|
45
|
56
|
for opt, arg in opts:
|
46
|
57
|
if opt in ('-h', '--help'):
|
|
@@ -48,6 +59,15 @@ def handleArguments(argv):
|
48
|
59
|
sys.exit()
|
49
|
60
|
elif opt in ('--path'):
|
50
|
61
|
DATA_PATH = arg
|
|
62
|
+ elif opt in ('--month'):
|
|
63
|
+ MONTH_MODE = True
|
|
64
|
+ try:
|
|
65
|
+ t = arg.split("/")
|
|
66
|
+ int(t[0])
|
|
67
|
+ int(t[1])
|
|
68
|
+ MONTH = t
|
|
69
|
+ except:
|
|
70
|
+ printHelp()
|
51
|
71
|
|
52
|
72
|
def readBrokenFbJson(datafile_path):
|
53
|
73
|
# ntm facebook
|
|
@@ -65,7 +85,6 @@ def computeData():
|
65
|
85
|
datafiles_path = [DATA_PATH + filename for filename in os.listdir(DATA_PATH)]
|
66
|
86
|
messages, participants = [], []
|
67
|
87
|
|
68
|
|
- print(datafiles_path)
|
69
|
88
|
|
70
|
89
|
for datafile_path in datafiles_path:
|
71
|
90
|
datacontent = readBrokenFbJson(datafile_path)
|
|
@@ -82,7 +101,15 @@ def cleanParticipants(rawParticipants):
|
82
|
101
|
return set([participant[NAME] for participant in rawParticipants])
|
83
|
102
|
|
84
|
103
|
def cleanMessages(rawMessages):
    """Return the messages that carry text content, sorted by timestamp.

    Messages without a CONTENT key are dropped. When MONTH_MODE is enabled,
    only messages sent during the month/year stored in MONTH are kept
    (MONTH is the "mm/yyyy" option split on "/", i.e. [month, year]).

    Args:
        rawMessages: iterable of message dicts; every kept message must
            have a TIMESTAMP entry.

    Returns:
        A new list of the kept messages in ascending TIMESTAMP order.
    """
    withContent = (message for message in rawMessages if CONTENT in message)

    if MONTH_MODE:
        # Parse the requested period once instead of once per message.
        wantedMonth, wantedYear = int(MONTH[0]), int(MONTH[1])

        def sentInWantedMonth(message):
            # TIMESTAMP is divided by 1000 before conversion -- presumably
            # it is expressed in milliseconds. The conversion uses the
            # local timezone, as datetime.fromtimestamp does by default.
            sentAt = datetime.fromtimestamp(message[TIMESTAMP] / 1000)
            return sentAt.month == wantedMonth and sentAt.year == wantedYear

        kept = [message for message in withContent if sentInWantedMonth(message)]
    else:
        kept = list(withContent)

    return sorted(kept, key=lambda message: message[TIMESTAMP])
|
87
|
114
|
|
88
|
115
|
# TODO tester l'approche en recherche incrémentale
|
|
@@ -96,16 +123,19 @@ def computeParticipation(messages):
|
96
|
123
|
|
97
|
124
|
for message in messages:
|
98
|
125
|
sender = message[SENDER]
|
99
|
|
- result[sender] = result[sender]+1 if sender in result else 1
|
|
126
|
+ countParticipation(result, sender, message)
|
100
|
127
|
|
101
|
128
|
return sorted(result.items(), key = lambda x: x[1])
|
102
|
129
|
|
|
130
|
def countParticipation(participations, sender, message):
    """Increment the message count recorded for ``sender``.

    Args:
        participations: dict mapping a sender to a message count;
            updated in place.
        sender: key whose counter is incremented (created at 1 if absent).
        message: the message being counted. Not used by this counter, but
            kept in the signature so callers can pass it unchanged.
    """
    # A single lookup with a default replaces the membership test + index.
    participations[sender] = participations.get(sender, 0) + 1
|
|
132
|
+
|
103
|
133
|
def mergeSmallParticipation(rawParticipation, threshold = 1):
|
104
|
134
|
values = [e[1] for e in rawParticipation]
|
105
|
135
|
labels = [e[0] for e in rawParticipation]
|
106
|
136
|
|
107
|
137
|
totalValues = sum(values)
|
108
|
|
-
|
|
138
|
+ idx = 0
|
109
|
139
|
for idx, value in enumerate(values):
|
110
|
140
|
if 100 * value / totalValues >= threshold: break
|
111
|
141
|
|