1
+ import sys
2
+ import getopt
3
+ import math
4
+ from readfactors import FactorCount
5
+ from operator import itemgetter
6
+ import itertools
7
+
8
# Number of trailing months considered for date parameters in filters of the
# form `timestamp <= Date0`.
LAST_MONTHS = 3
# Default start year; meant to be replaced by the value coming from the data
# generator output.
START_YEAR = 2010
10
+
11
class MonthYearCount:
    """Plain record pairing a (month, year) with a count."""

    def __init__(self, month, year, count):
        # Note the argument order: month first, then year.
        self.month, self.year, self.count = month, year, count
16
+
17
+
18
class TimeParameter:
    """Plain record holding a date (year, month, day) plus a duration value."""

    def __init__(self, year, month, day, duration):
        # Note the argument order: year first, then month and day.
        self.year, self.month, self.day = year, month, day
        self.duration = duration
24
+
25
def findTimeParameters(persons, factors, procedure, timestampSelection):
    """Compute the time medians for `factors` and hand them to a selector.

    persons            -- accepted for interface compatibility; not used here.
    factors            -- list of per-person lists of MonthYearCount-like items.
    procedure          -- when equal to "w" the medians are computed over the
                          last 12 months instead of the default window.
    timestampSelection -- callable invoked as timestampSelection(factors, medians).

    Returns whatever `timestampSelection` returns.
    """
    extraArgs = {"lastmonthcount": 12} if procedure == "w" else {}
    medians = computeTimeMedians(factors, **extraArgs)
    return timestampSelection(factors, medians)
34
+
35
+
36
def getMedian(data, sort_key, getEntireTuple=False):
    """Return the median of `data` as ordered by `sort_key`.

    data           -- sequence of items (objects or plain numbers).
    sort_key       -- callable mapping an item to the value it is ranked by.
    getEntireTuple -- when True, return the median *item* itself (for an even
                      number of items, the upper of the two middle items);
                      when False, return the median *key value* (for an even
                      number of items, the mean of the two middle key values
                      as a float).

    Empty input yields MonthYearCount(0, 0, 0) when getEntireTuple is set and
    0 otherwise.
    """
    if len(data) == 0:
        return MonthYearCount(0, 0, 0) if getEntireTuple else 0

    srtd = sorted(data, key=sort_key)
    # Floor division keeps `mid` usable as an index under true division too.
    mid = len(srtd) // 2

    if getEntireTuple:
        return srtd[mid]

    if len(srtd) % 2 == 0:
        return (sort_key(srtd[mid - 1]) + sort_key(srtd[mid])) / 2.0

    # Also covers the single-element case; going through sort_key (instead of
    # reading `.count` directly) makes it work for lists of plain numbers.
    return sort_key(srtd[mid])
58
+
59
+
60
def MonthYearToDate(myc, day):
    """Format the year and month of `myc` plus `day` as "year-month-day".

    The numbers are not zero-padded.
    """
    dateParts = (myc.year, myc.month, day)
    return "%d-%d-%d" % dateParts
62
+
63
+
64
def getTimeParamsWithMedian(factors, medians):
    """Pick, per person, the median month and a duration scaled to the global median.

    Strategy: find the median month of each person's distribution, then
    stretch a 28-day interval by the ratio between the larger and the smaller
    of (that month's count, global median).

    factors -- list of per-person lists of MonthYearCount items.
    medians -- (medianFirstMonth, medianLastMonth, median) triple as returned
               by computeTimeMedians; only `median` is used here.

    Returns a list with one TimeParameter per entry of `factors`. When the
    global median, the person's median count or its year truncates to 0, the
    entry is TimeParameter(START_YEAR, 1, 1, 0).
    """
    # Explicit unpacking replaces the tuple-parameter syntax, which only
    # Python 2 accepts; callers still pass the triple positionally.
    _, _, median = medians

    res = []
    for values in factors:
        currentMedian = getMedian(values, lambda myc: myc.count, True)
        if int(median) == 0 or int(currentMedian.count) == 0 or int(currentMedian.year) == 0:
            res.append(TimeParameter(START_YEAR, 1, 1, 0))
            continue

        # Always divide the larger value by the smaller one so the duration
        # is at least 28 days.
        if currentMedian.count > median:
            duration = int(28 * currentMedian.count / median)
        else:
            duration = int(28 * median / currentMedian.count)
        res.append(TimeParameter(currentMedian.year, currentMedian.month, 1, duration))
    return res
80
+
81
def getTimeParamsBeforeMedian(factors, medians):
    """Pick, per person, the date up to which the counts add up to medianFirstMonth.

    Strategy: walk the months from oldest to newest, accumulating counts, and
    stop at the first month where the running sum reaches medianFirstMonth.
    The day inside that month is 28 minus the overshoot scaled to 28 days,
    with a minimum of 1 (it can be fractional when the target is not an
    integer).

    factors -- list of per-person lists of MonthYearCount items.
    medians -- (medianFirstMonth, medianLastMonth, median) triple as returned
               by computeTimeMedians; only `medianFirstMonth` is used here.

    Returns a list with exactly one TimeParameter (duration None) per entry of
    `factors`. If the target is never reached, the entry is day 28 of the
    last month examined, or year 0 / month 0 when the person has no months.
    """
    # Explicit unpacking replaces the tuple-parameter syntax, which only
    # Python 2 accepts; callers still pass the triple positionally.
    medianFirstMonth, _, _ = medians

    res = []
    for values in factors:
        oldestFirst = sorted(values, key=lambda myc: (myc.year, myc.month))
        localsum = 0
        best = MonthYearCount(0, 0, 0)
        for myc in oldestFirst:
            localsum += myc.count
            if localsum >= medianFirstMonth:
                day = max(28 - 28 * (localsum - medianFirstMonth) / myc.count, 1)
                res.append(TimeParameter(myc.year, myc.month, day, None))
                break
            best = myc
        else:
            # Target never reached. Using for/else (rather than re-testing
            # the sum) also emits an entry for a person with no months when
            # the target is 0, so the result stays aligned with `factors`.
            res.append(TimeParameter(best.year, best.month, 28, None))

    return res
102
+
103
def getTimeParamsAfterMedian(factors, medians):
    """Pick, per person, the date after which the counts add up to medianLastMonth.

    Strategy: walk the months from newest to oldest, accumulating counts, and
    stop at the first month where the running sum reaches medianLastMonth.
    The day inside that month is the overshoot scaled to 28 days, with a
    minimum of 1 (it can be fractional when the target is not an integer).

    factors -- list of per-person lists of MonthYearCount items.
    medians -- (medianFirstMonth, medianLastMonth, median) triple as returned
               by computeTimeMedians; only `medianLastMonth` is used here.

    Returns a list with exactly one TimeParameter (duration None) per entry of
    `factors`. If the target is never reached, the entry is day 1 of the last
    month examined (the oldest one), or year 0 / month 0 when the person has
    no months.
    """
    # Explicit unpacking replaces the tuple-parameter syntax, which only
    # Python 2 accepts; callers still pass the triple positionally.
    _, medianLastMonth, _ = medians

    res = []
    for values in factors:
        newestFirst = sorted(values, key=lambda myc: (-myc.year, -myc.month))
        localsum = 0
        best = MonthYearCount(0, 0, 0)
        for myc in newestFirst:
            localsum += myc.count
            if localsum >= medianLastMonth:
                day = max(28 * (localsum - medianLastMonth) / myc.count, 1)
                res.append(TimeParameter(myc.year, myc.month, day, None))
                break
            best = myc
        else:
            # Target never reached. Using for/else (rather than re-testing
            # the sum) also emits an entry for a person with no months when
            # the target is 0, so the result stays aligned with `factors`.
            res.append(TimeParameter(best.year, best.month, 1, None))
    return res
122
+
123
def computeTimeMedians(factors, lastmonthcount=LAST_MONTHS):
    """Compute the three medians used by the time-parameter selectors.

    For every per-person list in `factors` (sorted in place by year, month):
      * the sum of counts over the last `lastmonthcount` months,
      * the sum of counts over the months before those (or over all months
        when the person has fewer than `lastmonthcount` of them),
      * the median monthly count.

    Returns (medianFirstMonth, medianLastMonth, median): the medians, across
    persons, of the "before" sums, the "last months" sums and the per-person
    medians respectively.
    """
    identity = lambda x: x

    perPersonMedians = []
    tailSums = []
    headSums = []
    for monthly in factors:
        monthly.sort(key=lambda entry: (entry.year, entry.month))
        total = len(monthly)

        tailStart = max(total - lastmonthcount, 0)
        tailSums.append(sum(entry.count for entry in monthly[tailStart:total]))

        # A negative cutoff means there are fewer months than the window;
        # in that case the whole list counts as the "before" part.
        headEnd = total - lastmonthcount
        if headEnd < 0:
            headEnd = total
        headSums.append(sum(entry.count for entry in monthly[0:headEnd]))

        perPersonMedians.append(getMedian(monthly, lambda entry: entry.count))

    median = getMedian(perPersonMedians, identity)
    medianLastMonth = getMedian(tailSums, identity)
    medianFirstMonth = getMedian(headSums, identity)

    return (medianFirstMonth, medianLastMonth, median)
145
+
146
def _addCounts(totals, person, counts):
    """Store `counts` for `person`, or add them element-wise to the stored ones."""
    if person not in totals:
        totals[person] = counts
    else:
        totals[person] = [a + b for a, b in zip(totals[person], counts)]


def _iterFriendLists(friendFiles):
    """Yield every line of every friend file as a list of ints [person, friend, ...]."""
    for inputFriendFile in friendFiles:
        with open(inputFriendFile, 'r') as f:
            for line in f:
                yield [int(field) for field in line.split(",")]


def _sumFriendCounts(friends, countsByPerson, monthcount):
    """Element-wise sum of the count lists of the friends found in `countsByPerson`."""
    total = [0] * monthcount
    for friend in friends:
        if friend in countsByPerson:
            # zip() truncates to the shorter list, as the per-friend lists
            # are not guaranteed to hold exactly `monthcount` values.
            total = [a + b for a, b in zip(total, countsByPerson[friend])]
    return total


def readTimeParams(persons, factorFiles, friendFiles):
    """Aggregate monthly post/group counts over friends and friends-of-friends.

    persons     -- accepted for interface compatibility; not used here.
    factorFiles -- paths of factor files. The first line holds the number of
                   person lines that follow; each person line is
                   comma-separated with the person id in field 0, the monthly
                   post counts in fields 8..44 (37 values) and the monthly
                   group counts in the remaining fields. Counts of a person
                   appearing several times are added element-wise.
    friendFiles -- paths of friend files; each line is a comma-separated list
                   of ints, the person id followed by the ids of its friends.

    Returns (friendsPostsCounts, ffPostCounts, ffGroupCount), three dicts
    keyed by person id:
      * post counts summed over the person's friends,
      * the friends' summed post counts summed again over the person's friends,
      * the same two-level sum for group counts.
    Friends without data are skipped.
    """
    offset = 8
    monthcount = 12 * 3 + 1

    postCounts = {}
    groupCounts = {}
    for inputFactorFile in factorFiles:
        with open(inputFactorFile, 'r') as f:
            personCount = int(f.readline())
            for _ in range(personCount):
                fields = f.readline().split(",")
                person = int(fields[0])
                # Real lists (not map objects) are needed: the counts are
                # stored and zipped over repeatedly later on.
                localPostCounts = [int(v) for v in fields[offset:offset + monthcount]]
                localGroupCounts = [int(v) for v in fields[offset + monthcount:]]
                _addCounts(postCounts, person, localPostCounts)
                _addCounts(groupCounts, person, localGroupCounts)

    # First pass: sums over direct friends.
    friendsPostsCounts = {}
    fGroupCount = {}
    for people in _iterFriendLists(friendFiles):
        person = people[0]
        friendsPostsCounts[person] = _sumFriendCounts(people[1:], postCounts, monthcount)
        fGroupCount[person] = _sumFriendCounts(people[1:], groupCounts, monthcount)

    # Second pass: needs the complete first-pass results, so the friend files
    # are streamed again rather than processed in the same loop.
    ffPostCounts = {}
    ffGroupCount = {}
    for people in _iterFriendLists(friendFiles):
        person = people[0]
        ffPostCounts[person] = _sumFriendCounts(people[1:], friendsPostsCounts, monthcount)
        ffGroupCount[person] = _sumFriendCounts(people[1:], fGroupCount, monthcount)

    return (friendsPostsCounts, ffPostCounts, ffGroupCount)
209
+
210
+
211
+
212
def findTimeParams(input, factorFiles, friendFiles, startYear):
    """Compute time parameters for every query described in `input`.

    input       -- dict keyed by query id; each value is indexable with
                   [0] the list of person ids, [1] the factor kind
                   ("f", "ff" or "ffg") and [2] the selector callable that is
                   passed on to findTimeParameters.
    factorFiles -- factor file paths, forwarded to readTimeParams.
    friendFiles -- friend file paths, forwarded to readTimeParams.
    startYear   -- year of the first month in the count lists; also stored in
                   the module-level START_YEAR.

    Returns a dict mapping each query id to the result of findTimeParameters.
    Raises KeyError for an unknown factor kind or a person without counts.
    """
    # Without the `global` declaration the assignment would only bind a local
    # name and the module-level START_YEAR (read by getTimeParamsWithMedian)
    # would silently keep its default.
    global START_YEAR
    START_YEAR = startYear

    persons = []
    for queryId in input:
        persons += input[queryId][0]

    (fPostCount, ffPostCount, ffGroupCount) = readTimeParams(set(persons), factorFiles, friendFiles)

    mapParam = {
        "f": fPostCount,
        "ff": ffPostCount,
        "ffg": ffGroupCount,
    }

    output = {}
    for queryId in input:
        query = input[queryId]
        factors = mapParam[query[1]]
        mycFactors = []
        for person in query[0]:
            myc = []
            # `month` is the 0-based offset from January of startYear.
            for (month, count) in enumerate(factors[person]):
                if count == 0:
                    continue
                year = startYear + month // 12
                myc.append(MonthYearCount(month % 12 + 1, int(year), count))
            mycFactors.append(myc)

        output[queryId] = findTimeParameters(query[0], mycFactors, query[1], query[2])

    return output
245
+
246
def main(argv=None):
    """Command-line entry point: compute median-based time parameters.

    argv -- argument list (defaults to sys.argv); argv[1] is the input
            persons file.

    Returns 1 after printing a usage line when the file argument is missing,
    and None otherwise.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        # Parenthesised form prints the same text under Python 2 and 3.
        print("arguments: <input persons file>")
        return 1

    # NOTE(review): prepareTimedFactors and getFriendFriendPostByTime are not
    # defined or imported in the visible part of this file -- confirm where
    # they are supposed to come from.
    with open(argv[1]) as f:
        factors = prepareTimedFactors(f, getFriendFriendPostByTime)

    medians = computeTimeMedians(factors)

    # The result is computed but not used or printed, as before.
    timestamps = getTimeParamsWithMedian(factors, medians)
261
+
262
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
0 commit comments