9
9
from timeparameters import *
10
10
from calendar import timegm
11
11
12
-
13
12
# class ParamsWriter:
14
13
# def __init__(self, name, num_params):
15
14
# self.files = []
@@ -93,18 +92,18 @@ def post_month_params(sample, lower_bound, upper_bound):
93
92
results .append ([[start_day , end_day ], count_sum ])
94
93
return results
95
94
96
- def post_three_month_params (sample , lower_bound , upper_bound ):
97
- results = []
98
- for ix in range (0 , len (sample )/ 12 ):
99
- start_ix = ix * 12
100
- count_sum = 0
101
- for offset , count in sample [start_ix :start_ix + 12 ]:
102
- count_sum += count
103
- if count_sum > lower_bound and count_sum < upper_bound :
104
- start_day = sample [start_ix ][0 ]
105
- end_day = sample [start_ix + 12 ][0 ]
106
- results .append ([[start_day , end_day ], count_sum ])
107
- return results
95
+ # def post_three_month_params(sample, lower_bound, upper_bound):
96
+ # results = []
97
+ # for ix in range(0, len(sample)/12):
98
+ # start_ix = ix*12
99
+ # count_sum = 0
100
+ # for offset, count in sample[start_ix:start_ix+12]:
101
+ # count_sum += count
102
+ # if count_sum > lower_bound and count_sum < upper_bound:
103
+ # start_day = sample[start_ix][0]
104
+ # end_day = sample[start_ix+12][0]
105
+ # results.append([[start_day, end_day], count_sum])
106
+ # return results
108
107
109
108
110
109
def key_params (sample , lower_bound , upper_bound ):
@@ -115,38 +114,24 @@ def key_params(sample, lower_bound, upper_bound):
115
114
return results
116
115
117
116
def serialize_q1(post_weeks):
    """Write one q1 parameter row for every entry of ``post_weeks``.

    Each entry is unpacked as ``(week, count)``.  The week is handed to the
    writer in string form as the single parameter, with ``count`` as its
    accompanying count.
    """
    q1_writer = ParamsWriter("q1", 1)
    for entry in post_weeks:
        week, count = entry
        q1_writer.append([str(week)], [count])
123
120
124
121
def serialize_q2(country_sets, post_day_ranges):
    """Write a random sample of (day range, country set) combinations for q2.

    Every pairing of an entry from ``country_sets`` with an entry from
    ``post_day_ranges`` is considered; a pairing is kept only when a random
    draw from ``0 .. len(country_sets) + len(post_day_ranges)`` comes up 0.
    The generator is seeded with a fixed value so the selection is
    repeatable.

    A kept pairing is written as three parameters (both ends of the day
    range as strings and the country set joined with commas) together with
    three counts (the post count twice, then the country count).
    """
    writer = ParamsWriter("q2", 3)
    random.seed(1988 + 2)
    # The bound does not change while iterating, so compute it once.
    draw_upper_bound = len(country_sets) + len(post_day_ranges)
    for country_set, count_country in country_sets:
        joined_countries = None
        for day_range, count_post in post_day_ranges:
            if random.randint(0, draw_upper_bound) != 0:
                continue
            if joined_countries is None:
                joined_countries = ",".join(country_set)
            params = [str(day_range[0]), str(day_range[1]), joined_countries]
            counts = [count_post, count_post, count_country]
            writer.append(params, counts)
138
128
139
129
def serialize_q3(post_months):
    """Write every ordered pair of distinct ``post_months`` entries for q3.

    For each entry, it is paired with every entry that comes after it in
    ``post_months``.  Each pair yields one row whose two parameters are the
    string forms of the two entries' first elements and whose counts are the
    two entries' second elements.

    NOTE(review): the whole first element is stringified; if entries are
    ``[start, end]`` lists this writes the list's text form rather than the
    start value alone -- confirm the consumer expects that.
    """
    writer = ParamsWriter("q3", 2)
    for position, (week_range_a, count_a) in enumerate(post_months):
        later_entries = post_months[position + 1:]
        for week_range_b, count_b in later_entries:
            writer.append(
                [str(week_range_a), str(week_range_b)],
                [count_a, count_b],
            )
150
135
151
136
def serialize_q4 (tagclasses , countries ):
152
137
writer = ParamsWriter ("q4" , 2 )
@@ -188,23 +173,19 @@ def serialize_q10(tags):
188
173
writer .append ([tag ], [count ])
189
174
190
175
def serialize_q12(post_weeks):
    """Write one q12 parameter row for every ``(week, count)`` in ``post_weeks``.

    The week becomes the single string parameter of the row and ``count`` is
    recorded as its count.
    """
    q12_writer = ParamsWriter("q12", 1)
    for week, count in post_weeks:
        week_param = str(week)
        q12_writer.append([week_param], [count])
196
179
197
180
def serialize_q13(countries):
    """Write one q13 parameter row for every ``(country, count)`` pair.

    The country value is passed through to the writer unchanged as the only
    parameter, with ``count`` as its count.
    """
    q13_writer = ParamsWriter("q13", 1)
    for pair in countries:
        country, count = pair
        q13_writer.append([country], [count])
201
184
202
185
def serialize_q14(creationdates):
    """Write one q14 parameter row for every entry of ``creationdates``.

    Each entry is unpacked as ``(creation, count)``; the string form of
    ``creation`` is the single parameter and ``count`` its count.

    NOTE(review): ``creation`` is stringified as a whole; if it is a
    ``[start, end]`` list the row contains the list's text form -- confirm
    that is what the consumer expects.
    """
    q14_writer = ParamsWriter("q14", 1)
    for entry in creationdates:
        creation, count = entry
        q14_writer.append([str(creation)], [count])
208
189
209
190
def serialize_q15 (countries ):
210
191
writer = ParamsWriter ("q15" , 1 )
@@ -223,10 +204,9 @@ def serialize_q17(countries):
223
204
writer .append ([country ], [count ])
224
205
225
206
def serialize_q18(post_weeks):
    """Write one q18 parameter row for every ``(week, count)`` in ``post_weeks``.

    The string form of the week is the row's only parameter; ``count`` is
    stored as the matching count.
    """
    q18_writer = ParamsWriter("q18", 1)
    for week, count in post_weeks:
        params = [str(week)]
        counts = [count]
        q18_writer.append(params, counts)
230
210
231
211
def serialize_q19 (tagclasses ):
232
212
writer = ParamsWriter ("q19" , 2 )
@@ -257,6 +237,18 @@ def serialize_q24(tagclasses):
257
237
for tagclass , count in tagclasses :
258
238
writer .append ([tagclass ], [count ])
259
239
240
def convert_posts_histo(histogram):
    """Expand a per-month histogram into evenly split weekly samples.

    Months are read from ``histogram`` starting at index 0 and continuing
    for as long as ``histogram.existParam(month)`` is truthy.  Every month
    is laid out on a 30-day grid and contributes four entries, at day
    offsets 0, 7, 14 and 21 from the start of the month, each carrying one
    quarter of the month's ``"p"`` value.

    Args:
        histogram: object exposing ``existParam(month)`` and
            ``getValue(month, "p")``.

    Returns:
        A list of ``[day, count]`` pairs in ascending day order; empty when
        the histogram has no month 0.
    """
    days_per_month = 30
    week_offsets = (0, 7, 14, 21)

    week_posts = []
    month = 0
    while histogram.existParam(month):
        # Look up and divide once per month rather than once per week.
        # NOTE(review): with integer totals under Python 2 this division
        # truncates, so the four shares may sum to less than the month's
        # total -- confirm that loss is acceptable.
        weekly_share = histogram.getValue(month, "p") / 4
        month_start = month * days_per_month
        for offset in week_offsets:
            week_posts.append([month_start + offset, weekly_share])
        month += 1
    return week_posts
251
+
260
252
def main (argv = None ):
261
253
if argv is None :
262
254
argv = sys .argv
@@ -277,7 +269,8 @@ def main(argv=None):
277
269
friendsFiles .append (indir + file )
278
270
279
271
# read precomputed counts from files
280
- (personFactors , countryFactors , tagFactors , tagClassFactors , nameFactors , givenNames , ts ) = readfactors .load (factorFiles , friendsFiles )
272
+ (personFactors , countryFactors , tagFactors , tagClassFactors , nameFactors , givenNames , ts , postsHisto ) = readfactors .load (factorFiles , friendsFiles )
273
+ week_posts = convert_posts_histo (postsHisto )
281
274
282
275
country_sample = []
283
276
for key , value in countryFactors .values .iteritems ():
@@ -293,21 +286,43 @@ def main(argv=None):
293
286
total_posts = 0
294
287
for day , count in tag_posts :
295
288
total_posts += count
289
+
290
+ person_sum = 0
291
+ for country , count in country_sample :
292
+ person_sum += count
293
+
294
+ country_lower_threshold = 0.1 * total_posts * 0.9
295
+ country_upper_threshold = 0.1 * total_posts * 1.1
296
+ country_sets = country_sets_params (country_sample , country_lower_threshold , country_upper_threshold , 4 )
297
+
298
+ post_lower_threshold = 0.1 * total_posts * 0.9
299
+ post_upper_threshold = 0.1 * total_posts * 1.1
300
+ post_day_ranges = post_date_range_params (week_posts , post_lower_threshold , post_upper_threshold )
296
301
302
+ post_lower_threshold = (total_posts / (week_posts [len (week_posts )- 1 ][0 ]/ 7 / 4 ))* 0.8
303
+ post_upper_threshold = (total_posts / (week_posts [len (week_posts )- 1 ][0 ]/ 7 / 4 ))* 1.2
304
+ post_months = post_month_params (week_posts , post_lower_threshold , post_upper_threshold )
305
+
306
+ serialize_q2 (country_sets , post_day_ranges )
307
+ serialize_q3 (post_months )
308
+ serialize_q14 (post_month_params (week_posts , post_lower_threshold * 2 , post_upper_threshold * 2 ))
309
+
310
+ serialize_q1 (post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
311
+ serialize_q12 (post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
312
+ serialize_q18 (post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
313
+
297
314
serialize_q4 (key_params (tagclass_posts , total_posts / 20 , total_posts / 10 ), key_params (country_sample , total_posts / 120 , total_posts / 70 ))
298
315
serialize_q5 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
299
316
serialize_q6 (key_params (tag_posts , total_posts / 1300 , total_posts / 900 ))
300
317
serialize_q7 (key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
301
318
serialize_q8 (key_params (tag_posts , total_posts / 600 , total_posts / 300 ))
302
319
serialize_q9 (key_params (tagclass_posts , 6000 , 25000 ))
303
320
serialize_q10 (key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
304
- # serialize_q12(post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
305
321
serialize_q13 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
306
322
# serialize_q14(post_month_params(week_posts, post_lower_threshold*2, post_upper_threshold*2))
307
323
serialize_q15 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
308
324
serialize_q16 (key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 110 , total_posts / 70 ))
309
325
serialize_q17 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
310
- # serialize_q18(post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
311
326
serialize_q19 (key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
312
327
serialize_q21 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
313
328
serialize_q22 (key_params (country_sample , total_posts / 120 , total_posts / 40 ))
0 commit comments