@@ -179,8 +179,24 @@ def serialize_q10(tags):
179
179
for tag , count in tags :
180
180
writer .append ([tag ], [count ])
181
181
182
def serialize_q11(countries, bad_words):
    """Write the q11 parameter rows: a country plus a random word blacklist.

    For every (country, count) pair three rows are appended. Each row holds
    the country and a blacklist of randomly selected words joined with ";".
    The three blacklists contain at most 4, 10 and 7 words respectively, and
    never more than len(bad_words). The random generator is re-seeded with a
    fixed value first, so the draws do not depend on earlier random calls.

    countries -- iterable of (country, count) pairs; count is passed on as
                 the only element of the second list given to writer.append.
    bad_words -- list of candidate blacklist words. It is copied before
                 shuffling, so the caller's list keeps its order.

    Raises ValueError (from random.randint) if bad_words is empty while
    countries is not.
    """
    writer = ParamsWriter("q11", ["country", "blacklist"])
    random.seed(1988 + 2)
    # random.shuffle reorders its argument in place. Work on a private copy
    # so the caller's list is not modified; the copy keeps accumulating
    # shuffles across iterations, so the sequence of draws is unchanged.
    pool = list(bad_words)
    for country, count in countries:
        # One row per cap on the blacklist length.
        for max_words in (4, 10, 7):
            num_words = random.randint(1, min(len(pool), max_words))
            random.shuffle(pool)
            blacklist = pool[:num_words]
            writer.append([country, ";".join(blacklist)], [count])
184
200
185
201
def serialize_q12 (post_weeks ):
186
202
writer = ParamsWriter ("q12" , ["creationDate" , "likeCount" ])
@@ -202,11 +218,12 @@ def serialize_q15(countries):
202
218
for country , count in countries :
203
219
writer .append ([country ], [count ])
204
220
205
def serialize_q16(persons, tagclasses, countries):
    """Write the q16 parameter rows: (person, tag, country) combinations.

    One row is appended for every combination of an entry in tagclasses and
    an entry in countries. Each row is given a person drawn at random from
    persons. The random generator is re-seeded with a fixed value first, so
    the draws do not depend on earlier random calls.

    persons    -- non-empty sequence of person identifiers; the chosen one
                  is converted with str() before being written.
    tagclasses -- iterable of (tag, count) pairs.
    countries  -- iterable of (country, count) pairs; it is iterated once
                  per tagclass entry, so it must be re-iterable.

    Raises IndexError if persons is empty and at least one row is produced.
    """
    writer = ParamsWriter("q16", ["person", "tag", "country"])
    random.seed(1988 + 2)
    for tag, count_a in tagclasses:
        for country, count_b in countries:
            # random.randint includes its upper bound, so indexing with
            # randint(0, len(persons)) could land one past the last element.
            # random.choice always returns a valid element.
            person = random.choice(persons)
            # The leading 0 lines up with the "person" column.
            # NOTE(review): presumably a placeholder factor - confirm
            # against ParamsWriter.
            writer.append([str(person), tag, country], [0, count_a, count_b])
210
227
211
228
def serialize_q17 (countries ):
212
229
writer = ParamsWriter ("q17" , ["country" ])
@@ -226,8 +243,10 @@ def serialize_q19(tagclasses):
226
243
for tag_class_b , count_b in tagclasses [ix + 1 :]:
227
244
writer .append ([str (format_date (PERS_DATE )),tag_class_a , tag_class_b ], [count_a , count_b ])
228
245
229
def serialize_q20(tagclasses):
    """Write the q20 parameter rows, one per tag class.

    tagclasses -- iterable of (tagclass, count) pairs. Each pair becomes one
                  writer.append call with the tag class as the only column
                  value and the count as the only element of the second
                  list.
    """
    writer = ParamsWriter("q20", ["tagclass"])
    for entry in tagclasses:
        name, weight = entry
        writer.append([name], [weight])
231
250
232
251
def serialize_q21 (countries ):
233
252
writer = ParamsWriter ("q21" , ["country" ,"endDate" ])
@@ -294,6 +313,12 @@ def main(argv=None):
294
313
(personFactors , countryFactors , tagFactors , tagClassFactors , nameFactors , givenNames , ts , postsHisto ) = readfactors .load (factorFiles , friendsFiles )
295
314
week_posts = convert_posts_histo (postsHisto )
296
315
316
+ persons = []
317
+ for key , _ in personFactors .values .iteritems ():
318
+ persons .append (key )
319
+ random .seed (1988 )
320
+ random .shuffle (persons )
321
+
297
322
country_sample = []
298
323
for key , value in countryFactors .values .iteritems ():
299
324
country_sample .append ([key , value .getValue ("p" )])
@@ -321,6 +346,7 @@ def main(argv=None):
321
346
post_upper_threshold = 0.1 * total_posts * 1.1
322
347
post_day_ranges = post_date_range_params (week_posts , post_lower_threshold , post_upper_threshold )
323
348
349
+ bad_words = ['Augustine' ,'William' ,'James' ,'with' ,'Henry' ,'Robert' ,'from' ,'Pope' ,'Hippo' ,'album' ,'David' ,'has' ,'one' ,'also' ,'Green' ,'which' ,'that' ]
324
350
#post_lower_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*0.8
325
351
#post_upper_threshold = (total_posts/(week_posts[len(week_posts)-1][0]/7/4))*1.2
326
352
non_empty_weeks = len (week_posts )
@@ -348,18 +374,18 @@ def main(argv=None):
348
374
serialize_q9 (key_params (tagclass_posts , 6000 , 25000 ))
349
375
serialize_q10 (key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
350
376
serialize_q13 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
351
- # serialize_q14(post_month_params(week_posts, post_lower_threshold*2, post_upper_threshold*2))
352
377
serialize_q15 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
353
- serialize_q16 (key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 110 , total_posts / 70 ))
378
+ serialize_q16 (persons , key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 80 , total_posts / 20 ))
354
379
serialize_q17 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
355
380
serialize_q19 (key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
356
381
serialize_q21 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
357
382
serialize_q22 (key_params (country_sample , total_posts / 120 , total_posts / 40 ))
358
383
serialize_q23 (key_params (country_sample , total_posts / 200 , total_posts / 100 ))
359
384
serialize_q24 (key_params (tagclass_posts , total_posts / 140 , total_posts / 5 ))
360
385
361
- serialize_q11 ()
362
- serialize_q20 ()
386
+ # TODO: Refine
387
+ serialize_q20 (key_params (tagclass_posts , total_posts / 20 , total_posts / 2 ))
388
+ serialize_q11 (key_params (country_sample , total_posts / 80 , total_posts / 20 ), bad_words )
363
389
364
390
if __name__ == "__main__":
    # Run the script entry point and hand its return value to the
    # interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
0 commit comments