@@ -124,64 +124,64 @@ def key_params(sample, lower_bound, upper_bound):
124
124
results .append ([key , count ])
125
125
return results
126
126
127
def serialize_q1(outdir, post_weeks):
    """Append one ``date`` row to the q1 parameter writer per entry of *post_weeks*.

    Each entry is unpacked as a ``(week, count)`` pair; only the week is
    written, converted with ``str``.
    """
    out = ParamsWriter(outdir, "q1", ["date"])
    for week, _ in post_weeks:
        row = [str(week)]
        out.append(row)
131
131
132
def serialize_q2(outdir, countries, post_day_ranges):
    """Append q2 rows: every day range combined with every unordered country pair.

    *countries* and *post_day_ranges* are sequences of ``(value, count)``
    pairs; the counts are ignored. A row holds the two bounds of the day
    range followed by the two country names.
    """
    out = ParamsWriter(outdir, "q2", ["date1", "date2", "country1", "country2"])
    for day_range, _ in post_day_ranges:
        for pos, (first_country, _) in enumerate(countries):
            # Pair each country only with those after it, so every
            # unordered pair is emitted once per day range.
            for second_country, _ in countries[pos + 1:]:
                out.append([
                    str(day_range[0]),
                    str(day_range[1]),
                    first_country,
                    second_country,
                ])
139
139
140
def serialize_q3(outdir, post_months):
    """Create the q3 parameter writer with the ``year`` and ``month`` columns.

    No rows are appended yet; *post_months* is currently unused.
    """
    columns = ["year", "month"]
    writer = ParamsWriter(outdir, "q3", columns)
    # TODO year, month
143
143
144
def serialize_q4(outdir, tagclasses, countries):
    """Append one q4 row for every (tag class, country) combination.

    Both inputs are sequences of ``(value, count)`` pairs; the counts are
    ignored.
    """
    out = ParamsWriter(outdir, "q4", ["tagClass", "country"])
    for tag_class, _ in tagclasses:
        for country_name, _ in countries:
            row = [tag_class, country_name]
            out.append(row)
149
149
150
def serialize_q5(outdir, countries):
    """Append one ``country`` row to the q5 parameter writer per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q5", ["country"])
    for country_name, _ in countries:
        out.append([country_name])
154
154
155
155
156
def serialize_q6(outdir, tags):
    """Append one ``tag`` row to the q6 parameter writer per entry of *tags*.

    Entries are ``(tag, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q6", ["tag"])
    for tag_name, _ in tags:
        out.append([tag_name])
160
160
161
def serialize_q7(outdir, tags):
    """Append one ``tag`` row to the q7 parameter writer per entry of *tags*.

    Entries are ``(tag, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q7", ["tag"])
    for tag_name, _ in tags:
        out.append([tag_name])
165
165
166
def serialize_q8(outdir, tags):
    """Append one ``tag`` row to the q8 parameter writer per entry of *tags*.

    Entries are ``(tag, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q8", ["tag"])
    for tag_name, _ in tags:
        out.append([tag_name])
170
170
171
def serialize_q9(outdir, tagclasses):
    """Append one q9 row per unordered pair of tag classes.

    *tagclasses* is a sequence of ``(tag_class, count)`` pairs; the counts
    are ignored. Every row carries the fixed threshold value ``200``.
    """
    out = ParamsWriter(outdir, "q9", ["tagClass1", "tagClass2", "threshold"])
    for pos, (first_class, _) in enumerate(tagclasses):
        # Only look at later entries so each pair appears exactly once.
        for second_class, _ in tagclasses[pos + 1:]:
            out.append([first_class, second_class, str(200)])
177
177
178
def serialize_q10(outdir, tags, post_weeks):
    """Append one q10 row for every (tag, week) combination.

    Both inputs are sequences of ``(value, count)`` pairs; the counts are
    ignored. The week is written as ``str(week)``.
    """
    out = ParamsWriter(outdir, "q10", ["tag", "date"])
    for tag_name, _ in tags:
        for week, _ in post_weeks:
            row = [tag_name, str(week)]
            out.append(row)
183
183
184
- def serializes_q11 (outdir , countries , bad_words ):
184
+ def serialize_q11 (outdir , countries , bad_words ):
185
185
writer = ParamsWriter (outdir , "q11" , ["country" , "blacklist" ])
186
186
random .seed (1988 + 2 )
187
187
for country , count in countries :
@@ -200,92 +200,91 @@ def serializes_q11(outdir, countries, bad_words):
200
200
blacklist = bad_words [0 :num_words ]
201
201
writer .append ([country ,";" .join (blacklist )])
202
202
203
def serialize_q12(outdir, post_weeks):
    """Append one q12 row per entry of *post_weeks*.

    Entries are ``(week, count)`` pairs; the count is ignored. Every row
    carries the fixed like threshold ``400``.
    """
    out = ParamsWriter(outdir, "q12", ["date", "likeThreshold"])
    for week, _ in post_weeks:
        row = [str(week), str(400)]
        out.append(row)
207
207
208
def serialize_q13(outdir, countries):
    """Append one ``country`` row to the q13 parameter writer per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q13", ["country"])
    for country_name, _ in countries:
        out.append([country_name])
212
212
213
def serialize_q14(outdir, creationdates):
    """Append one ``begin``/``end`` row to the q14 writer per entry of *creationdates*.

    Entries are ``(creation, count)`` pairs, where ``creation[0]`` and
    ``creation[1]`` supply the two bounds; the count is ignored.
    """
    out = ParamsWriter(outdir, "q14", ["begin", "end"])
    for creation, _ in creationdates:
        begin = str(creation[0])
        end = str(creation[1])
        out.append([begin, end])
217
217
218
def serialize_q15(outdir, countries):
    """Append one ``country`` row to the q15 parameter writer per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q15", ["country"])
    for country_name, _ in countries:
        out.append([country_name])
222
222
223
def serialize_q16(outdir, persons, tagclasses, countries, path_bounds):
    """Append one q16 row per (tag class, country, path bound) combination.

    Each row also carries a person drawn at random from *persons*. The
    random generator is re-seeded with a fixed value first, so the drawn
    sequence is reproducible for the same inputs.

    Args:
        outdir: forwarded to ``ParamsWriter``.
        persons: indexable sequence of person identifiers; must be
            non-empty whenever at least one row is produced.
        tagclasses: sequence of ``(tag_class, count)`` pairs (count ignored).
        countries: sequence of ``(country, count)`` pairs (count ignored).
        path_bounds: sequence of ``(min_distance, max_distance)`` pairs.
    """
    writer = ParamsWriter(outdir, "q16", ["person", "tag", "country", "minPathDistance", "maxPathDistance"])
    random.seed(1988 + 2)
    # random.randint includes BOTH endpoints, so the upper bound must be the
    # last valid index; using len(persons) could index one past the end.
    last_index = len(persons) - 1
    for tag, _ in tagclasses:
        for country, _ in countries:
            for min_dist, max_dist in path_bounds:
                person = persons[random.randint(0, last_index)]
                writer.append([str(person), tag, country, str(min_dist), str(max_dist)])
230
230
231
def serialize_q17(outdir, countries):
    """Append one ``country`` row to the q17 parameter writer per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q17", ["country"])
    for country_name, _ in countries:
        out.append([country_name])
235
235
236
def serialize_q18(outdir, post_weeks, lengths, languages):
    """Append one q18 row for every (week, length, language set) combination.

    *post_weeks* holds ``(week, count)`` pairs (count ignored). Each element
    of *languages* is an iterable of strings that is joined with ``;`` into
    a single field.
    """
    out = ParamsWriter(outdir, "q18", ["date", "lengthThreshold", "languages"])
    for week, _ in post_weeks:
        for threshold in lengths:
            for codes in languages:
                joined_codes = ";".join(codes)
                out.append([str(week), str(threshold), joined_codes])
242
242
243
def serialize_q19(outdir, tagclasses):
    """Append one q19 row per unordered pair of tag classes.

    *tagclasses* is a sequence of ``(tag_class, count)`` pairs; the counts
    are ignored. Every row starts with the same fixed date (1989-01-01)
    passed through ``format_date``.
    """
    fixed_date = datetime.strptime("1989-1-1", "%Y-%m-%d")
    out = ParamsWriter(outdir, "q19", ["date", "tagClass1", "tagClass2"])
    for pos, (first_class, _) in enumerate(tagclasses):
        # Only look at later entries so each pair appears exactly once.
        for second_class, _ in tagclasses[pos + 1:]:
            date_field = str(format_date(fixed_date))
            out.append([date_field, first_class, second_class])
250
250
251
def serialize_q20(outdir, tagclasses):
    """Append one ``tagClasses`` row to the q20 writer per entry of *tagclasses*.

    Entries are ``(tag_class, count)`` pairs; the count is ignored.
    """
    columns = ["tagClasses"]  # TODO tagclasses
    out = ParamsWriter(outdir, "q20", columns)
    for tag_class, _ in tagclasses:
        out.append([tag_class])
255
255
256
def serialize_q21(outdir, countries):
    """Append one q21 row per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored. Each row
    pairs the country with the module-level ``END_DATE`` passed through
    ``format_date``.
    """
    out = ParamsWriter(outdir, "q21", ["country", "endDate"])
    for country_name, _ in countries:
        end_date_field = str(format_date(END_DATE))
        out.append([country_name, end_date_field])
260
260
261
def serialize_q22(outdir, countries):
    """Append one q22 row per unordered pair of countries.

    *countries* is a sequence of ``(country, count)`` pairs; the counts are
    ignored.
    """
    out = ParamsWriter(outdir, "q22", ["country1", "country2"])
    for pos, (first_country, _) in enumerate(countries):
        # Only look at later entries so each pair appears exactly once.
        for second_country, _ in countries[pos + 1:]:
            out.append([first_country, second_country])
267
267
268
def serialize_q23(outdir, countries):
    """Append one ``country`` row to the q23 parameter writer per entry of *countries*.

    Entries are ``(country, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q23", ["country"])
    for country_name, _ in countries:
        out.append([country_name])
272
272
273
def serialize_q24(outdir, tagclasses):
    """Append one ``tagClass`` row to the q24 writer per entry of *tagclasses*.

    Entries are ``(tag_class, count)`` pairs; the count is ignored.
    """
    out = ParamsWriter(outdir, "q24", ["tagClass"])
    for tag_class, _ in tagclasses:
        out.append([tag_class])
277
277
278
278
def serialize_q25(outdir, persons, post_month_ranges):
    """Append q25 rows: random pairs of distinct persons for each date range.

    For every entry of *post_month_ranges*, ``min(len(persons), 10)`` rows
    are appended, each holding two different persons drawn at random plus
    the two bounds of the range.

    Args:
        outdir: forwarded to ``ParamsWriter``.
        persons: indexable sequence of person identifiers.
        post_month_ranges: sequence of ``(day_range, count)`` pairs, where
            ``day_range[0]`` and ``day_range[1]`` are the start and end
            values; the count is ignored.
    """
    writer = ParamsWriter(outdir, "q25", ["person1Id", "person2Id", "startDate", "endDate"])
    if len(persons) < 2:
        # Two different persons are needed per row. With fewer than two
        # candidates the rejection loop below could never terminate.
        # NOTE(review): a list holding only duplicates of a single id would
        # still spin forever - assumes person ids are unique; confirm.
        return
    pairs_per_range = min(len(persons), 10)
    last_index = len(persons) - 1
    for day_range, _count_post in post_month_ranges:
        for _ in range(pairs_per_range):
            person1Id = persons[random.randint(0, last_index)]
            # Redraw the second person until it differs from the first.
            person2Id = person1Id
            while person2Id == person1Id:
                person2Id = persons[random.randint(0, last_index)]
            writer.append([str(person1Id), str(person2Id), str(day_range[0]), str(day_range[1])])
290
289
291
290
@@ -381,34 +380,35 @@ def main(argv=None):
381
380
language_codes = prob_language_codes ()
382
381
post_lengths = prob_post_lengths ()
383
382
384
- serializes_q2 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ), post_day_ranges ) # TODO determine constants
385
- serializes_q3 (outdir , post_months )
386
- serializes_q14 (outdir , post_month_params (week_posts , post_lower_threshold * 2 , post_upper_threshold * 2 ))
387
-
388
- serializes_q1 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
389
- serializes_q12 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
390
- serializes_q18 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ), post_lengths , language_codes )
391
- serializes_q10 (outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ), post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
392
-
393
- serializes_q4 (outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 10 ), key_params (country_sample , total_posts / 120 , total_posts / 70 ))
394
- serializes_q5 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
395
- serializes_q6 (outdir , key_params (tag_posts , total_posts / 1300 , total_posts / 900 ))
396
- serializes_q7 (outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
397
- serializes_q8 (outdir , key_params (tag_posts , total_posts / 600 , total_posts / 300 ))
398
- serializes_q9 (outdir , key_params (tagclass_posts , 6000 , 25000 ))
399
- serializes_q13 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
400
- serializes_q15 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
401
- serializes_q16 (outdir , persons , key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 80 , total_posts / 20 ), path_bounds )
402
- serializes_q17 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
403
- serializes_q19 (outdir , key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
404
- serializes_q21 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
405
- serializes_q22 (outdir , key_params (country_sample , total_posts / 120 , total_posts / 40 ))
406
- serializes_q23 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
407
- serializes_q24 (outdir , key_params (tagclass_posts , total_posts / 140 , total_posts / 5 )) serialize_q25 (outdir , persons , post_months )
383
+ serialize_q2 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ), post_day_ranges ) # TODO determine constants
384
+ serialize_q3 (outdir , post_months )
385
+ serialize_q14 (outdir , post_month_params (week_posts , post_lower_threshold * 2 , post_upper_threshold * 2 ))
386
+
387
+ serialize_q1 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
388
+ serialize_q12 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
389
+ serialize_q18 (outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ), post_lengths , language_codes )
390
+ serialize_q10 (outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ), post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
391
+
392
+ serialize_q4 (outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 10 ), key_params (country_sample , total_posts / 120 , total_posts / 70 ))
393
+ serialize_q5 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
394
+ serialize_q6 (outdir , key_params (tag_posts , total_posts / 1300 , total_posts / 900 ))
395
+ serialize_q7 (outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
396
+ serialize_q8 (outdir , key_params (tag_posts , total_posts / 600 , total_posts / 300 ))
397
+ serialize_q9 (outdir , key_params (tagclass_posts , 6000 , 25000 ))
398
+ serialize_q13 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
399
+ serialize_q15 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
400
+ serialize_q16 (outdir , persons , key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 80 , total_posts / 20 ), path_bounds )
401
+ serialize_q17 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
402
+ serialize_q19 (outdir , key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
403
+ serialize_q21 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
404
+ serialize_q22 (outdir , key_params (country_sample , total_posts / 120 , total_posts / 40 ))
405
+ serialize_q23 (outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
406
+ serialize_q24 (outdir , key_params (tagclass_posts , total_posts / 140 , total_posts / 5 ))
407
+ serialize_q25 (outdir , persons , post_months )
408
408
409
409
# TODO: Refine
410
- serializes_q20 (outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 2 ))
411
- serializes_q11 (outdir , key_params (country_sample , total_posts / 80 , total_posts / 20 ), bad_words )
410
+ serialize_q20 (outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 2 ))
411
+ serialize_q11 (outdir , key_params (country_sample , total_posts / 80 , total_posts / 20 ), bad_words )
412
412
413
413
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
0 commit comments