@@ -30,8 +30,8 @@ def format_date(date):
30
30
# self.files[i].write(param+"\n")
31
31
32
32
class ParamsWriter :
33
- def __init__ (self , name , param_names ):
34
- self .file = codecs .open (sys . argv [ 2 ] + "/" + name + "_param.txt" , "w" ,encoding = "utf-8" )
33
+ def __init__ (self , outdir , name , param_names ):
34
+ self .file = codecs .open (outdir + "/" + name + "_param.txt" , "w" ,encoding = "utf-8" )
35
35
for i in range (0 ,len (param_names )):
36
36
if i > 0 :
37
37
self .file .write ("|" )
@@ -122,67 +122,67 @@ def key_params(sample, lower_bound, upper_bound):
122
122
results .append ([key , count ])
123
123
return results
124
124
125
- def serialize_q1 ( post_weeks ):
126
- writer = ParamsWriter ("q1" , ["date" ])
125
def serializes_q1(outdir, post_weeks):
    """Write the q1 parameter rows (single column: date).

    outdir is forwarded to ParamsWriter together with the name "q1".
    post_weeks is iterated as (week, count) pairs; every pair produces one
    ParamsWriter.append call with the stringified week as the parameter and
    the count passed alongside it.
    """
    q1_writer = ParamsWriter(outdir, "q1", ["date"])
    for entry in post_weeks:
        week, count = entry
        q1_writer.append([str(week)], [count])
129
129
130
- def serialize_q2 ( country_sets , post_day_ranges ):
131
- writer = ParamsWriter ("q2" , ["date1" ,"date2" ,"countries" ,"endDate" ,"messageThreshold" ])
130
def serializes_q2(outdir, country_sets, post_day_ranges):
    """Write a random sample of q2 parameter rows.

    Columns: date1, date2, countries, endDate, messageThreshold.

    Every (country set, day range) combination is considered, but a row is
    only written when a random draw comes up 0.  The generator is reseeded
    with a fixed value first, so the draws repeat between runs.
    """
    columns = ["date1", "date2", "countries", "endDate", "messageThreshold"]
    writer = ParamsWriter(outdir, "q2", columns)
    random.seed(1988 + 2)
    # Upper bound (inclusive) of the sampling draw; larger inputs make each
    # individual combination less likely to be kept.
    draw_bound = len(country_sets) + len(post_day_ranges)
    for country_set, count_country in country_sets:
        for day_range, count_post in post_day_ranges:
            if random.randint(0, draw_bound) != 0:
                continue
            row = [
                str(day_range[0]),
                str(day_range[1]),
                ";".join(country_set),
                str(format_date(END_DATE)),
                str(20),
            ]
            writer.append(row, [count_post, count_post, count_country, 333])
137
137
138
- def serialize_q3 ( post_months ):
139
- writer = ParamsWriter ("q3" , ["range1Start" ,"range1End" ,"range2Start" ,"range2End" ])
138
def serializes_q3(outdir, post_months):
    """Write one q3 parameter row per unordered pair of entries in post_months.

    Columns: range1Start, range1End, range2Start, range2End.

    post_months holds (range, count) items; each item is combined with every
    item that follows it, so a pair is emitted once and never with itself.
    """
    columns = ["range1Start", "range1End", "range2Start", "range2End"]
    writer = ParamsWriter(outdir, "q3", columns)
    for position, (first_range, first_count) in enumerate(post_months):
        for second_range, second_count in post_months[position + 1:]:
            row = [
                str(first_range[0]),
                str(first_range[1]),
                str(second_range[0]),
                str(second_range[1]),
            ]
            writer.append(row, [first_count, second_count])
144
144
145
- def serialize_q4 ( tagclasses , countries ):
146
- writer = ParamsWriter ("q4" , ["tagClass" ,"country" ])
145
def serializes_q4(outdir, tagclasses, countries):
    """Write a q4 parameter row for every (tag class, country) combination.

    Columns: tagClass, country.  Both inputs are iterated as (key, count)
    pairs; the two counts are passed to ParamsWriter.append next to the row.
    """
    q4_writer = ParamsWriter(outdir, "q4", ["tagClass", "country"])
    for tag_class, tag_class_count in tagclasses:
        for country_name, country_count in countries:
            row = [tag_class, country_name]
            q4_writer.append(row, [tag_class_count, country_count])
150
150
151
- def serialize_q5 ( countries ):
152
- writer = ParamsWriter ("q5" , ["country" ])
151
def serializes_q5(outdir, countries):
    """Write the q5 parameter rows (single column: country).

    countries is iterated as (country, count) pairs, one row per pair.
    """
    q5_writer = ParamsWriter(outdir, "q5", ["country"])
    for pair in countries:
        country, count = pair
        q5_writer.append([country], [count])
155
155
156
156
157
- def serialize_q6 ( tags ):
158
- writer = ParamsWriter ("q6" , ["tag" ])
157
def serializes_q6(outdir, tags):
    """Write the q6 parameter rows (single column: tag).

    tags is iterated as (tag, count) pairs, one row per pair.
    """
    q6_writer = ParamsWriter(outdir, "q6", ["tag"])
    for tag_name, tag_count in tags:
        row = [tag_name]
        q6_writer.append(row, [tag_count])
161
161
162
- def serialize_q7 ( tags ):
163
- writer = ParamsWriter ("q7" , ["tag" ])
162
def serializes_q7(outdir, tags):
    """Write the q7 parameter rows (single column: tag).

    tags is iterated as (tag, count) pairs, one row per pair.
    """
    q7_writer = ParamsWriter(outdir, "q7", ["tag"])
    for pair in tags:
        tag_name, tag_count = pair
        q7_writer.append([tag_name], [tag_count])
166
166
167
- def serialize_q8 ( tags ):
168
- writer = ParamsWriter ("q8" , ["tag" ])
167
def serializes_q8(outdir, tags):
    """Write the q8 parameter rows (single column: tag).

    tags is iterated as (tag, count) pairs, one row per pair.
    """
    column_names = ["tag"]
    q8_writer = ParamsWriter(outdir, "q8", column_names)
    for tag_name, tag_count in tags:
        q8_writer.append([tag_name], [tag_count])
171
171
172
- def serialize_q9 ( tagclasses ):
173
- writer = ParamsWriter ("q9" , ["tagClass1" , "tagClass2" , "threshold" ])
172
def serializes_q9(outdir, tagclasses):
    """Write one q9 parameter row per unordered pair of tag classes.

    Columns: tagClass1, tagClass2, threshold.  The threshold column is always
    the string form of 200.  Each (tag class, count) item is paired with every
    item that comes after it in tagclasses.
    """
    columns = ["tagClass1", "tagClass2", "threshold"]
    q9_writer = ParamsWriter(outdir, "q9", columns)
    for position, (first_class, first_count) in enumerate(tagclasses):
        for second_class, second_count in tagclasses[position + 1:]:
            row = [first_class, second_class, str(200)]
            q9_writer.append(row, [first_count, second_count])
178
178
179
- def serialize_q10 ( tags ):
180
- writer = ParamsWriter ("q10" , ["tag" ])
179
def serializes_q10(outdir, tags):
    """Write the q10 parameter rows (single column: tag).

    tags is iterated as (tag, count) pairs, one row per pair.
    """
    q10_writer = ParamsWriter(outdir, "q10", ["tag"])
    for tag_name, tag_count in tags:
        params = [tag_name]
        counts = [tag_count]
        q10_writer.append(params, counts)
183
183
184
- def serialize_q11 ( countries , bad_words ):
185
- writer = ParamsWriter ("q11" , ["country" , "blacklist" ])
184
+ def serializes_q11 ( outdir , countries , bad_words ):
185
+ writer = ParamsWriter (outdir , "q11" , ["country" , "blacklist" ])
186
186
random .seed (1988 + 2 )
187
187
for country , count in countries :
188
188
num_words = random .randint (1 ,min (len (bad_words ),4 ));
@@ -200,75 +200,75 @@ def serialize_q11(countries, bad_words):
200
200
blacklist = bad_words [0 :num_words ]
201
201
writer .append ([country ,";" .join (blacklist )], [count ])
202
202
203
- def serialize_q12 ( post_weeks ):
204
- writer = ParamsWriter ("q12" , ["creationDate" , "likeCount" ])
203
def serializes_q12(outdir, post_weeks):
    """Write the q12 parameter rows (columns: creationDate, likeCount).

    post_weeks is iterated as (week, count) pairs.  The likeCount column is
    always the string form of 400.
    """
    q12_writer = ParamsWriter(outdir, "q12", ["creationDate", "likeCount"])
    for week, week_count in post_weeks:
        row = [str(week), str(400)]
        q12_writer.append(row, [week_count])
207
207
208
- def serialize_q13 ( countries ):
209
- writer = ParamsWriter ("q13" , ["country" ])
208
def serializes_q13(outdir, countries):
    """Write the q13 parameter rows (single column: country).

    countries is iterated as (country, count) pairs, one row per pair.
    """
    q13_writer = ParamsWriter(outdir, "q13", ["country"])
    for country_name, country_count in countries:
        row = [country_name]
        q13_writer.append(row, [country_count])
212
212
213
- def serialize_q14 ( creationdates ):
214
- writer = ParamsWriter ("q14" , ["begin" ,"end" ])
213
def serializes_q14(outdir, creationdates):
    """Write the q14 parameter rows (columns: begin, end).

    creationdates is iterated as (range, count) pairs; elements 0 and 1 of
    each range are stringified to form the begin and end columns.
    """
    q14_writer = ParamsWriter(outdir, "q14", ["begin", "end"])
    for date_range, range_count in creationdates:
        begin = str(date_range[0])
        end = str(date_range[1])
        q14_writer.append([begin, end], [range_count])
217
217
218
- def serialize_q15 ( countries ):
219
- writer = ParamsWriter ("q15" , ["country" ])
218
def serializes_q15(outdir, countries):
    """Write the q15 parameter rows (single column: country).

    countries is iterated as (country, count) pairs, one row per pair.
    """
    q15_writer = ParamsWriter(outdir, "q15", ["country"])
    for pair in countries:
        country_name, country_count = pair
        q15_writer.append([country_name], [country_count])
222
222
223
- def serialize_q16 ( persons , tagclasses , countries ):
224
- writer = ParamsWriter ("q16" , ["person" ,"tag" ,"country" ])
223
def serializes_q16(outdir, persons, tagclasses, countries):
    """Write a q16 parameter row for every (tag class, country) combination.

    Columns: person, tag, country.  Each row is completed with an entry of
    persons picked by a pseudo-random index.  The generator is reseeded with a
    fixed value first, so the picks repeat between runs.

    Args:
        outdir: output location forwarded to ParamsWriter.
        persons: indexable, non-empty sequence to pick the person column from.
        tagclasses: iterable of (tag class, count) pairs.
        countries: iterable of (country, count) pairs.

    Raises:
        ValueError: from random.randint if persons is empty and at least one
            row has to be written.
    """
    writer = ParamsWriter(outdir, "q16", ["person", "tag", "country"])
    random.seed(1988 + 2)
    # random.randint includes BOTH endpoints, so the largest valid index is
    # len(persons) - 1.  Using len(persons) as the bound occasionally indexed
    # one past the end and raised IndexError.
    last_index = len(persons) - 1
    for tag, count_a in tagclasses:
        for country, count_b in countries:
            person = persons[random.randint(0, last_index)]
            writer.append([str(person), tag, country], [0, count_a, count_b])
229
229
230
- def serialize_q17 ( countries ):
231
- writer = ParamsWriter ("q17" , ["country" ])
230
def serializes_q17(outdir, countries):
    """Write the q17 parameter rows (single column: country).

    countries is iterated as (country, count) pairs, one row per pair.
    """
    column_names = ["country"]
    q17_writer = ParamsWriter(outdir, "q17", column_names)
    for country_name, country_count in countries:
        q17_writer.append([country_name], [country_count])
234
234
235
- def serialize_q18 ( post_weeks ):
236
- writer = ParamsWriter ("q18" , ["creationDate" ])
235
def serializes_q18(outdir, post_weeks):
    """Write the q18 parameter rows (single column: creationDate).

    post_weeks is iterated as (week, count) pairs; the week is stringified to
    form the creationDate column.
    """
    q18_writer = ParamsWriter(outdir, "q18", ["creationDate"])
    for week, week_count in post_weeks:
        creation_date = str(week)
        q18_writer.append([creation_date], [week_count])
239
239
240
- def serialize_q19 ( tagclasses ):
240
def serializes_q19(outdir, tagclasses):
    """Write one q19 parameter row per unordered pair of tag classes.

    Columns: date, tagClass1, tagClass2.  The date column is the same in every
    row: the fixed date 1989-01-01 passed through format_date.  Each
    (tag class, count) item is paired with every item that comes after it.
    """
    PERS_DATE = datetime.strptime("1989-1-1", "%Y-%m-%d")
    q19_writer = ParamsWriter(outdir, "q19", ["date", "tagClass1", "tagClass2"])
    for position, (first_class, first_count) in enumerate(tagclasses):
        for second_class, second_count in tagclasses[position + 1:]:
            row = [str(format_date(PERS_DATE)), first_class, second_class]
            q19_writer.append(row, [first_count, second_count])
247
247
248
- def serialize_q20 ( tagclasses ):
249
- writer = ParamsWriter ("q20" , ["tagclass" ])
248
def serializes_q20(outdir, tagclasses):
    """Write the q20 parameter rows (single column: tagclass).

    tagclasses is iterated as (tag class, count) pairs, one row per pair.
    """
    q20_writer = ParamsWriter(outdir, "q20", ["tagclass"])
    for pair in tagclasses:
        tag_class, tag_class_count = pair
        q20_writer.append([tag_class], [tag_class_count])
252
252
253
- def serialize_q21 ( countries ):
254
- writer = ParamsWriter ("q21" , ["country" ,"endDate" ])
253
def serializes_q21(outdir, countries):
    """Write the q21 parameter rows (columns: country, endDate).

    countries is iterated as (country, count) pairs.  The endDate column is
    the module-level END_DATE passed through format_date and stringified.
    """
    q21_writer = ParamsWriter(outdir, "q21", ["country", "endDate"])
    for country_name, country_count in countries:
        row = [country_name, str(format_date(END_DATE))]
        q21_writer.append(row, [country_count])
257
257
258
- def serialize_q22 ( countries ):
259
- writer = ParamsWriter ("q22" , ["country1" ,"country2" ])
258
def serializes_q22(outdir, countries):
    """Write one q22 parameter row per unordered pair of countries.

    Columns: country1, country2.  Each (country, count) item is paired with
    every item that comes after it, so no pair is repeated or mirrored.
    """
    q22_writer = ParamsWriter(outdir, "q22", ["country1", "country2"])
    for position, (first_country, first_count) in enumerate(countries):
        for second_country, second_count in countries[position + 1:]:
            row = [first_country, second_country]
            q22_writer.append(row, [first_count, second_count])
264
264
265
- def serialize_q23 ( countries ):
266
- writer = ParamsWriter ("q23" , ["country" ])
265
def serializes_q23(outdir, countries):
    """Write the q23 parameter rows (single column: country).

    countries is iterated as (country, count) pairs, one row per pair.
    """
    q23_writer = ParamsWriter(outdir, "q23", ["country"])
    for country_name, country_count in countries:
        params = [country_name]
        counts = [country_count]
        q23_writer.append(params, counts)
269
269
270
- def serialize_q24 ( tagclasses ):
271
- writer = ParamsWriter ("q24" , ["tagClass" ])
270
def serializes_q24(outdir, tagclasses):
    """Write the q24 parameter rows (single column: tagClass).

    tagclasses is iterated as (tag class, count) pairs, one row per pair.
    """
    q24_writer = ParamsWriter(outdir, "q24", ["tagClass"])
    for tag_class, tag_class_count in tagclasses:
        row = [tag_class]
        q24_writer.append(row, [tag_class_count])
274
274
@@ -297,14 +297,14 @@ def main(argv=None):
297
297
argv = sys .argv
298
298
299
299
if len (argv ) < 3 :
300
- print "arguments: <input dir> <output>"
300
+ print "arguments: <input dir> <output dir >"
301
301
return 1
302
302
303
303
indir = argv [1 ]+ "/"
304
+ outdir = argv [2 ]+ "/"
304
305
activityFactorFiles = []
305
306
personFactorFiles = []
306
307
friendsFiles = []
307
- outdir = argv [2 ]+ "/"
308
308
309
309
for file in os .listdir (indir ):
310
310
if file .endswith ("activityFactors.txt" ):
@@ -363,34 +363,34 @@ def main(argv=None):
363
363
post_upper_threshold = (total_posts / (non_empty_weeks / 4 ))* 1.2
364
364
post_months = post_month_params (week_posts , post_lower_threshold , post_upper_threshold )
365
365
366
- serialize_q2 ( country_sets , post_day_ranges )
367
- serialize_q3 ( post_months )
368
- serialize_q14 ( post_month_params (week_posts , post_lower_threshold * 2 , post_upper_threshold * 2 ))
369
-
370
- serialize_q1 ( post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
371
- serialize_q12 ( post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
372
- serialize_q18 ( post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
373
-
374
- serialize_q4 ( key_params (tagclass_posts , total_posts / 20 , total_posts / 10 ), key_params (country_sample , total_posts / 120 , total_posts / 70 ))
375
- serialize_q5 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
376
- serialize_q6 ( key_params (tag_posts , total_posts / 1300 , total_posts / 900 ))
377
- serialize_q7 ( key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
378
- serialize_q8 ( key_params (tag_posts , total_posts / 600 , total_posts / 300 ))
379
- serialize_q9 ( key_params (tagclass_posts , 6000 , 25000 ))
380
- serialize_q10 ( key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
381
- serialize_q13 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
382
- serialize_q15 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
383
- serialize_q16 ( persons , key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 80 , total_posts / 20 ))
384
- serialize_q17 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
385
- serialize_q19 ( key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
386
- serialize_q21 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
387
- serialize_q22 ( key_params (country_sample , total_posts / 120 , total_posts / 40 ))
388
- serialize_q23 ( key_params (country_sample , total_posts / 200 , total_posts / 100 ))
389
- serialize_q24 ( key_params (tagclass_posts , total_posts / 140 , total_posts / 5 ))
366
+ serializes_q2 ( outdir , country_sets , post_day_ranges )
367
+ serializes_q3 ( outdir , post_months )
368
+ serializes_q14 ( outdir , post_month_params (week_posts , post_lower_threshold * 2 , post_upper_threshold * 2 ))
369
+
370
+ serializes_q1 ( outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
371
+ serializes_q12 ( outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
372
+ serializes_q18 ( outdir , post_date_right_open_range_params (week_posts , 0.3 * total_posts , 0.6 * total_posts ))
373
+
374
+ serializes_q4 ( outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 10 ), key_params (country_sample , total_posts / 120 , total_posts / 70 ))
375
+ serializes_q5 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
376
+ serializes_q6 ( outdir , key_params (tag_posts , total_posts / 1300 , total_posts / 900 ))
377
+ serializes_q7 ( outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
378
+ serializes_q8 ( outdir , key_params (tag_posts , total_posts / 600 , total_posts / 300 ))
379
+ serializes_q9 ( outdir , key_params (tagclass_posts , 6000 , 25000 ))
380
+ serializes_q10 ( outdir , key_params (tag_posts , total_posts / 900 , total_posts / 600 ))
381
+ serializes_q13 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
382
+ serializes_q15 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
383
+ serializes_q16 ( outdir , persons , key_params (tagclass_posts , total_posts / 30 , total_posts / 10 ), key_params (country_sample , total_posts / 80 , total_posts / 20 ))
384
+ serializes_q17 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
385
+ serializes_q19 ( outdir , key_params (tagclass_posts , total_posts / 60 , total_posts / 10 ))
386
+ serializes_q21 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
387
+ serializes_q22 ( outdir , key_params (country_sample , total_posts / 120 , total_posts / 40 ))
388
+ serializes_q23 ( outdir , key_params (country_sample , total_posts / 200 , total_posts / 100 ))
389
+ serializes_q24 ( outdir , key_params (tagclass_posts , total_posts / 140 , total_posts / 5 ))
390
390
391
391
# TODO: Refine
392
- serialize_q20 ( key_params (tagclass_posts , total_posts / 20 , total_posts / 2 ))
393
- serialize_q11 ( key_params (country_sample , total_posts / 80 , total_posts / 20 ), bad_words )
392
+ serializes_q20 ( outdir , key_params (tagclass_posts , total_posts / 20 , total_posts / 2 ))
393
+ serializes_q11 ( outdir , key_params (country_sample , total_posts / 80 , total_posts / 20 ), bad_words )
394
394
395
395
if __name__ == "__main__" :
396
396
sys .exit (main ())
0 commit comments