Skip to content

Commit 1eafbce

Browse files
committed
Add parameter generation for Q25; update Q16 and Q18 (@mkaufmann)
1 parent bbfbe67 commit 1eafbce

File tree

1 file changed

+50
-15
lines changed

1 file changed

+50
-15
lines changed

paramgenerator/generateparamsbi.py

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,29 @@ def post_month_params(sample, lower_bound, upper_bound):
8181
results.append([[start_day, end_day], count_sum])
8282
return results
8383

84+
def enumerate_path_bounds(minLength, maxLength, minDifference):
    """Enumerate ``[lower, upper]`` path-length pairs.

    ``lower`` runs over ``range(minLength, maxLength)`` and, for each
    ``lower``, ``upper`` runs over ``range(lower + minDifference, maxLength)``.
    ``maxLength`` itself is therefore never produced as either bound.

    Args:
        minLength: First value used for the lower bound.
        maxLength: Exclusive end for both bounds.
        minDifference: Offset added to the lower bound to get the first
            upper bound.

    Returns:
        A list of two-element lists ``[lower, upper]``, ordered by ``lower``
        and then by ``upper``. Empty when no pair fits.
    """
    return [
        [lower, upper]
        for lower in range(minLength, maxLength)
        for upper in range(lower + minDifference, maxLength)
    ]
90+
91+
def prob_language_codes():
    """Return the fixed pool of language-code lists used for parameters.

    The pool has 13 entries, in this order: ``["ar"]`` once, ``["tk"]``
    twice, ``["uz"]`` eight times and ``["uz", "tk"]`` twice.

    NOTE(review): the repetition counts look like relative weights for how
    often each language set should appear - confirm with the query spec.

    Returns:
        A list of lists of language-code strings. Every entry is its own
        list object, so mutating one entry does not affect the others.
    """
    repeated_sets = (
        (("ar",), 1),
        (("tk",), 2),
        (("uz",), 8),
        (("uz", "tk"), 2),
    )
    # Build a fresh list per repetition so no two pool entries share storage.
    return [list(codes) for codes, times in repeated_sets for _ in range(times)]
101+
102+
def prob_post_lengths():
    """Return the fixed list of post-length values used for parameters.

    Returns:
        A new list ``[20, 40, 113, 97, 240]`` on every call, in that
        (unsorted) order.
    """
    return [20, 40, 113, 97, 240]
105+
106+
84107
# def post_three_month_params(sample, lower_bound, upper_bound):
85108
# results = []
86109
# for ix in range(0, len(sample)/12):
@@ -197,24 +220,25 @@ def serializes_q15(outdir, countries):
197220
for country, count in countries:
198221
writer.append([country])
199222

200-
def serializes_q16(outdir, persons, tagclasses, countries, path_bounds):
    """Write the q16 parameter rows.

    One row is appended for every (tag, country, path bound) combination.
    Each row also carries a person picked at random from ``persons``; the
    random generator is re-seeded with a fixed value first.

    Args:
        outdir: Output location, passed through to ``ParamsWriter``.
        persons: Non-empty indexable sequence of person identifiers.
        tagclasses: Iterable of ``(tag, count)`` pairs; the count is unused.
        countries: Iterable of ``(country, count)`` pairs; the count is unused.
        path_bounds: Iterable of ``(minDist, maxDist)`` pairs.
    """
    writer = ParamsWriter(outdir, "q16", ["person", "tag", "country", "minPathDistance", "maxPathDistance"])
    random.seed(1988+2)
    # random.randint includes BOTH endpoints, so the upper bound has to be the
    # last valid index; using len(persons) could index one past the end.
    last_index = len(persons) - 1
    for tag, _count_a in tagclasses:
        for country, _count_b in countries:
            for minDist, maxDist in path_bounds:
                person = persons[random.randint(0, last_index)]
                writer.append([str(person), tag, country, str(minDist), str(maxDist)])
207230

208231
def serializes_q17(outdir, countries):
    """Write the q17 parameter rows: one single-column row per country.

    Args:
        outdir: Output location, passed through to ``ParamsWriter``.
        countries: Iterable of ``(country, count)`` pairs; only the country
            is written.
    """
    writer = ParamsWriter(outdir, "q17", ["country"])
    for country, _count in countries:
        row = [country]
        writer.append(row)
212235

213-
def serializes_q18(outdir, post_weeks, lengths, languages):
    """Write the q18 parameter rows.

    A row is appended for every combination of week, length and language
    set, in that nesting order. The language codes of a set are joined
    with ``;`` into a single column.

    Args:
        outdir: Output location, passed through to ``ParamsWriter``.
        post_weeks: Iterable of ``(week, count)`` pairs; the count is unused.
        lengths: Iterable of length-threshold values.
        languages: Iterable of sequences of language-code strings.
    """
    writer = ParamsWriter(outdir, "q18", ["date", "lengthThreshold", "languages"])
    for week, _count in post_weeks:
        for threshold in lengths:
            for codes in languages:
                row = [str(week), str(threshold), ";".join(codes)]
                writer.append(row)
218242

219243
def serializes_q19(outdir, tagclasses):
220244
PERS_DATE=datetime.strptime("1989-1-1", "%Y-%m-%d")
@@ -251,9 +275,19 @@ def serializes_q24(outdir, tagclasses):
251275
for tagclass, count in tagclasses:
252276
writer.append([tagclass])
253277

254-
def serialize_q25(outdir, persons, post_month_ranges):
    """Write the q25 parameter rows: random person pairs per date range.

    For every entry of ``post_month_ranges`` up to ten rows are appended
    (fewer when ``persons`` has fewer than ten entries). Each row holds two
    different persons drawn at random, followed by the start and end of the
    range. Nothing is appended when ``persons`` does not contain at least
    two distinct values.

    Args:
        outdir: Output location, passed through to ``ParamsWriter``.
        persons: Indexable sequence of person identifiers.
        post_month_ranges: Iterable of ``(day_range, count_post)`` pairs,
            where ``day_range[0]`` and ``day_range[1]`` are written as
            ``startDate`` and ``endDate``.
    """
    # Every other writer in this file passes outdir first; it was missing here.
    writer = ParamsWriter(outdir, "q25", ["person1Id", "person2Id", "startDate", "endDate"])

    # Drawing two *different* persons needs at least two distinct values;
    # without this guard the redraw loop below would never terminate.
    if len(persons) < 2 or all(person == persons[0] for person in persons):
        return

    last_index = len(persons) - 1
    rows_per_range = min(len(persons), 10)
    for day_range, count_post in post_month_ranges:
        start_date = str(day_range[0])
        end_date = str(day_range[1])
        for _ in range(rows_per_range):
            person1Id = persons[random.randint(0, last_index)]
            person2Id = persons[random.randint(0, last_index)]
            # Redraw until the second person differs from the first.
            while person2Id == person1Id:
                person2Id = persons[random.randint(0, last_index)]
            # NOTE(review): the other serializers call writer.append with a
            # single argument; confirm ParamsWriter.append accepts this second
            # list (presumably per-column counts).
            writer.append([str(person1Id), str(person2Id), start_date, end_date],
                          [0, 0, count_post, count_post])
290+
257291

258292
def add_months(sourcedate,months):
259293
month = sourcedate.month - 1 + months
@@ -343,13 +377,17 @@ def main(argv=None):
343377
post_upper_threshold = (total_posts/(non_empty_weeks/4))*1.2
344378
post_months = post_month_params(week_posts, post_lower_threshold, post_upper_threshold)
345379

380+
path_bounds = enumerate_path_bounds(3, 9, 2)
381+
language_codes = prob_language_codes()
382+
post_lengths = prob_post_lengths()
383+
346384
serializes_q2 (outdir, key_params(country_sample, total_posts/200, total_posts/100), post_day_ranges) # TODO determine constants
347385
serializes_q3 (outdir, post_months)
348386
serializes_q14(outdir, post_month_params(week_posts, post_lower_threshold*2, post_upper_threshold*2))
349387

350388
serializes_q1 (outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
351389
serializes_q12(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
352-
serializes_q18(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
390+
serializes_q18(outdir, post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts), post_lengths, language_codes)
353391
serializes_q10(outdir, key_params(tag_posts, total_posts/900, total_posts/600), post_date_right_open_range_params(week_posts, 0.3*total_posts, 0.6*total_posts))
354392

355393
serializes_q4 (outdir, key_params(tagclass_posts, total_posts/20, total_posts/10), key_params(country_sample, total_posts/120, total_posts/70))
@@ -360,20 +398,17 @@ def main(argv=None):
360398
serializes_q9 (outdir, key_params(tagclass_posts, 6000, 25000))
361399
serializes_q13(outdir, key_params(country_sample, total_posts/200, total_posts/100))
362400
serializes_q15(outdir, key_params(country_sample, total_posts/200, total_posts/100))
363-
serializes_q16(outdir, persons, key_params(tagclass_posts, total_posts/30, total_posts/10), key_params(country_sample, total_posts/80, total_posts/20))
401+
serializes_q16(outdir, persons, key_params(tagclass_posts, total_posts/30, total_posts/10), key_params(country_sample, total_posts/80, total_posts/20), path_bounds)
364402
serializes_q17(outdir, key_params(country_sample, total_posts/200, total_posts/100))
365403
serializes_q19(outdir, key_params(tagclass_posts, total_posts/60, total_posts/10))
366404
serializes_q21(outdir, key_params(country_sample, total_posts/200, total_posts/100))
367405
serializes_q22(outdir, key_params(country_sample, total_posts/120, total_posts/40))
368406
serializes_q23(outdir, key_params(country_sample, total_posts/200, total_posts/100))
369-
serializes_q24(outdir, key_params(tagclass_posts, total_posts/140, total_posts/5))
407+
serializes_q24(outdir, key_params(tagclass_posts, total_posts/140, total_posts/5)) serialize_q25(outdir, persons, post_months)
370408

371409
# TODO: Refine
372410
serializes_q20(outdir, key_params(tagclass_posts, total_posts/20, total_posts/2))
373411
serializes_q11(outdir, key_params(country_sample, total_posts/80, total_posts/20), bad_words)
374412

375-
# TODO: implement
376-
#serializes_q25(outdir, ...)
377-
378413
if __name__ == "__main__":
379414
sys.exit(main())

0 commit comments

Comments
 (0)