Skip to content

Commit c5f0c08

Browse files
authored
Merge pull request #72 from ldbc/revamp-interactive-paramgen
Update parameter names in Interactive parameter generator
2 parents 10e4e0e + 2baa4a4 commit c5f0c08

File tree

2 files changed

+26
-67
lines changed

2 files changed

+26
-67
lines changed

paramgenerator/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,6 @@ For standalone testing, provide the input and output directories as parameters.
77
For example:
88

99
```bash
10+
./generateparams.py ../hadoop ../substitution_parameters
1011
./generateparamsbi.py ../hadoop ../substitution_parameters
1112
```

paramgenerator/generateparams.py

Lines changed: 25 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,17 @@
11
#!/usr/bin/env python2
22

3-
import sys
43
import discoverparams
54
import readfactors
65
import random
7-
import json
86
import os
97
import codecs
108
from datetime import date
119
from timeparameters import *
1210
from calendar import timegm
1311

14-
PERSON_PREFIX = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/pers"
15-
COUNTRY_PREFIX = "http://dbpedia.org/resource/"
1612
SEED = 1
1713

18-
def findNameParameters(names, amount = 100):
14+
def findNameParameters(names):
1915
srtd = sorted(names,key=lambda x: -x[1])
2016
res = []
2117
hist = {}
@@ -72,54 +68,42 @@ def writeCSV(self):
7268

7369
def handlePersonParam(person):
7470
return str(person)
75-
#return {"PersonID": person, "PersonURI":(PERSON_PREFIX+str("%020d"%person))}
7671

7772
def handleTimeParam(timeParam):
78-
#print timeParam.year
79-
#print timeParam.year
80-
res = str(timegm(date(year=int(timeParam.year),
73+
res = str(timegm(date(year=int(timeParam.year),
8174
month=int(timeParam.month), day=int(timeParam.day)).timetuple())*1000)
8275
return res
8376

8477
def handleTimeDurationParam(timeParam):
85-
#print timeParam.year
86-
res = str(timegm(date(year=int(timeParam.year),
78+
res = str(timegm(date(year=int(timeParam.year),
8779
month=int(timeParam.month), day=int(timeParam.day)).timetuple())*1000)
8880
res += "|"+str(timeParam.duration)
8981
return res
9082

9183

9284
def handlePairCountryParam((Country1, Country2)):
9385
return Country1+"|"+Country2
94-
#return {"Country1":Country1, "Country2":Country2, "Country1URI":(COUNTRY_PREFIX + Country1), "Country2URI":(COUNTRY_PREFIX + Country2)}
9586

9687
def handleCountryParam(Country):
9788
return Country
98-
#return {"Country":Country, "CountryURI": (COUNTRY_PREFIX + Country)}
9989

10090
def handleTagParam(tag):
10191
return tag
102-
#return {"Tag": tag}
10392

10493
def handleTagTypeParam(tagType):
10594
return tagType
106-
#return {"TagType": tagType}
10795

108-
def handleHSParam((HS0, HS1)):
109-
return str(HS0)
110-
#return {"HS0":HS0, "HS1":HS1}
96+
def handleMonthParam(month):
97+
return str(month)
11198

11299
def handleFirstNameParam(firstName):
113100
return firstName
114-
#return {"Name":firstName}
115101

116102
def handlePairPersonParam((person1, person2)):
117103
return str(person1)+"|"+str(person2)
118-
#return {"Person1ID":person1, "Person2ID":person2, "Person2URI":(PERSON_PREFIX+str(person2)), "Person1URI":(PERSON_PREFIX+str(person1))}
119104

120105
def handleWorkYearParam(timeParam):
121106
return str(timeParam)
122-
#return {"Date0":timeParam}
123107

124108
def main(argv=None):
125109
if argv is None:
@@ -149,7 +133,6 @@ def main(argv=None):
149133
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postHisto) = readfactors.load(personFactorFiles, activityFactorFiles, friendsFiles)
150134

151135
# find person parameters
152-
print "find parameter bindings for Persons"
153136
selectedPersonParams = {}
154137
for i in range(1, 15):
155138
factors = readfactors.getFactorsForQuery(i, personFactors)
@@ -168,7 +151,6 @@ def main(argv=None):
168151
secondPerson[i].append(selectedPersonParams[i][j])
169152

170153
# find country parameters for Query 3 and 11
171-
print "find parameter bindings for Countries"
172154
selectedCountryParams = {}
173155
for i in [3, 11]:
174156
factors = readfactors.getCountryFactorsForQuery(i, countryFactors)
@@ -189,18 +171,6 @@ def main(argv=None):
189171
break
190172
secondCountry.append(selectedCountryParams[3][i])
191173

192-
#find tag parameters for Query 6
193-
#print "find parameter bindings for Tags"
194-
# old tag selection
195-
#selectedTagParams = {}
196-
#for i in [6]:
197-
# selectedTagParams[i] = discoverparams.generate(tagFactors, portion=0.1)
198-
# # make sure there are as many tag parameters as person parameters
199-
# oldlen = len(selectedTagParams[i])
200-
# newlen = len(selectedPersonParams[i])
201-
# selectedTagParams[i].extend([selectedTagParams[i][random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
202-
203-
#print "find parameter bindings for Tags"
204174
(leftTagFactors, rightTagFactors) = discoverparams.divideFactors(tagFactors, 0.7)
205175
leftSize = len(leftTagFactors)
206176
rightSize = len(rightTagFactors)
@@ -227,38 +197,24 @@ def main(argv=None):
227197
selectedPersons = selectedPersonParams[2] + selectedPersonParams[3]+selectedPersonParams[4]
228198
selectedPersons += selectedPersonParams[5] + selectedPersonParams[9]
229199

230-
selectedTimeParams = {}
231200
timeSelectionInput = {
232201
2: (selectedPersonParams[2], "f", getTimeParamsBeforeMedian),
233202
3: (selectedPersonParams[3], "ff", getTimeParamsWithMedian),
234203
4: (selectedPersonParams[4], "f", getTimeParamsWithMedian),
235204
5: (selectedPersonParams[5], "ffg", getTimeParamsAfterMedian),
236205
9: (selectedPersonParams[9], "ff", getTimeParamsBeforeMedian)
237-
#11: (selectedPersonParams[11], "w", getTimeParamsBeforeMedian) # friends of friends work
238206
}
239207

240-
print "find parameter bindings for Timestamps"
241208
selectedTimeParams = findTimeParams(timeSelectionInput, personFactorFiles, activityFactorFiles, friendsFiles, ts[1])
242209
# Query 11 takes WorksFrom timestamp
243210
selectedTimeParams[11] = [random.randint(ts[2], ts[3]) for j in range(len(selectedPersonParams[11]))]
244211

245-
# Query 10 additionally needs the HS parameter
246-
HS = []
212+
# Query 10 additionally needs the month parameter
213+
months = []
247214
for person in selectedPersonParams[10]:
248-
HS0 = random.randint(1, 12)
249-
if HS0 == 12:
250-
HS1 = 1
251-
else:
252-
HS1 = HS0 + 1
253-
HS.append((HS0, HS1))
254-
255-
# Query 1 takes first name as a parameter
256-
#nameParams = findNameParameters(nameFactors)# discoverparams.generate(nameFactors)
257-
## if there are fewer first names than person parameters, repeat some of the names
258-
#if len(nameParams) < len(selectedPersonParams[2]):
259-
# oldlen = len(nameParams)
260-
# newlen = len(selectedPersonParams[2])
261-
# nameParams.extend([nameParams[random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
215+
month = random.randint(1, 12)
216+
months.append(month)
217+
262218
nameParams = []
263219
for person in selectedPersonParams[1]:
264220
n = givenNames.getValue(person)
@@ -271,26 +227,28 @@ def main(argv=None):
271227
csvWriter = CSVSerializer()
272228
csvWriter.setOutputFile(outdir+"interactive_%d_param.txt"%(i))
273229
if i != 13 and i != 14: # queries 13 and 14 take two Persons as parameters
274-
csvWriter.registerHandler(handlePersonParam, selectedPersonParams[i], "Person")
230+
csvWriter.registerHandler(handlePersonParam, selectedPersonParams[i], "personId")
275231
csvWriters[i] = csvWriter
276232

277233
# add output for Time parameter
278234
for i in timeSelectionInput:
279235
if i==3 or i==4:
280-
csvWriters[i].registerHandler(handleTimeDurationParam, selectedTimeParams[i], "Date0|Duration")
281-
else:
282-
csvWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i], "Date0")
236+
csvWriters[i].registerHandler(handleTimeDurationParam, selectedTimeParams[i], "startDate|durationDays")
237+
elif i==2 or i==9:
238+
csvWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i], "maxDate")
239+
elif i==5:
240+
csvWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i], "minDate")
283241

284242
# other, query-specific parameters
285-
csvWriters[1].registerHandler(handleFirstNameParam, nameParams, "Name")
286-
csvWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry),"Country1|Country2")
287-
csvWriters[6].registerHandler(handleTagParam, selectedTagParams[6],"Tag")
288-
csvWriters[10].registerHandler(handleHSParam, HS, "HS0")
289-
csvWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11],"Country")
290-
csvWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11],"Year")
291-
csvWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12],"TagType")
292-
csvWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13]),"Person1|Person2")
293-
csvWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14]),"Person1|Person2")
243+
csvWriters[1].registerHandler(handleFirstNameParam, nameParams, "firstName")
244+
csvWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry), "countryXName|countryYName")
245+
csvWriters[6].registerHandler(handleTagParam, selectedTagParams[6], "tagName")
246+
csvWriters[10].registerHandler(handleMonthParam, months, "month")
247+
csvWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11], "countryName")
248+
csvWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11], "workFromYear")
249+
csvWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12], "tagClassName")
250+
csvWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13]), "person1Id|person2Id")
251+
csvWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14]), "person1Id|person2Id")
294252

295253

296254
for j in csvWriters:

0 commit comments

Comments
 (0)