1
1
#!/usr/bin/env python2
2
2
3
- import sys
4
3
import discoverparams
5
4
import readfactors
6
5
import random
7
- import json
8
6
import os
9
7
import codecs
10
8
from datetime import date
11
9
from timeparameters import *
12
10
from calendar import timegm
13
11
14
- PERSON_PREFIX = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/pers"
15
- COUNTRY_PREFIX = "http://dbpedia.org/resource/"
16
12
# Fixed seed constant. NOTE(review): presumably passed to the random module so
# that parameter selection is reproducible; its use is outside this view -- confirm.
SEED = 1
17
13
18
- def findNameParameters (names , amount = 100 ):
14
+ def findNameParameters (names ):
19
15
srtd = sorted (names ,key = lambda x : - x [1 ])
20
16
res = []
21
17
hist = {}
@@ -72,54 +68,42 @@ def writeCSV(self):
72
68
73
69
def handlePersonParam(person):
    """Serialize a person parameter for the parameter file.

    Args:
        person: the person identifier selected for a query.

    Returns:
        The identifier converted with ``str``.
    """
    return str(person)
75
- #return {"PersonID": person, "PersonURI":(PERSON_PREFIX+str("%020d"%person))}
76
71
77
72
def handleTimeParam(timeParam):
    """Serialize a date-like parameter as epoch milliseconds.

    Args:
        timeParam: object exposing ``year``, ``month`` and ``day`` attributes;
            each is passed through ``int`` so numeric strings are accepted.

    Returns:
        A string holding the number of milliseconds between the Unix epoch
        and midnight (UTC) of the given calendar day.
    """
    day = date(year=int(timeParam.year),
               month=int(timeParam.month),
               day=int(timeParam.day))
    # timegm reads the time tuple as UTC and yields seconds; scale to millis.
    return str(timegm(day.timetuple()) * 1000)
83
76
84
77
def handleTimeDurationParam(timeParam):
    """Serialize a start date plus duration as ``<epoch millis>|<duration>``.

    Args:
        timeParam: object exposing ``year``, ``month``, ``day`` (each passed
            through ``int``) and ``duration`` attributes.

    Returns:
        A string with the milliseconds between the Unix epoch and midnight
        (UTC) of the given day, a ``|`` separator, and ``str(duration)``.
    """
    day = date(year=int(timeParam.year),
               month=int(timeParam.month),
               day=int(timeParam.day))
    # timegm reads the time tuple as UTC and yields seconds; scale to millis.
    millis = timegm(day.timetuple()) * 1000
    return str(millis) + "|" + str(timeParam.duration)
90
82
91
83
92
84
def handlePairCountryParam(countryPair):
    """Serialize a pair of country names as ``Country1|Country2``.

    Args:
        countryPair: a two-element sequence ``(Country1, Country2)`` of
            strings, passed as a single positional argument.

    Returns:
        The two names joined by ``|``.
    """
    # Explicit unpacking replaces the Python-2-only tuple-parameter syntax
    # (removed by PEP 3113); callers still pass one tuple.
    Country1, Country2 = countryPair
    return Country1 + "|" + Country2
94
- #return {"Country1":Country1, "Country2":Country2, "Country1URI":(COUNTRY_PREFIX + Country1), "Country2URI":(COUNTRY_PREFIX + Country2)}
95
86
96
87
def handleCountryParam(Country):
    """Serialize a country parameter.

    Args:
        Country: the country value selected for a query.

    Returns:
        The value unchanged.
    """
    return Country
98
- #return {"Country":Country, "CountryURI": (COUNTRY_PREFIX + Country)}
99
89
100
90
def handleTagParam(tag):
    """Serialize a tag parameter.

    Args:
        tag: the tag value selected for a query.

    Returns:
        The value unchanged.
    """
    return tag
102
- #return {"Tag": tag}
103
92
104
93
def handleTagTypeParam(tagType):
    """Serialize a tag-type (tag class) parameter.

    Args:
        tagType: the tag-type value selected for a query.

    Returns:
        The value unchanged.
    """
    return tagType
106
- #return {"TagType": tagType}
107
95
108
- def handleHSParam ((HS0 , HS1 )):
109
- return str (HS0 )
110
- #return {"HS0":HS0, "HS1":HS1}
96
def handleMonthParam(month):
    """Serialize a month parameter.

    Args:
        month: the month value selected for a query.

    Returns:
        The value converted with ``str``.
    """
    return str(month)
111
98
112
99
def handleFirstNameParam(firstName):
    """Serialize a first-name parameter.

    Args:
        firstName: the first name selected for a query.

    Returns:
        The value unchanged.
    """
    return firstName
114
- #return {"Name":firstName}
115
101
116
102
def handlePairPersonParam(personPair):
    """Serialize a pair of person identifiers as ``person1|person2``.

    Args:
        personPair: a two-element sequence ``(person1, person2)``, passed as
            a single positional argument.

    Returns:
        ``str(person1)`` and ``str(person2)`` joined by ``|``.
    """
    # Explicit unpacking replaces the Python-2-only tuple-parameter syntax
    # (removed by PEP 3113); callers still pass one tuple.
    person1, person2 = personPair
    return str(person1) + "|" + str(person2)
118
- #return {"Person1ID":person1, "Person2ID":person2, "Person2URI":(PERSON_PREFIX+str(person2)), "Person1URI":(PERSON_PREFIX+str(person1))}
119
104
120
105
def handleWorkYearParam(timeParam):
    """Serialize a work-year parameter.

    Args:
        timeParam: the year value selected for a query.

    Returns:
        The value converted with ``str``.
    """
    return str(timeParam)
122
- #return {"Date0":timeParam}
123
107
124
108
def main (argv = None ):
125
109
if argv is None :
@@ -149,7 +133,6 @@ def main(argv=None):
149
133
(personFactors , countryFactors , tagFactors , tagClassFactors , nameFactors , givenNames , ts , postHisto ) = readfactors .load (personFactorFiles , activityFactorFiles , friendsFiles )
150
134
151
135
# find person parameters
152
- print "find parameter bindings for Persons"
153
136
selectedPersonParams = {}
154
137
for i in range (1 , 15 ):
155
138
factors = readfactors .getFactorsForQuery (i , personFactors )
@@ -168,7 +151,6 @@ def main(argv=None):
168
151
secondPerson [i ].append (selectedPersonParams [i ][j ])
169
152
170
153
# find country parameters for Query 3 and 11
171
- print "find parameter bindings for Countries"
172
154
selectedCountryParams = {}
173
155
for i in [3 , 11 ]:
174
156
factors = readfactors .getCountryFactorsForQuery (i , countryFactors )
@@ -189,18 +171,6 @@ def main(argv=None):
189
171
break
190
172
secondCountry .append (selectedCountryParams [3 ][i ])
191
173
192
- #find tag parameters for Query 6
193
- #print "find parameter bindings for Tags"
194
- # old tag selection
195
- #selectedTagParams = {}
196
- #for i in [6]:
197
- # selectedTagParams[i] = discoverparams.generate(tagFactors, portion=0.1)
198
- # # make sure there are as many tag paramters as person parameters
199
- # oldlen = len(selectedTagParams[i])
200
- # newlen = len(selectedPersonParams[i])
201
- # selectedTagParams[i].extend([selectedTagParams[i][random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
202
-
203
- #print "find parameter bindings for Tags"
204
174
(leftTagFactors , rightTagFactors ) = discoverparams .divideFactors (tagFactors , 0.7 )
205
175
leftSize = len (leftTagFactors )
206
176
rightSize = len (rightTagFactors )
@@ -227,38 +197,24 @@ def main(argv=None):
227
197
selectedPersons = selectedPersonParams [2 ] + selectedPersonParams [3 ]+ selectedPersonParams [4 ]
228
198
selectedPersons += selectedPersonParams [5 ] + selectedPersonParams [9 ]
229
199
230
- selectedTimeParams = {}
231
200
timeSelectionInput = {
232
201
2 : (selectedPersonParams [2 ], "f" , getTimeParamsBeforeMedian ),
233
202
3 : (selectedPersonParams [3 ], "ff" , getTimeParamsWithMedian ),
234
203
4 : (selectedPersonParams [4 ], "f" , getTimeParamsWithMedian ),
235
204
5 : (selectedPersonParams [5 ], "ffg" , getTimeParamsAfterMedian ),
236
205
9 : (selectedPersonParams [9 ], "ff" , getTimeParamsBeforeMedian )
237
- #11: (selectedPersonParams[11], "w", getTimeParamsBeforeMedian) # friends of friends work
238
206
}
239
207
240
- print "find parameter bindings for Timestamps"
241
208
selectedTimeParams = findTimeParams (timeSelectionInput , personFactorFiles , activityFactorFiles , friendsFiles , ts [1 ])
242
209
# Query 11 takes WorksFrom timestamp
243
210
selectedTimeParams [11 ] = [random .randint (ts [2 ], ts [3 ]) for j in range (len (selectedPersonParams [11 ]))]
244
211
245
- # Query 10 additionally needs the HS parameter
246
- HS = []
212
+ # Query 10 additionally needs the month parameter
213
+ months = []
247
214
for person in selectedPersonParams [10 ]:
248
- HS0 = random .randint (1 , 12 )
249
- if HS0 == 12 :
250
- HS1 = 1
251
- else :
252
- HS1 = HS0 + 1
253
- HS .append ((HS0 , HS1 ))
254
-
255
- # Query 1 takes first name as a parameter
256
- #nameParams = findNameParameters(nameFactors)# discoverparams.generate(nameFactors)
257
- ## if there are fewer first names than person parameters, repeat some of the names
258
- #if len(nameParams) < len(selectedPersonParams[2]):
259
- # oldlen = len(nameParams)
260
- # newlen = len(selectedPersonParams[2])
261
- # nameParams.extend([nameParams[random.randint(0, oldlen-1)] for j in range(newlen-oldlen)])
215
+ month = random .randint (1 , 12 )
216
+ months .append (month )
217
+
262
218
nameParams = []
263
219
for person in selectedPersonParams [1 ]:
264
220
n = givenNames .getValue (person )
@@ -271,26 +227,28 @@ def main(argv=None):
271
227
csvWriter = CSVSerializer ()
272
228
csvWriter .setOutputFile (outdir + "interactive_%d_param.txt" % (i ))
273
229
if i != 13 and i != 14 : # these two queries take two Persons as parameters
274
- csvWriter .registerHandler (handlePersonParam , selectedPersonParams [i ], "Person " )
230
+ csvWriter .registerHandler (handlePersonParam , selectedPersonParams [i ], "personId " )
275
231
csvWriters [i ] = csvWriter
276
232
277
233
# add output for Time parameter
278
234
for i in timeSelectionInput :
279
235
if i == 3 or i == 4 :
280
- csvWriters [i ].registerHandler (handleTimeDurationParam , selectedTimeParams [i ], "Date0|Duration" )
281
- else :
282
- csvWriters [i ].registerHandler (handleTimeParam , selectedTimeParams [i ], "Date0" )
236
+ csvWriters [i ].registerHandler (handleTimeDurationParam , selectedTimeParams [i ], "startDate|durationDays" )
237
+ elif i == 2 or i == 9 :
238
+ csvWriters [i ].registerHandler (handleTimeParam , selectedTimeParams [i ], "maxDate" )
239
+ elif i == 5 :
240
+ csvWriters [i ].registerHandler (handleTimeParam , selectedTimeParams [i ], "minDate" )
283
241
284
242
# other, query-specific parameters
285
- csvWriters [1 ].registerHandler (handleFirstNameParam , nameParams , "Name " )
286
- csvWriters [3 ].registerHandler (handlePairCountryParam , zip (selectedCountryParams [3 ],secondCountry ),"Country1|Country2 " )
287
- csvWriters [6 ].registerHandler (handleTagParam , selectedTagParams [6 ],"Tag " )
288
- csvWriters [10 ].registerHandler (handleHSParam , HS , "HS0 " )
289
- csvWriters [11 ].registerHandler (handleCountryParam , selectedCountryParams [11 ],"Country " )
290
- csvWriters [11 ].registerHandler (handleWorkYearParam , selectedTimeParams [11 ],"Year " )
291
- csvWriters [12 ].registerHandler (handleTagTypeParam , selectedTagTypeParams [12 ],"TagType " )
292
- csvWriters [13 ].registerHandler (handlePairPersonParam , zip (selectedPersonParams [13 ], secondPerson [13 ]),"Person1|Person2 " )
293
- csvWriters [14 ].registerHandler (handlePairPersonParam , zip (selectedPersonParams [14 ], secondPerson [14 ]),"Person1|Person2 " )
243
+ csvWriters [1 ].registerHandler (handleFirstNameParam , nameParams , "firstName " )
244
+ csvWriters [3 ].registerHandler (handlePairCountryParam , zip (selectedCountryParams [3 ],secondCountry ), "countryXName|countryYName " )
245
+ csvWriters [6 ].registerHandler (handleTagParam , selectedTagParams [6 ], "tagName " )
246
+ csvWriters [10 ].registerHandler (handleMonthParam , months , "month " )
247
+ csvWriters [11 ].registerHandler (handleCountryParam , selectedCountryParams [11 ], "countryName " )
248
+ csvWriters [11 ].registerHandler (handleWorkYearParam , selectedTimeParams [11 ], "workFromYear " )
249
+ csvWriters [12 ].registerHandler (handleTagTypeParam , selectedTagTypeParams [12 ], "tagClassName " )
250
+ csvWriters [13 ].registerHandler (handlePairPersonParam , zip (selectedPersonParams [13 ], secondPerson [13 ]), "person1Id|person2Id " )
251
+ csvWriters [14 ].registerHandler (handlePairPersonParam , zip (selectedPersonParams [14 ], secondPerson [14 ]), "person1Id|person2Id " )
294
252
295
253
296
254
for j in csvWriters :
0 commit comments