Skip to content

Commit 4cd55da

Browse files
committed
now export to CSV; convert time into UNIX timestamp
1 parent 8760695 commit 4cd55da

File tree

2 files changed

+62
-35
lines changed

2 files changed

+62
-35
lines changed

paramgenerator/generateparams.py

Lines changed: 59 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import json
66
import os
77
import codecs
8+
from datetime import date
89
from timeparameters import *
10+
from calendar import timegm
911

1012
PERSON_PREFIX = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/pers"
1113
COUNTRY_PREFIX = "http://dbpedia.org/resource/"
@@ -33,74 +35,94 @@ def findNameParameters(names, amount = 100):
3335

3436
return res
3537

36-
class CSVSerializer:
    """Write query parameters as a '|'-separated text file.

    Handlers are registered one at a time together with the list of inputs
    they consume and the header text for the column(s) they produce.
    writeCSV() then emits one header line followed by one line per input row,
    where each line is the '|'-joined output of every handler for that row.
    """

    def __init__(self):
        self.handlers = []
        self.inputs = []
        # Headers are stored per serializer (parallel to self.handlers)
        # instead of as an attribute on the handler function, so the same
        # handler can be registered on several serializers independently.
        self.headers = []

    def setOutputFile(self, outputFile):
        """Remember the path that writeCSV() will write to."""
        self.outputFile = outputFile

    def registerHandler(self, handler, inputParams, header):
        """Register one column producer.

        handler     -- callable taking one element of inputParams and
                       returning the text for its column(s)
        inputParams -- indexable sequence of inputs, one per output row
        header      -- header text for the column(s) produced by handler
        """
        self.handlers.append(handler)
        self.inputs.append(inputParams)
        self.headers.append(header)

    def writeCSV(self):
        """Write the header line and one line per row to the output file.

        The file is always created (and truncated); if no handler has been
        registered it is left empty. The number of rows is the length of the
        first registered input list; a shorter later list raises IndexError.
        """
        # 'with' guarantees the file is closed on every path, including the
        # early return below and any exception raised by a handler.
        with codecs.open(self.outputFile, "w", encoding="utf-8") as output:
            if not self.inputs:
                return

            output.write("|".join(self.headers))
            output.write("\n")

            for i in range(len(self.inputs[0])):
                # compile a single CSV line from multiple handlers
                csvLine = [handler(params[i])
                           for handler, params in zip(self.handlers, self.inputs)]
                output.write("|".join(csvLine))
                output.write("\n")
6572

6673
def handlePersonParam(person):
    """Return person converted to text with str(), for one CSV column."""
    return str(person)
6876

6977
def handleTimeParam(timeParam):
    """Return the date in timeParam as a UNIX timestamp string.

    timeParam must expose year, month and day attributes. The date is taken
    at 00:00:00 and interpreted as UTC (calendar.timegm), and the resulting
    number of seconds since the epoch is returned as a string.
    """
    dayStart = date(year=timeParam.year, month=timeParam.month, day=timeParam.day)
    return str(timegm(dayStart.timetuple()))
7481

82+
def handleTimeDurationParam(timeParam):
    """Return "<timestamp>|<duration>" for the two columns Date0 and Duration.

    timeParam must expose year, month, day and duration attributes. The date
    is taken at 00:00:00 and interpreted as UTC (calendar.timegm); the
    duration is appended unchanged via str().
    """
    dayStart = date(year=timeParam.year, month=timeParam.month, day=timeParam.day)
    timestamp = str(timegm(dayStart.timetuple()))
    return timestamp + "|" + str(timeParam.duration)
87+
88+
7589
def handlePairCountryParam(countryPair):
    """Return the two country names of countryPair joined by '|'.

    countryPair is a 2-element sequence (Country1, Country2) of strings.
    It is unpacked explicitly because tuple parameters in a function
    signature are not valid syntax in Python 3.
    """
    Country1, Country2 = countryPair
    return Country1 + "|" + Country2
7792

7893
def handleCountryParam(Country):
    """Return Country unchanged; it is written as-is into one CSV column."""
    return Country
8096

8197
def handleTagParam(tag):
    """Return tag unchanged; it is written as-is into one CSV column."""
    return tag
83100

84101
def handleTagTypeParam(tagType):
    """Return tagType unchanged; it is written as-is into one CSV column."""
    return tagType
86104

87105
def handleHSParam(hsPair):
    """Return the two values of hsPair as text, joined by '|'.

    hsPair is a 2-element sequence (HS0, HS1); each element is converted
    with str(). It is unpacked explicitly because tuple parameters in a
    function signature are not valid syntax in Python 3.
    """
    HS0, HS1 = hsPair
    return str(HS0) + "|" + str(HS1)
89108

90109
def handleFirstNameParam(firstName):
    """Return firstName unchanged; it is written as-is into one CSV column."""
    return firstName
92112

93113
def handlePairPersonParam(personPair):
    """Return the two values of personPair as text, joined by '|'.

    personPair is a 2-element sequence (person1, person2); each element is
    converted with str(). It is unpacked explicitly because tuple parameters
    in a function signature are not valid syntax in Python 3.
    """
    person1, person2 = personPair
    return str(person1) + "|" + str(person2)
95116

96117
def handleWorkYearParam(timeParam):
    """Return timeParam converted to text with str(), for one CSV column."""
    return str(timeParam)
98120

99121
def main(argv=None):
100122
if argv is None:
101123
argv = sys.argv
102124

103-
if len(argv)< 3:
125+
if len(argv) < 3:
104126
print "arguments: <input dir> <output>"
105127
return 1
106128

@@ -220,30 +242,33 @@ def main(argv=None):
220242
jsonWriters = {}
221243
# all the queries have Person as parameter
222244
for i in range(1,15):
223-
jsonWriter = JSONSerializer()
245+
jsonWriter = CSVSerializer()
224246
jsonWriter.setOutputFile(outdir+"query_%d_param.txt"%(i))
225247
if i != 13 and i != 14: # queries 13 and 14 take two Persons as parameters
226-
jsonWriter.registerHandler(handlePersonParam, selectedPersonParams[i])
248+
jsonWriter.registerHandler(handlePersonParam, selectedPersonParams[i], "Person")
227249
jsonWriters[i] = jsonWriter
228250

229251
# add output for Time parameter
230252
for i in timeSelectionInput:
231-
jsonWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i])
253+
if i==3 or i==4:
254+
jsonWriters[i].registerHandler(handleTimeDurationParam, selectedTimeParams[i], "Date0|Duration")
255+
else:
256+
jsonWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i], "Date0")
232257

233258
# other, query-specific parameters
234-
jsonWriters[1].registerHandler(handleFirstNameParam, nameParams)
235-
jsonWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry))
236-
jsonWriters[6].registerHandler(handleTagParam, selectedTagParams[6])
237-
jsonWriters[10].registerHandler(handleHSParam, HS)
238-
jsonWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11])
239-
jsonWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11])
240-
jsonWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12])
241-
jsonWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13]))
242-
jsonWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14]))
259+
jsonWriters[1].registerHandler(handleFirstNameParam, nameParams, "Name")
260+
jsonWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry),"Country1|Country2")
261+
jsonWriters[6].registerHandler(handleTagParam, selectedTagParams[6],"Tag")
262+
jsonWriters[10].registerHandler(handleHSParam, HS, "HS0|HS1")
263+
jsonWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11],"Country")
264+
jsonWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11],"Date0")
265+
jsonWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12],"TagType")
266+
jsonWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13]),"Person1|Person2")
267+
jsonWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14]),"Person1|Person2")
243268

244269

245270
for j in jsonWriters:
246-
jsonWriters[j].writeJSON()
271+
jsonWriters[j].writeCSV()
247272

248273
if __name__ == "__main__":
249274
sys.exit(main())

paramgenerator/timeparameters.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import itertools
88

99
LAST_MONTHS = 3 # number of months that we consider for date parameters in filters of the form timestamp <= Date0
10+
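START_YEAR = 2010 # default start year, meant to be overwritten from the data generator output. NOTE(review): findTimeParams assigns START_YEAR without a visible `global` declaration, which would only bind a local and leave this default in effect -- confirm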
1011

1112
class MonthYearCount:
1213
def __init__(self, month, year, count):
@@ -68,7 +69,7 @@ def getTimeParamsWithMedian(factors, (medianFirstMonth, medianLastMonth, median)
6869
input = sorted(values,key=lambda myc: (myc.year, myc.month))
6970
currentMedian = getMedian(values,lambda myc: myc.count, True)
7071
if int(median) == 0 or int(currentMedian.count) == 0:
71-
res.append(TimeParameter(0,0,1,0))
72+
res.append(TimeParameter(START_YEAR,1,1,0))
7273
continue
7374
if currentMedian.count > median:
7475
duration = int(28*currentMedian.count/median)
@@ -210,6 +211,7 @@ def readTimeParams(persons, factorFiles, friendFiles):
210211

211212

212213
def findTimeParams(input, factorFiles, friendFiles, startYear):
214+
START_YEAR = startYear
213215
fPostCount = {}
214216
ffPostCount = {}
215217
persons = []

0 commit comments

Comments
 (0)