|
5 | 5 | import json
|
6 | 6 | import os
|
7 | 7 | import codecs
|
| 8 | +from datetime import date |
8 | 9 | from timeparameters import *
|
| 10 | +from calendar import timegm |
9 | 11 |
|
10 | 12 | PERSON_PREFIX = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/pers"
|
11 | 13 | COUNTRY_PREFIX = "http://dbpedia.org/resource/"
|
@@ -33,74 +35,94 @@ def findNameParameters(names, amount = 100):
|
33 | 35 |
|
34 | 36 | return res
|
35 | 37 |
|
class CSVSerializer:
    """Write query parameters as a '|'-separated text file.

    Each registered handler contributes one or more columns. A handler is a
    callable that turns a single input value into an already-delimited
    string (e.g. "a|b" for a two-column handler); its header string must
    name the same columns in the same order.
    """

    def __init__(self):
        # The three lists are parallel: handlers[j] renders inputs[j][i]
        # under the column title(s) headers[j].
        self.handlers = []
        self.inputs = []
        self.headers = []

    def setOutputFile(self, outputFile):
        """Set the path of the file that writeCSV() will (over)write."""
        self.outputFile = outputFile

    def registerHandler(self, handler, inputParams, header):
        """Register a column producer.

        handler     -- callable mapping one input value to a string
        inputParams -- sequence of input values, one per output row
        header      -- column title(s) for this handler, '|'-separated
        """
        # The header is kept on the serializer, not on the handler object:
        # the same handler function is registered with several serializers,
        # and built-in callables do not accept new attributes.
        self.handlers.append(handler)
        self.inputs.append(inputParams)
        self.headers.append(header)

    def writeCSV(self):
        """Write the header line followed by one line per input row.

        The number of rows is taken from the first registered input list.
        When nothing is registered the output file is still created, but
        left empty.
        """
        # The with-block guarantees the file is closed on every path,
        # including the early return and a failing handler.
        with codecs.open(self.outputFile, "w", encoding="utf-8") as output:
            if not self.inputs:
                return

            output.write("|".join(self.headers))
            output.write("\n")

            for row in range(len(self.inputs[0])):
                # compile a single CSV line from multiple handlers
                fields = [
                    handler(params[row])
                    for handler, params in zip(self.handlers, self.inputs)
                ]
                output.write("|".join(fields))
                output.write("\n")
|
65 | 72 |
|
def handlePersonParam(person):
    """Return the person identifier rendered as a CSV field."""
    return str(person)
68 | 76 |
|
def handleTimeParam(timeParam):
    """Return the date of timeParam as epoch seconds, rendered as a string.

    Reads the year, month and day attributes of timeParam; the timestamp is
    for midnight of that day interpreted as UTC (calendar.timegm).
    """
    day = date(year=timeParam.year, month=timeParam.month, day=timeParam.day)
    return str(timegm(day.timetuple()))


def handleTimeDurationParam(timeParam):
    """Return "<epoch seconds>|<duration>" for timeParam.

    The first field is the same value handleTimeParam produces; the second
    is str() of the duration attribute of timeParam.
    """
    return handleTimeParam(timeParam) + "|" + str(timeParam.duration)
| 87 | + |
| 88 | + |
def handlePairCountryParam(countryPair):
    """Return the two country names of countryPair joined by '|'.

    countryPair is a 2-item sequence (Country1, Country2) of strings.
    """
    # Unpack in the body: tuple parameters in the signature are a syntax
    # error on Python 3.
    Country1, Country2 = countryPair
    return Country1 + "|" + Country2
77 | 92 |
|
def handleCountryParam(Country):
    """Return the country name unchanged; it is already a CSV field."""
    return Country
80 | 96 |
|
def handleTagParam(tag):
    """Return the tag unchanged; it is already a CSV field."""
    return tag
83 | 100 |
|
def handleTagTypeParam(tagType):
    """Return the tag type unchanged; it is already a CSV field."""
    return tagType
86 | 104 |
|
def handleHSParam(hsPair):
    """Return "<HS0>|<HS1>" for the 2-item sequence hsPair.

    Both items are converted with str(), so numeric values are accepted.
    """
    # Unpack in the body: tuple parameters in the signature are a syntax
    # error on Python 3.
    HS0, HS1 = hsPair
    return str(HS0) + "|" + str(HS1)
89 | 108 |
|
def handleFirstNameParam(firstName):
    """Return the first name unchanged; it is already a CSV field."""
    return firstName
92 | 112 |
|
def handlePairPersonParam(personPair):
    """Return "<person1>|<person2>" for the 2-item sequence personPair.

    Both identifiers are converted with str().
    """
    # Unpack in the body: tuple parameters in the signature are a syntax
    # error on Python 3.
    person1, person2 = personPair
    return str(person1) + "|" + str(person2)
95 | 116 |
|
def handleWorkYearParam(timeParam):
    """Return the work-year parameter rendered as a CSV field via str()."""
    return str(timeParam)
98 | 120 |
|
99 | 121 | def main(argv=None):
|
100 | 122 | if argv is None:
|
101 | 123 | argv = sys.argv
|
102 | 124 |
|
103 |
| - if len(argv)< 3: |
| 125 | + if len(argv) < 3: |
104 | 126 | print "arguments: <input dir> <output>"
|
105 | 127 | return 1
|
106 | 128 |
|
@@ -220,30 +242,33 @@ def main(argv=None):
|
220 | 242 | jsonWriters = {}
|
221 | 243 | # all the queries have Person as parameter
|
222 | 244 | for i in range(1,15):
|
223 |
| - jsonWriter = JSONSerializer() |
| 245 | + jsonWriter = CSVSerializer() |
224 | 246 | jsonWriter.setOutputFile(outdir+"query_%d_param.txt"%(i))
|
225 | 247 | if i != 13 and i != 14: # these three queries take two Persons as parameters
|
226 |
| - jsonWriter.registerHandler(handlePersonParam, selectedPersonParams[i]) |
| 248 | + jsonWriter.registerHandler(handlePersonParam, selectedPersonParams[i], "Person") |
227 | 249 | jsonWriters[i] = jsonWriter
|
228 | 250 |
|
229 | 251 | # add output for Time parameter
|
230 | 252 | for i in timeSelectionInput:
|
231 |
| - jsonWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i]) |
| 253 | + if i==3 or i==4: |
| 254 | + jsonWriters[i].registerHandler(handleTimeDurationParam, selectedTimeParams[i], "Date0|Duration") |
| 255 | + else: |
| 256 | + jsonWriters[i].registerHandler(handleTimeParam, selectedTimeParams[i], "Date0") |
232 | 257 |
|
233 | 258 | # other, query-specific parameters
|
234 |
| - jsonWriters[1].registerHandler(handleFirstNameParam, nameParams) |
235 |
| - jsonWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry)) |
236 |
| - jsonWriters[6].registerHandler(handleTagParam, selectedTagParams[6]) |
237 |
| - jsonWriters[10].registerHandler(handleHSParam, HS) |
238 |
| - jsonWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11]) |
239 |
| - jsonWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11]) |
240 |
| - jsonWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12]) |
241 |
| - jsonWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13])) |
242 |
| - jsonWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14])) |
| 259 | + jsonWriters[1].registerHandler(handleFirstNameParam, nameParams, "Name") |
| 260 | + jsonWriters[3].registerHandler(handlePairCountryParam, zip(selectedCountryParams[3],secondCountry),"Country1|Country2") |
| 261 | + jsonWriters[6].registerHandler(handleTagParam, selectedTagParams[6],"Tag") |
| 262 | + jsonWriters[10].registerHandler(handleHSParam, HS, "HS0|HS1") |
| 263 | + jsonWriters[11].registerHandler(handleCountryParam, selectedCountryParams[11],"Country") |
| 264 | + jsonWriters[11].registerHandler(handleWorkYearParam, selectedTimeParams[11],"Date0") |
| 265 | + jsonWriters[12].registerHandler(handleTagTypeParam, selectedTagTypeParams[12],"TagType") |
| 266 | + jsonWriters[13].registerHandler(handlePairPersonParam, zip(selectedPersonParams[13], secondPerson[13]),"Person1|Person2") |
| 267 | + jsonWriters[14].registerHandler(handlePairPersonParam, zip(selectedPersonParams[14], secondPerson[14]),"Person1|Person2") |
243 | 268 |
|
244 | 269 |
|
245 | 270 | for j in jsonWriters:
|
246 |
| - jsonWriters[j].writeJSON() |
| 271 | + jsonWriters[j].writeCSV() |
247 | 272 |
|
248 | 273 | if __name__ == "__main__":
|
249 | 274 | sys.exit(main())
|
0 commit comments