Skip to content

Commit c9760b0

Browse files
committed
Seed the random number generator in Parameter Curation
1 parent d2ea1ae commit c9760b0

File tree

2 files changed

+21
-7
lines changed

2 files changed

+21
-7
lines changed

paramgenerator/generateparams.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,20 @@
44
import random
55
import json
66
import os
7+
import codecs
78
from timeparameters import *
89

910
PERSON_PREFIX = "http://www.ldbc.eu/ldbc_socialnet/1.0/data/pers"
1011
COUNTRY_PREFIX = "http://dbpedia.org/resource/"
12+
SEED = 1
1113

1214
def findNameParameters(names, amount = 100):
1315
srtd = sorted(names,key=lambda x: -x[1])
14-
return map(lambda x:x[0], srtd[:amount-1])
16+
res = []
17+
for t in srtd:
18+
if t[1] > 100 and t[1] < 150:
19+
res.append(t[0])
20+
return res
1521

1622
class JSONSerializer:
1723
def __init__(self):
@@ -26,7 +32,7 @@ def registerHandler(self, handler, inputParams):
2632
self.inputs.append(inputParams)
2733

2834
def writeJSON(self):
29-
output = open(self.outputFile, "w")
35+
output = codecs.open(self.outputFile, "w", encoding="utf-8")
3036

3137
if len(self.inputs) == 0:
3238
return
@@ -38,7 +44,8 @@ def writeJSON(self):
3844
handler = self.handlers[j]
3945
data = self.inputs[j][i]
4046
jsonDict.update(handler(data))
41-
output.write(json.dumps(jsonDict)+"\n")
47+
output.write(json.dumps(jsonDict, ensure_ascii=False))
48+
output.write("\n")
4249

4350
output.close()
4451

@@ -58,7 +65,7 @@ def handleCountryParam(Country):
5865
return {"Country":Country, "CountryURI": (COUNTRY_PREFIX + Country)}
5966

6067
def handleTagParam(tag):
61-
return {"Tag": tag.encode("utf-8")}
68+
return {"Tag": tag}
6269

6370
def handleTagTypeParam(tagType):
6471
return {"TagType": tagType}
@@ -67,7 +74,7 @@ def handleHSParam((HS0, HS1)):
6774
return {"HS0":HS0, "HS1":HS1}
6875

6976
def handleFirstNameParam(firstName):
70-
return {"Name":firstName.decode("utf-8")}
77+
return {"Name":firstName}
7178

7279
def handlePairPersonParam((person1, person2)):
7380
return {"Person1ID":person1, "Person2ID":person2, "Person2URI":(PERSON_PREFIX+str(person2)), "Person1URI":(PERSON_PREFIX+str(person1))}
@@ -87,7 +94,8 @@ def main(argv=None):
8794
factorFiles=[]
8895
friendsFiles = []
8996
outdir = argv[2]+"/"
90-
97+
random.seed(SEED)
98+
9199
for file in os.listdir(indir):
92100
if file.endswith("factors.txt"):
93101
factorFiles.append(indir+file)

paramgenerator/readfactors.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
FACTOR_MAP = {value: key for (key, value) in enumerate(FACTORS)}
88

9+
910
class FactorCount:
1011
def __init__(self):
1112
self.values = [0]*len(FACTORS)
@@ -90,6 +91,11 @@ def load(factorFiles, friendFiles):
9091
line = f.readline()
9192
count = line[1+line.rfind(","):]
9293
name = line[:line.rfind(",")]
94+
try:
95+
name.decode('ascii')
96+
except UnicodeEncodeError:
97+
continue
98+
9399
if not name in tags:
94100
tags[name] = 0
95101
tags[name] += int(count)
@@ -167,7 +173,7 @@ def getFactorsForQuery(queryId, factors):
167173
5: getColumns(factors, ["ff", "ffg"]),
168174
6: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
169175
7: getColumns(factors, ["pl", "p"]),
170-
8: getColumns(factors, ["p", "pr"]), ### add "pr"
176+
8: getColumns(factors, ["pr","p"]), ### add "pr"
171177
9: getColumns(factors, ["f", "ffp", "ff"]),
172178
10: getColumns(factors, ["f","ff", "ffp", "ffpt"]),
173179
11: getColumns(factors, ["f","ff", "ffw"]),

0 commit comments

Comments
 (0)