Skip to content

Commit b7a8cd0

Browse files
committed
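Fixed the parameter generation scripts to work with the new factors format (separate personFactors and activityFactors files)
Fixed a minor bug in writing the factors table (writeActivityFactors no longer writes the per-person rows)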
1 parent 3ebbedd commit b7a8cd0

File tree

8 files changed

+29
-71
lines changed

8 files changed

+29
-71
lines changed

paramgenerator/discoverparams.pyc

6.27 KB
Binary file not shown.

paramgenerator/generateparams.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ def findNameParameters(names, amount = 100):
3434
i -= 1
3535
return res
3636

37-
38-
3937
class CSVSerializer:
4038
def __init__(self):
4139
self.handlers = []
@@ -130,19 +128,23 @@ def main(argv=None):
130128
return 1
131129

132130
indir = argv[1]+"/"
133-
factorFiles=[]
131+
activityFactorFiles=[]
132+
personFactorFiles=[]
134133
friendsFiles = []
135134
outdir = argv[2]+"/"
136135
random.seed(SEED)
137136

137+
138138
for file in os.listdir(indir):
139-
if file.endswith("factors.txt"):
140-
factorFiles.append(indir+file)
139+
if file.endswith("activityFactors.txt"):
140+
activityFactorFiles.append(indir+file)
141+
if file.endswith("personFactors.txt"):
142+
personFactorFiles.append(indir+file)
141143
if file.startswith("m0friendList"):
142144
friendsFiles.append(indir+file)
143145

144146
# read precomputed counts from files
145-
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postHisto) = readfactors.load(factorFiles, friendsFiles)
147+
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postHisto) = readfactors.load(personFactorFiles, activityFactorFiles, friendsFiles)
146148

147149
# find person parameters
148150
print "find parameter bindings for Persons"
@@ -234,7 +236,7 @@ def main(argv=None):
234236
}
235237

236238
print "find parameter bindings for Timestamps"
237-
selectedTimeParams = findTimeParams(timeSelectionInput, factorFiles, friendsFiles, ts[1])
239+
selectedTimeParams = findTimeParams(timeSelectionInput, personFactorFiles, activityFactorFiles, friendsFiles, ts[1])
238240
# Query 11 takes WorksFrom timestamp
239241
selectedTimeParams[11] = [random.randint(ts[2], ts[3]) for j in range(len(selectedPersonParams[11]))]
240242

paramgenerator/generateparamsbi.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,18 +299,21 @@ def main(argv=None):
299299
return 1
300300

301301
indir = argv[1]+"/"
302-
factorFiles=[]
302+
activityFactorFiles=[]
303+
personFactorFiles=[]
303304
friendsFiles = []
304305
outdir = argv[2]+"/"
305306

306307
for file in os.listdir(indir):
307-
if file.endswith("factors.txt"):
308-
factorFiles.append(indir+file)
308+
if file.endswith("activityFactors.txt"):
309+
activityFactorFiles.append(indir+file)
310+
if file.endswith("personFactors.txt"):
311+
personFactorFiles.append(indir+file)
309312
if file.startswith("m0friendList"):
310313
friendsFiles.append(indir+file)
311314

312315
# read precomputed counts from files
313-
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postsHisto) = readfactors.load(factorFiles, friendsFiles)
316+
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postsHisto) = readfactors.load(personFactorFiles,activityFactorFiles, friendsFiles)
314317
week_posts = convert_posts_histo(postsHisto)
315318

316319
persons = []

paramgenerator/readfactors.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def setValue(self, person, value):
5353
def getValue(self, person):
5454
return self.values[person]
5555

56-
def load(factorFiles, friendFiles):
56+
def load(personFactorFiles,activityFactorFiles, friendFiles):
5757
print "loading input for parameter generation"
5858
results = Factors()
5959
countries = Factors()
@@ -65,12 +65,10 @@ def load(factorFiles, friendFiles):
6565
names = {}
6666
timestamp = [0,0,0,0]
6767

68-
for inputFileName in factorFiles:
69-
with codecs.open(inputFileName, "r", "utf-8") as f:
70-
line = f.readline()
71-
personCount = int(line)
72-
for i in range(personCount):
73-
line = f.readline().split(",")
68+
for inputfileName in personFactorFiles:
69+
with codecs.open(inputfileName, "r", "utf-8") as f:
70+
for line in f.readlines():
71+
line = line.split(",")
7472
person = int(line[0])
7573
if not results.existParam(person):
7674
results.addNewParam(person)
@@ -88,6 +86,8 @@ def load(factorFiles, friendFiles):
8886
postsHisto.addNewParam(i)
8987
postsHisto.addValue(i, "p", int(line[9+i]))
9088

89+
for inputFileName in activityFactorFiles:
90+
with codecs.open(inputFileName, "r", "utf-8") as f:
9191
countryCount = int(f.readline())
9292
for i in range(countryCount):
9393
line = f.readline().split(",")

paramgenerator/readfactors.pyc

8.96 KB
Binary file not shown.

paramgenerator/timeparameters.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -143,18 +143,17 @@ def computeTimeMedians(factors, lastmonthcount = LAST_MONTHS):
143143

144144
return (medianFirstMonth, medianLastMonth, median)
145145

146-
def readTimeParams(persons, factorFiles, friendFiles):
146+
def readTimeParams(persons, personFactorFiles, activityFactorFiles, friendFiles):
147147

148148
postCounts = {}
149149
groupCounts = {}
150150
offset = 8
151151
monthcount = 12*3 + 1
152152

153-
for inputFactorFile in factorFiles:
153+
for inputFactorFile in personFactorFiles:
154154
with open(inputFactorFile, 'r') as f:
155-
personCount = int(f.readline())
156-
for i in range(personCount):
157-
line = f.readline().split(",")
155+
for line in f.readlines():
156+
line = line.split(",")
158157
person = int(line[0])
159158
localPostCounts = map(int,line[offset:offset+monthcount])
160159
localGroupCounts = map(int, line[offset+monthcount:])
@@ -209,15 +208,15 @@ def readTimeParams(persons, factorFiles, friendFiles):
209208

210209

211210

212-
def findTimeParams(input, factorFiles, friendFiles, startYear):
211+
def findTimeParams(input, personFactorFiles, activityFactorFiles, friendFiles, startYear):
213212
START_YEAR = startYear
214213
fPostCount = {}
215214
ffPostCount = {}
216215
persons = []
217216
for queryId in input:
218217
persons += input[queryId][0]
219218

220-
(fPostCount, ffPostCount, ffGroupCount) = readTimeParams(set(persons),factorFiles, friendFiles)
219+
(fPostCount, ffPostCount, ffGroupCount) = readTimeParams(set(persons),personFactorFiles, activityFactorFiles, friendFiles)
221220

222221
mapParam = {
223222
"f" : fPostCount,

paramgenerator/timeparameters.pyc

10 KB
Binary file not shown.

src/main/java/ldbc/snb/datagen/util/FactorTable.java

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -304,52 +304,6 @@ public void writePersonFactors(OutputStream writer ) {
304304

305305
public void writeActivityFactors(OutputStream writer ) {
306306
try {
307-
/*Iterator<Map.Entry<Long,PersonCounts>> iter = personCounts_.entrySet().iterator();
308-
while(iter.hasNext()) {
309-
Map.Entry<Long,PersonCounts> entry = iter.next();
310-
if (medianFirstName_.get(entry.getKey()) == null) {
311-
iter.remove();
312-
}
313-
}*/
314-
writer.write(Integer.toString(personCounts_.size()).getBytes("UTF8"));
315-
writer.write("\n".getBytes("UTF8"));
316-
for (Map.Entry<Long, PersonCounts> c: personCounts_.entrySet()){
317-
PersonCounts count = c.getValue();
318-
// correct the group counts
319-
//count.numberOfGroups += count.numberOfFriends;
320-
String name = medianFirstName_.get(c.getKey());
321-
if( name != null ) {
322-
StringBuffer strbuf = new StringBuffer();
323-
strbuf.append(c.getKey()); strbuf.append(",");
324-
strbuf.append(name);
325-
strbuf.append(",");
326-
strbuf.append(count.numFriends());
327-
strbuf.append(",");
328-
strbuf.append(count.numPosts());
329-
strbuf.append(",");
330-
strbuf.append(count.numLikes());
331-
strbuf.append(",");
332-
strbuf.append(count.numTagsOfMessages());
333-
strbuf.append(",");
334-
strbuf.append(count.numForums());
335-
strbuf.append(",");
336-
strbuf.append(count.numWorkPlaces());
337-
strbuf.append(",");
338-
strbuf.append(count.numComments());
339-
strbuf.append(",");
340-
341-
for (Long bucket : count.numMessagesPerMonth()) {
342-
strbuf.append(bucket);
343-
strbuf.append(",");
344-
}
345-
for (Long bucket : count.numForumsPerMonth()) {
346-
strbuf.append(bucket);
347-
strbuf.append(",");
348-
}
349-
strbuf.setCharAt(strbuf.length() - 1, '\n');
350-
writer.write(strbuf.toString().getBytes("UTF8"));
351-
}
352-
}
353307
writer.write(Integer.toString(postsPerCountry_.size()).getBytes("UTF8"));
354308
writer.write("\n".getBytes("UTF8"));
355309
for (Map.Entry<Integer, Long> c: postsPerCountry_.entrySet()){

0 commit comments

Comments
 (0)