Skip to content

Commit d4fe499

Browse files
committed
nothing
2 parents a1dc1d6 + d67cf5f commit d4fe499

File tree

73 files changed

+14263
-481
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+14263
-481
lines changed

.project

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>ldbc_snb_datagen</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.jdt.core.javabuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
<buildCommand>
14+
<name>org.eclipse.m2e.core.maven2Builder</name>
15+
<arguments>
16+
</arguments>
17+
</buildCommand>
18+
</buildSpec>
19+
<natures>
20+
<nature>org.eclipse.jdt.core.javanature</nature>
21+
<nature>org.eclipse.m2e.core.maven2Nature</nature>
22+
</natures>
23+
</projectDescription>

paramgenerator/discoverparams.pyc

6.27 KB
Binary file not shown.

paramgenerator/generateparams.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,6 @@ def findNameParameters(names, amount = 100):
3434
i -= 1
3535
return res
3636

37-
38-
3937
class CSVSerializer:
4038
def __init__(self):
4139
self.handlers = []
@@ -130,19 +128,23 @@ def main(argv=None):
130128
return 1
131129

132130
indir = argv[1]+"/"
133-
factorFiles=[]
131+
activityFactorFiles=[]
132+
personFactorFiles=[]
134133
friendsFiles = []
135134
outdir = argv[2]+"/"
136135
random.seed(SEED)
137136

137+
138138
for file in os.listdir(indir):
139-
if file.endswith("factors.txt"):
140-
factorFiles.append(indir+file)
139+
if file.endswith("activityFactors.txt"):
140+
activityFactorFiles.append(indir+file)
141+
if file.endswith("personFactors.txt"):
142+
personFactorFiles.append(indir+file)
141143
if file.startswith("m0friendList"):
142144
friendsFiles.append(indir+file)
143145

144146
# read precomputed counts from files
145-
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postHisto) = readfactors.load(factorFiles, friendsFiles)
147+
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postHisto) = readfactors.load(personFactorFiles, activityFactorFiles, friendsFiles)
146148

147149
# find person parameters
148150
print "find parameter bindings for Persons"
@@ -234,7 +236,7 @@ def main(argv=None):
234236
}
235237

236238
print "find parameter bindings for Timestamps"
237-
selectedTimeParams = findTimeParams(timeSelectionInput, factorFiles, friendsFiles, ts[1])
239+
selectedTimeParams = findTimeParams(timeSelectionInput, personFactorFiles, activityFactorFiles, friendsFiles, ts[1])
238240
# Query 11 takes WorksFrom timestamp
239241
selectedTimeParams[11] = [random.randint(ts[2], ts[3]) for j in range(len(selectedPersonParams[11]))]
240242

paramgenerator/generateparamsbi.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -299,18 +299,21 @@ def main(argv=None):
299299
return 1
300300

301301
indir = argv[1]+"/"
302-
factorFiles=[]
302+
activityFactorFiles=[]
303+
personFactorFiles=[]
303304
friendsFiles = []
304305
outdir = argv[2]+"/"
305306

306307
for file in os.listdir(indir):
307-
if file.endswith("factors.txt"):
308-
factorFiles.append(indir+file)
308+
if file.endswith("activityFactors.txt"):
309+
activityFactorFiles.append(indir+file)
310+
if file.endswith("personFactors.txt"):
311+
personFactorFiles.append(indir+file)
309312
if file.startswith("m0friendList"):
310313
friendsFiles.append(indir+file)
311314

312315
# read precomputed counts from files
313-
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postsHisto) = readfactors.load(factorFiles, friendsFiles)
316+
(personFactors, countryFactors, tagFactors, tagClassFactors, nameFactors, givenNames, ts, postsHisto) = readfactors.load(personFactorFiles,activityFactorFiles, friendsFiles)
314317
week_posts = convert_posts_histo(postsHisto)
315318

316319
persons = []

paramgenerator/readfactors.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,7 @@ def setValue(self, person, value):
5353
def getValue(self, person):
5454
return self.values[person]
5555

56-
def load(factorFiles, friendFiles):
56+
def load(personFactorFiles,activityFactorFiles, friendFiles):
5757
print "loading input for parameter generation"
5858
results = Factors()
5959
countries = Factors()
@@ -65,12 +65,10 @@ def load(factorFiles, friendFiles):
6565
names = {}
6666
timestamp = [0,0,0,0]
6767

68-
for inputFileName in factorFiles:
69-
with codecs.open(inputFileName, "r", "utf-8") as f:
70-
line = f.readline()
71-
personCount = int(line)
72-
for i in range(personCount):
73-
line = f.readline().split(",")
68+
for inputfileName in personFactorFiles:
69+
with codecs.open(inputfileName, "r", "utf-8") as f:
70+
for line in f.readlines():
71+
line = line.split(",")
7472
person = int(line[0])
7573
if not results.existParam(person):
7674
results.addNewParam(person)
@@ -88,6 +86,8 @@ def load(factorFiles, friendFiles):
8886
postsHisto.addNewParam(i)
8987
postsHisto.addValue(i, "p", int(line[9+i]))
9088

89+
for inputFileName in activityFactorFiles:
90+
with codecs.open(inputFileName, "r", "utf-8") as f:
9191
countryCount = int(f.readline())
9292
for i in range(countryCount):
9393
line = f.readline().split(",")

paramgenerator/readfactors.pyc

8.96 KB
Binary file not shown.

paramgenerator/timeparameters.py

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -143,18 +143,17 @@ def computeTimeMedians(factors, lastmonthcount = LAST_MONTHS):
143143

144144
return (medianFirstMonth, medianLastMonth, median)
145145

146-
def readTimeParams(persons, factorFiles, friendFiles):
146+
def readTimeParams(persons, personFactorFiles, activityFactorFiles, friendFiles):
147147

148148
postCounts = {}
149149
groupCounts = {}
150150
offset = 8
151151
monthcount = 12*3 + 1
152152

153-
for inputFactorFile in factorFiles:
153+
for inputFactorFile in personFactorFiles:
154154
with open(inputFactorFile, 'r') as f:
155-
personCount = int(f.readline())
156-
for i in range(personCount):
157-
line = f.readline().split(",")
155+
for line in f.readlines():
156+
line = line.split(",")
158157
person = int(line[0])
159158
localPostCounts = map(int,line[offset:offset+monthcount])
160159
localGroupCounts = map(int, line[offset+monthcount:])
@@ -209,15 +208,15 @@ def readTimeParams(persons, factorFiles, friendFiles):
209208

210209

211210

212-
def findTimeParams(input, factorFiles, friendFiles, startYear):
211+
def findTimeParams(input, personFactorFiles, activityFactorFiles, friendFiles, startYear):
213212
START_YEAR = startYear
214213
fPostCount = {}
215214
ffPostCount = {}
216215
persons = []
217216
for queryId in input:
218217
persons += input[queryId][0]
219218

220-
(fPostCount, ffPostCount, ffGroupCount) = readTimeParams(set(persons),factorFiles, friendFiles)
219+
(fPostCount, ffPostCount, ffGroupCount) = readTimeParams(set(persons),personFactorFiles, activityFactorFiles, friendFiles)
221220

222221
mapParam = {
223222
"f" : fPostCount,

paramgenerator/timeparameters.pyc

10 KB
Binary file not shown.

params.ini

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,4 @@
1-
# ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
2-
3-
ldbc.snb.datagen.generator.numPersons:3000
4-
ldbc.snb.datagen.generator.numYears:1
5-
ldbc.snb.datagen.generator.startYear:2010
1+
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
62

73
ldbc.snb.datagen.serializer.compressed:false
84

run.sh

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,10 @@ then
3232
mkdir -p substitution_parameters
3333
python paramgenerator/generateparams.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
3434
python paramgenerator/generateparamsbi.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
35-
rm -f m*factors*
36-
rm -f .m*factors*
35+
rm -f m*personFactors*
36+
rm -f .m*personFactors*
37+
rm -f m*activityFactors*
38+
rm -f .m*activityFactors*
3739
rm -f m0friendList*
3840
rm -f .m0friendList*
3941
fi

0 commit comments

Comments
 (0)