Skip to content

Commit eaad9e5

Browse files
committed
2 parents d1d6537 + 2f5cd58 commit eaad9e5

File tree

200 files changed

+11419
-17062
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

200 files changed

+11419
-17062
lines changed

README.md

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,14 @@
33
LDBC-SNB Data Generator
44
----------------------
55

6-
The LDBC-SNB Data Generator (DATAGEN) is the responsible of providing the data sets used by all the LDBC benchmarks. This data generator is designed to produce directed labeled graphs that mimic the characteristics of those graphs of real data. A detailed description of the generator can be found in the following pages:
7-
8-
* In **[Data Schema](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Data-Schema)**, a description of the schema of the data produced by the generator.
9-
* In **[Data Generation Process](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Data-Generation)**, information about the generation process of the data.
10-
* In **[Data Output](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Data-Output)**, a description of the contents and the format of the files produced by the generator.
6+
The LDBC-SNB Data Generator (DATAGEN) is responsible for providing the data sets used by all the LDBC benchmarks. This data generator is designed to produce directed labeled graphs that mimic the characteristics of real-world graph data. A detailed description of the schema produced by DATAGEN, as well as the format of the output files, can be found in the latest version of the official [LDBC SNB specification document](https://github.com/ldbc/ldbc_snb_docs).
117

128

139
ldbc_snb_datagen is part of the LDBC project (http://www.ldbc.eu/).
1410
ldbc_snb_datagen is GPLv3 licensed, to see detailed information about this license read the LICENSE.txt.
1511

16-
* **[Releases](https://github.com/ldbc/ldbc_snb_datagen/releases)**
17-
* **[Configuration](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Configuration)**
18-
* **[Compilation and Execution](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Compilation_Execution)**
19-
* **[Output](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Data-Output)**
20-
* **[Troubleshooting](https://github.com/ldbc/ldbc_socialnet_bm/wiki/Throubleshooting)**
12+
* **[Releases](https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/releases)**
13+
* **[Configuration](https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/wiki/Configuration)**
14+
* **[Compilation and Execution](https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/wiki/Compilation_Execution)**
15+
* **[Output](https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/wiki/Data-Output)**
16+
* **[Troubleshooting](https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/wiki/Throubleshooting)**

params.ini

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
scaleFactor:1
2-
compressed:false
3-
serializer:csv
4-
numThreads:1
1+
2+
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
3+
4+
ldbc.snb.datagen.serializer.compressed:false
5+
6+
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
7+
8+
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
9+
10+
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
11+
12+
ldbc.snb.datagen.generator.numThreads:1
13+
14+
ldbc.snb.datagen.serializer.updateStreams:true
15+

pom.xml

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
<modelVersion>4.0.0</modelVersion>
8-
<groupId>ldbc.socialnet.dbgen</groupId>
8+
<groupId>ldbc.snb.datagen</groupId>
99
<artifactId>ldbc_snb_datagen</artifactId>
1010
<version>0.1</version>
1111
<build>
@@ -19,7 +19,7 @@
1919
</descriptorRefs>
2020
<archive>
2121
<manifest>
22-
<mainClass>ldbc.socialnet.dbgen.generator.MRGenerateUsers</mainClass>
22+
<mainClass>ldbc.snb.datagen.generator.LDBCDatagen</mainClass>
2323
</manifest>
2424
</archive>
2525
<finalName>ldbc_snb_datagen</finalName>
@@ -54,8 +54,8 @@
5454
</dependency>
5555
<dependency>
5656
<groupId>org.apache.hadoop</groupId>
57-
<artifactId>hadoop-tools</artifactId>
58-
<version>1.2.1</version>
57+
<artifactId>hadoop-client</artifactId>
58+
<version>2.6.0</version>
5959
</dependency>
6060
<dependency>
6161
<groupId>ca.umontreal.iro</groupId>
@@ -67,5 +67,25 @@
6767
<artifactId>gson</artifactId>
6868
<version>2.2.4</version>
6969
</dependency>
70+
<dependency>
71+
<groupId>org.codehaus.groovy</groupId>
72+
<artifactId>groovy</artifactId>
73+
<version>2.1.6</version>
74+
</dependency>
75+
<dependency>
76+
<groupId>org.codehaus.groovy</groupId>
77+
<artifactId>groovy-templates</artifactId>
78+
<version>2.1.6</version>
79+
</dependency>
80+
<dependency>
81+
<groupId>org.codehaus.groovy</groupId>
82+
<artifactId>groovy-jsr223</artifactId>
83+
<version>2.1.6</version>
84+
</dependency>
85+
<dependency>
86+
<groupId>org.apache.commons</groupId>
87+
<artifactId>commons-math3</artifactId>
88+
<version>3.4.1</version>
89+
</dependency>
7090
</dependencies>
7191
</project>

prova.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

run.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
#!/bin/bash
2-
DEFAULT_HADOOP_HOME=/home/aprat/programs/hadoop-1.2.1 #change to your hadoop folder
3-
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/aprat/projects/LDBC/ldbc_snb_datagen #change to your ldbc_socialnet_dbgen folder
2+
DEFAULT_HADOOP_HOME=/home/user/hadoop-2.6.0 #change to your hadoop folder
3+
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/user/ldbc_snb_datagen_0.2 #change to your ldbc_socialnet_dbgen folder
44
PARAM_GENERATION=1 #param generation
5-
export JAVA_HOME=/usr/lib/jvm/default
65

76
# allow overriding configuration from outside via environment variables
87
# i.e. you can do
@@ -27,6 +26,8 @@ if [ $PARAM_GENERATION -eq 1 ]
2726
then
2827
mkdir -p substitution_parameters
2928
python paramgenerator/generateparams.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
30-
# rm -f m*factors*
31-
# rm -f m0friendList*
29+
rm -f m*factors*
30+
rm -f .m*factors*
31+
rm -f m0friendList*
32+
rm -f .m0friendList*
3233
fi

src/main/java/README.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
Basic code guidelines [2013-07-24]
22

3-
ldbc.socialnet.dbgen.generator: The main directory. Contains the program entry class MRGenerateUsers which uses hadoop
3+
ldbc.snb.datagen.generator: The main directory. Contains the program entry class LDBCDatagen which uses hadoop
44
jobs and our class "ScalableGenerator" to generate all the data.
55
It also contains any supportive classes needed to generate the data.
66

7-
ldbc.socialnet.dbgen.dictionary: Contains the classes responsible of reading the file datasets from the dictionaries
7+
ldbc.snb.datagen.dictionary: Contains the classes responsible for reading the file datasets from the dictionaries
88
folder and provides methods to access such data.
99

10-
ldbc.socialnet.dbgen.objects: The schema entities classes are in this folder.
10+
ldbc.snb.datagen.objects: The schema entities classes are in this folder.
1111

12-
ldbc.socialnet.dbgen.serializer: The generator serializers.
12+
ldbc.snb.datagen.serializer: The generator serializers.
1313

14-
ldbc.socialnet.dbgen.vocabulary: RDF vocabulary classes used in the serializers.
14+
ldbc.snb.datagen.vocabulary: RDF vocabulary classes used in the serializers.
1515

16-
ldbc.socialnet.dbgen.util: Any additional classes which doesn't belong in any other directory.
16+
ldbc.snb.datagen.util: Any additional classes which don't belong in any other directory.
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) 2013 LDBC
3+
* Linked Data Benchmark Council (http://ldbc.eu)
4+
*
5+
* This file is part of ldbc_socialnet_dbgen.
6+
*
7+
* ldbc_socialnet_dbgen is free software: you can redistribute it and/or modify
8+
* it under the terms of the GNU General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* ldbc_socialnet_dbgen is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* along with ldbc_socialnet_dbgen. If not, see <http://www.gnu.org/licenses/>.
19+
*
20+
* Copyright (C) 2011 OpenLink Software <[email protected]>
21+
* All Rights Reserved.
22+
*
23+
* This program is free software; you can redistribute it and/or modify
24+
* it under the terms of the GNU General Public License as published by
25+
* the Free Software Foundation; only Version 2 of the License dated
26+
* June 1991.
27+
*
28+
* This program is distributed in the hope that it will be useful,
29+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
30+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31+
* GNU General Public License for more details.
32+
*
33+
* You should have received a copy of the GNU General Public License
34+
* along with this program; if not, write to the Free Software
35+
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
36+
*/
37+
package ldbc.snb.datagen.dictionary;
38+
39+
import ldbc.snb.datagen.generator.DatagenParams;
40+
41+
import java.io.BufferedReader;
42+
import java.io.IOException;
43+
import java.io.InputStreamReader;
44+
import java.util.ArrayList;
45+
import java.util.Random;
46+
47+
public class BrowserDictionary {
48+
49+
private static final String SEPARATOR_ = " ";
50+
private ArrayList<String> browsers_;
51+
private ArrayList<Double> cumulativeDistribution_;
52+
private double probAnotherBrowser_ = 0.0f;
53+
54+
public BrowserDictionary(double probAnotherBrowser) {
55+
probAnotherBrowser_ = probAnotherBrowser;
56+
browsers_ = new ArrayList<String>();
57+
cumulativeDistribution_ = new ArrayList<Double>();
58+
load(DatagenParams.browserDictonryFile);
59+
}
60+
61+
private void load(String fileName) {
62+
try {
63+
BufferedReader dictionary = new BufferedReader(
64+
new InputStreamReader(getClass().getResourceAsStream(fileName), "UTF-8"));
65+
String line;
66+
double cummulativeDist = 0.0;
67+
while ((line = dictionary.readLine()) != null) {
68+
String data[] = line.split(SEPARATOR_);
69+
String browser = data[0];
70+
cummulativeDist += Double.parseDouble(data[1]);
71+
browsers_.add(browser);
72+
cumulativeDistribution_.add(cummulativeDist);
73+
}
74+
dictionary.close();
75+
} catch (IOException e) {
76+
e.printStackTrace();
77+
}
78+
}
79+
80+
public String getName(int id) {
81+
return browsers_.get(id);
82+
}
83+
84+
public int getRandomBrowserId(Random random) {
85+
double prob = random.nextDouble();
86+
int minIdx = 0;
87+
int maxIdx = (byte) ((prob < cumulativeDistribution_.get(minIdx)) ? minIdx : cumulativeDistribution_.size() - 1);
88+
// Binary search
89+
while ((maxIdx - minIdx) > 1) {
90+
int middlePoint = minIdx + (maxIdx - minIdx) / 2;
91+
if (prob > cumulativeDistribution_.get(middlePoint)) {
92+
minIdx = middlePoint;
93+
} else {
94+
maxIdx = middlePoint;
95+
}
96+
}
97+
return maxIdx;
98+
}
99+
100+
public int getPostBrowserId(Random randomDiffBrowser, Random randomBrowser, int userBrowserId) {
101+
double prob = randomDiffBrowser.nextDouble();
102+
return (prob < probAnotherBrowser_) ? getRandomBrowserId(randomBrowser) : userBrowserId;
103+
}
104+
}

0 commit comments

Comments
 (0)