Skip to content

Commit ef1720a

Browse files
author
Hadoop user
committed
2 parents 4021b69 + 8e1642e commit ef1720a

36 files changed

+904
-258
lines changed

pom.xml

Lines changed: 94 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,41 @@
11
<project xmlns="http://maven.apache.org/POM/4.0.0"
22
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
33
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
4-
http://maven.apache.org/xsd/maven-4.0.0.xsd">
5-
6-
4+
http://maven.apache.org/xsd/maven-4.0.0.xsd">
75
<modelVersion>4.0.0</modelVersion>
6+
87
<groupId>ldbc.snb.datagen</groupId>
98
<artifactId>ldbc_snb_datagen</artifactId>
109
<version>0.2.5</version>
11-
<dependencies>
12-
<dependency>
13-
<groupId>junit</groupId>
14-
<artifactId>junit</artifactId>
15-
<version>4.12</version>
16-
<scope>test</scope>
17-
</dependency>
18-
<dependency>
19-
<groupId>xerces</groupId>
20-
<artifactId>xercesImpl</artifactId>
21-
<version>2.9.1</version>
22-
</dependency>
23-
<dependency>
24-
<groupId>xalan</groupId>
25-
<artifactId>xalan</artifactId>
26-
<version>2.7.1</version>
27-
</dependency>
28-
<dependency>
29-
<groupId>org.jdom</groupId>
30-
<artifactId>jdom</artifactId>
31-
<version>1.1.3</version>
32-
</dependency>
10+
<packaging>jar</packaging>
11+
12+
<dependencies>
13+
<dependency>
14+
<groupId>junit</groupId>
15+
<artifactId>junit</artifactId>
16+
<version>4.12</version>
17+
<scope>test</scope>
18+
</dependency>
19+
<dependency>
20+
<groupId>xerces</groupId>
21+
<artifactId>xercesImpl</artifactId>
22+
<version>2.9.1</version>
23+
</dependency>
24+
<dependency>
25+
<groupId>xalan</groupId>
26+
<artifactId>xalan</artifactId>
27+
<version>2.7.1</version>
28+
</dependency>
29+
<dependency>
30+
<groupId>org.jdom</groupId>
31+
<artifactId>jdom</artifactId>
32+
<version>1.1.3</version>
33+
</dependency>
3334
<dependency>
3435
<groupId>org.apache.hadoop</groupId>
3536
<artifactId>hadoop-client</artifactId>
3637
<version>2.6.0</version>
37-
</dependency>
38+
</dependency>
3839
<dependency>
3940
<groupId>ca.umontreal.iro</groupId>
4041
<artifactId>ssj</artifactId>
@@ -45,53 +46,72 @@
4546
<artifactId>gson</artifactId>
4647
<version>2.2.4</version>
4748
</dependency>
48-
<dependency>
49-
<groupId>org.codehaus.groovy</groupId>
50-
<artifactId>groovy</artifactId>
51-
<version>2.1.6</version>
52-
</dependency>
53-
<dependency>
54-
<groupId>org.codehaus.groovy</groupId>
55-
<artifactId>groovy-templates</artifactId>
56-
<version>2.1.6</version>
57-
</dependency>
58-
<dependency>
59-
<groupId>org.codehaus.groovy</groupId>
60-
<artifactId>groovy-jsr223</artifactId>
61-
<version>2.1.6</version>
62-
</dependency>
63-
<dependency>
64-
<groupId>org.apache.commons</groupId>
65-
<artifactId>commons-math3</artifactId>
66-
<version>3.4.1</version>
67-
</dependency>
49+
<dependency>
50+
<groupId>org.codehaus.groovy</groupId>
51+
<artifactId>groovy</artifactId>
52+
<version>2.1.6</version>
53+
</dependency>
54+
<dependency>
55+
<groupId>org.codehaus.groovy</groupId>
56+
<artifactId>groovy-templates</artifactId>
57+
<version>2.1.6</version>
58+
</dependency>
59+
<dependency>
60+
<groupId>org.codehaus.groovy</groupId>
61+
<artifactId>groovy-jsr223</artifactId>
62+
<version>2.1.6</version>
63+
</dependency>
64+
<dependency>
65+
<groupId>org.apache.commons</groupId>
66+
<artifactId>commons-math3</artifactId>
67+
<version>3.4.1</version>
68+
</dependency>
69+
<dependency>
70+
<groupId>org.python</groupId>
71+
<artifactId>jython</artifactId>
72+
<version>2.7.0</version>
73+
</dependency>
6874
</dependencies>
69-
<build>
70-
<plugins>
71-
<plugin>
72-
<artifactId>maven-assembly-plugin</artifactId>
73-
<version>2.4</version>
74-
<configuration>
75-
<descriptorRefs>
76-
<descriptorRef>jar-with-dependencies</descriptorRef>
77-
</descriptorRefs>
78-
<archive>
79-
<manifest>
80-
<mainClass>ldbc.snb.datagen.generator.LDBCDatagen</mainClass>
81-
</manifest>
82-
</archive>
83-
<finalName>ldbc_snb_datagen</finalName>
84-
<appendAssemblyId>false</appendAssemblyId>
85-
</configuration>
86-
</plugin>
87-
<plugin>
88-
<groupId>org.apache.maven.plugins</groupId>
89-
<artifactId>maven-compiler-plugin</artifactId>
90-
<version>3.1</version>
91-
<configuration>
92-
<compilerVersion>1.6</compilerVersion>
93-
</configuration>
94-
</plugin>
95-
</plugins>
96-
</build>
75+
<build>
76+
<plugins>
77+
<plugin>
78+
<groupId>org.apache.maven.plugins</groupId>
79+
<artifactId>maven-compiler-plugin</artifactId>
80+
<version>3.5.1</version>
81+
<configuration>
82+
<source>1.8</source>
83+
<target>1.8</target>
84+
</configuration>
85+
</plugin>
86+
<plugin>
87+
<groupId>org.apache.maven.plugins</groupId>
88+
<artifactId>maven-jar-plugin</artifactId>
89+
<configuration>
90+
<archive>
91+
<manifest>
92+
<addClasspath>true</addClasspath>
93+
</manifest>
94+
</archive>
95+
</configuration>
96+
</plugin>
97+
<plugin>
98+
<groupId>org.apache.maven.plugins</groupId>
99+
<artifactId>maven-dependency-plugin</artifactId>
100+
<executions>
101+
<execution>
102+
<id>copy-dependencies</id>
103+
<phase>package</phase>
104+
<goals>
105+
<goal>copy-dependencies</goal>
106+
</goals>
107+
<configuration>
108+
<outputDirectory>${project.build.directory}</outputDirectory>
109+
<overWriteReleases>false</overWriteReleases>
110+
<overWriteSnapshots>true</overWriteSnapshots>
111+
</configuration>
112+
</execution>
113+
</executions>
114+
</plugin>
115+
</plugins>
116+
</build>
97117
</project>

run.sh

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/bin/bash
22
DEFAULT_HADOOP_HOME=/home/user/hadoop-2.6.0 #change to your hadoop folder
3-
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/user/ldbc_snb_datagen_0.2 #change to your ldbc_socialnet_dbgen folder
4-
PARAM_GENERATION=1 #param generation
3+
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/user/ldbc_snb_datagen #change to your ldbc_socialnet_dbgen folder
54

65
# allow overriding configuration from outside via environment variables
76
# i.e. you can do
@@ -14,22 +13,13 @@ export HADOOP_HOME
1413
export LDBC_SNB_DATAGEN_HOME
1514

1615
mvn clean
17-
mvn -DskipTests assembly:assembly
16+
mvn -DskipTests package
1817

19-
cp $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/
20-
rm $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar
18+
$HADOOP_HOME/bin/hadoop jar $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen-0.2.5.jar ldbc.snb.datagen.generator.LDBCDatagen $LDBC_SNB_DATAGEN_HOME/params.ini
2119

22-
$HADOOP_HOME/bin/hadoop jar $LDBC_SNB_DATAGEN_HOME/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/params.ini
23-
24-
if [ $PARAM_GENERATION -eq 1 ]
25-
then
26-
mkdir -p substitution_parameters
27-
python paramgenerator/generateparams.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
28-
python paramgenerator/generateparamsbi.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
29-
rm -f m*personFactors*
30-
rm -f .m*personFactors*
31-
rm -f m*activityFactors*
32-
rm -f .m*activityFactors*
33-
rm -f m0friendList*
34-
rm -f .m0friendList*
35-
fi
20+
rm -f m*personFactors*
21+
rm -f .m*personFactors*
22+
rm -f m*activityFactors*
23+
rm -f .m*activityFactors*
24+
rm -f m0friendList*
25+
rm -f .m0friendList*

src/main/java/ldbc/snb/datagen/dictionary/FlashmobTagDictionary.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ private void initialize() {
117117
flashmobTags.put(tags[i], instances);
118118
}
119119
FlashmobTag flashmobTag = new FlashmobTag();
120-
flashmobTag.date = dateGen.randomDateInMillis(random, dateGen.getStartDateTime(), dateGen.getEndDateTime());
120+
flashmobTag.date = dateGen.randomDate(random, dateGen.getStartDateTime());
121121
flashmobTag.level = levelGenerator.getValue(random);
122122
sumLevels += flashmobTag.level;
123123
flashmobTag.tag = tags[i];

src/main/java/ldbc/snb/datagen/dictionary/TagTextDictionary.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,10 @@ public String generateText(Random randomTextSize, TreeSet<Integer> tags, int tex
163163
if (returnString.length() < textSize - 1) {
164164
returnString.append(" ");
165165
}
166+
if (returnString.length() > textSize) {
167+
returnString.delete(textSize-1,returnString.length());
168+
returnString.trimToSize();
169+
}
166170
return returnString.toString().replace("|", " ");
167171
}
168172
}

src/main/java/ldbc/snb/datagen/generator/CommentGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public long createComments(RandomGeneratorFarm randomFarm, final Forum forum, fi
8282
}
8383

8484
long creationDate = Dictionaries.dates.powerlawCommDateDay(randomFarm.get(RandomGeneratorFarm.Aspect.DATE),replyTo.creationDate()+DatagenParams.deltaTime);
85-
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
85+
/*if( creationDate <= Dictionaries.dates.getEndDateTime() )*/ {
8686
Comment comment = new Comment(SN.formId(SN.composeId(nextId++,creationDate)),
8787
creationDate,
8888
member.person(),

src/main/java/ldbc/snb/datagen/generator/DateGenerator.java

Lines changed: 42 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ public DateGenerator(Configuration conf, GregorianCalendar from, GregorianCalend
7979
toBirthDay_ = tobirthCalendar.getTimeInMillis();
8080
calendar_ = new GregorianCalendar();
8181
calendar_.setTimeZone(TimeZone.getTimeZone("GMT"));
82-
updateThreshold_ = getMaxDateTime() - (long)((getMaxDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
82+
//updateThreshold_ = getMaxDateTime() - (long)((getMaxDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
83+
updateThreshold_ = getEndDateTime() - (long)((getEndDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
8384

8485
try {
8586
dateFormatter_ = (DateFormatter) Class.forName(conf.get("ldbc.snb.datagen.serializer.dateFormatter")).newInstance();
@@ -93,7 +94,7 @@ public DateGenerator(Configuration conf, GregorianCalendar from, GregorianCalend
9394
/*
9495
* Date between from and to
9596
*/
96-
public Long randomDateInMillis(Random random) {
97+
public Long randomPersonCreationDate(Random random) {
9798
long date = (long) (random.nextDouble() * (to_ - from_) + from_);
9899
calendar_.setTime(new Date(date));
99100
return calendar_.getTimeInMillis();
@@ -136,71 +137,67 @@ public static boolean isTravelSeason(long date) {
136137
}
137138
}
138139

139-
/*
140-
* Format date in xsd:dateTime format
141-
*/
142-
/* public String formatDateTime(Long date) {
143-
calendar.setTimeInMillis(date);
144-
String dateString = formatDate(calendar);
145-
return dateString + "T00:00:00";
146-
}
147-
148-
public String formatDateTime(GregorianCalendar date) {
149-
String dateString = formatDate(date);
150-
return dateString + "T00:00:00";
151-
}
152-
*/
153-
154140
public int getNumberOfMonths(long date, int startMonth, int startYear) {
155141
calendar_.setTimeInMillis(date);
156142
int month = calendar_.get(Calendar.MONTH) + 1;
157143
int year = calendar_.get(Calendar.YEAR);
158-
159144
return (year - startYear) * 12 + month - startMonth;
160145
}
161146

162-
public Long randomDateInMillis(Random random, Long from, Long to) {
163-
long date = (long) (random.nextDouble() * (to - from) + from);
164-
calendar_.setTime(new Date(date));
165-
166-
return calendar_.getTimeInMillis();
167-
}
168-
169-
public Long randomThirtyDaysSpan(Random random, Long from) {
170-
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
171-
return (from + randomSpanMilis);
172-
}
173-
174147
public long randomKnowsCreationDate(Random random, Person personA, Person personB) {
175-
long fromDate = Math.max(personA.creationDate(), personB.creationDate());
176-
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
177-
return Math.min(fromDate + randomSpanMilis, getEndDateTime());
148+
long fromDate = Math.max(personA.creationDate(), personB.creationDate()) + DatagenParams.deltaTime;
149+
//long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
150+
//return Math.min(fromDate + randomSpanMilis, getEndDateTime() + DatagenParams.deltaTime);
151+
return randomDate(random, fromDate, fromDate + THIRTY_DAYS);
178152
}
179153

180154
public long numberOfMonths(Person user) {
181155
return numberOfMonths(user.creationDate());
182156
}
183157

184158
public long numberOfMonths(long fromDate) {
185-
return (to_ - fromDate) / THIRTY_DAYS;
159+
return (to_ - fromDate) / THIRTY_DAYS;
186160
}
187161

188-
public long randomDate(Random random, long minDate) {
189-
return (long) (random.nextDouble() * (to_ - minDate) + minDate);
162+
/*public long randomDate(Random random, long minDate) {
163+
return (long) (random.nextDouble() * (to_+ DatagenParams.deltaTime - minDate) + minDate);
190164
}
191165
192-
public long randomSevenDays(Random random) {
193-
return (long) (random.nextDouble() * DateGenerator.SEVEN_DAYS);
194-
}
166+
public long randomDate(Random random, long minDate, long maxDate) {
167+
long to = Math.min(maxDate, to_ + DatagenParams.deltaTime);
168+
return (long) (random.nextDouble() * (to - minDate) + minDate);
169+
}
195170
196-
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
197-
return (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
198-
}
171+
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
172+
long date = (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
173+
return Math.min(to_+DatagenParams.deltaTime,date);
174+
}*/
175+
176+
177+
public long randomDate(Random random, long minDate) {
178+
long to = Math.max(minDate+THIRTY_DAYS, to_);
179+
return (long) (random.nextDouble() * (to - minDate) + minDate);
180+
}
181+
182+
public long randomDate(Random random, long minDate, long maxDate) {
183+
long to = maxDate;
184+
return (long) (random.nextDouble() * (to - minDate) + minDate);
185+
}
186+
187+
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
188+
long date = (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
189+
return date;
190+
}
191+
192+
193+
194+
public long randomSevenDays(Random random) {
195+
return (long) (random.nextDouble() * DateGenerator.SEVEN_DAYS);
196+
}
199197

200198
// The birthday is fixed during 1980 --> 1990
201199
public long getBirthDay(Random random, long userCreatedDate) {
202200
calendar_.setTimeInMillis(((long)(random.nextDouble() * (toBirthDay_ - fromBirthDay_)) + fromBirthDay_));
203-
// GregorianCalendar aux_calendar = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
204201
GregorianCalendar aux_calendar = new GregorianCalendar(calendar_.get(Calendar.YEAR),calendar_.get(Calendar.MONTH), calendar_.get(Calendar.DAY_OF_MONTH),0,0,0);
205202
aux_calendar.setTimeZone(TimeZone.getTimeZone("GMT"));
206203
return aux_calendar.getTimeInMillis();
@@ -247,9 +244,10 @@ public long getEndDateTime() {
247244
return to_;
248245
}
249246

250-
public long getMaxDateTime() {
247+
/* public long getMaxDateTime() {
251248
return to_ + SEVEN_DAYS + deltaTime_;
252249
}
250+
*/
253251

254252
public long getUpdateThreshold() {
255253
return updateThreshold_;

0 commit comments

Comments
 (0)