Skip to content

Commit 2b30303

Browse files
committed
Fixed several bugs related to dates and query parameters
1 parent 5c8e26a commit 2b30303

25 files changed

+315
-167
lines changed

run.sh

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/bin/bash
22
DEFAULT_HADOOP_HOME=/home/user/hadoop-2.6.0 #change to your hadoop folder
3-
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/user/ldbc_snb_datagen_0.2 #change to your ldbc_socialnet_dbgen folder
4-
PARAM_GENERATION=1 #param generation
3+
DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/user/ldbc_snb_datagen #change to your ldbc_socialnet_dbgen folder
54

65
# allow overriding configuration from outside via environment variables
76
# i.e. you can do
@@ -14,22 +13,13 @@ export HADOOP_HOME
1413
export LDBC_SNB_DATAGEN_HOME
1514

1615
mvn clean
17-
mvn -DskipTests assembly:assembly
16+
mvn -DskipTests package
1817

19-
cp $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/
20-
rm $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar
18+
$HADOOP_HOME/bin/hadoop jar $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen-0.2.5.jar ldbc.snb.datagen.generator.LDBCDatagen $LDBC_SNB_DATAGEN_HOME/params.ini
2119

22-
$HADOOP_HOME/bin/hadoop jar $LDBC_SNB_DATAGEN_HOME/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/params.ini
23-
24-
if [ $PARAM_GENERATION -eq 1 ]
25-
then
26-
mkdir -p substitution_parameters
27-
python paramgenerator/generateparams.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
28-
python paramgenerator/generateparamsbi.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
29-
rm -f m*personFactors*
30-
rm -f .m*personFactors*
31-
rm -f m*activityFactors*
32-
rm -f .m*activityFactors*
33-
rm -f m0friendList*
34-
rm -f .m0friendList*
35-
fi
20+
rm -f m*personFactors*
21+
rm -f .m*personFactors*
22+
rm -f m*activityFactors*
23+
rm -f .m*activityFactors*
24+
rm -f m0friendList*
25+
rm -f .m0friendList*

src/main/java/ldbc/snb/datagen/dictionary/FlashmobTagDictionary.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ private void initialize() {
117117
flashmobTags.put(tags[i], instances);
118118
}
119119
FlashmobTag flashmobTag = new FlashmobTag();
120-
flashmobTag.date = dateGen.randomDateInMillis(random, dateGen.getStartDateTime(), dateGen.getEndDateTime());
120+
flashmobTag.date = dateGen.randomDate(random, dateGen.getStartDateTime());
121121
flashmobTag.level = levelGenerator.getValue(random);
122122
sumLevels += flashmobTag.level;
123123
flashmobTag.tag = tags[i];

src/main/java/ldbc/snb/datagen/generator/CommentGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ public long createComments(RandomGeneratorFarm randomFarm, final Forum forum, fi
8282
}
8383

8484
long creationDate = Dictionaries.dates.powerlawCommDateDay(randomFarm.get(RandomGeneratorFarm.Aspect.DATE),replyTo.creationDate()+DatagenParams.deltaTime);
85-
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
85+
/*if( creationDate <= Dictionaries.dates.getEndDateTime() )*/ {
8686
Comment comment = new Comment(SN.formId(SN.composeId(nextId++,creationDate)),
8787
creationDate,
8888
member.person(),

src/main/java/ldbc/snb/datagen/generator/DateGenerator.java

Lines changed: 42 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ public DateGenerator(Configuration conf, GregorianCalendar from, GregorianCalend
7979
toBirthDay_ = tobirthCalendar.getTimeInMillis();
8080
calendar_ = new GregorianCalendar();
8181
calendar_.setTimeZone(TimeZone.getTimeZone("GMT"));
82-
updateThreshold_ = getMaxDateTime() - (long)((getMaxDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
82+
//updateThreshold_ = getMaxDateTime() - (long)((getMaxDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
83+
updateThreshold_ = getEndDateTime() - (long)((getEndDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
8384

8485
try {
8586
dateFormatter_ = (DateFormatter) Class.forName(conf.get("ldbc.snb.datagen.serializer.dateFormatter")).newInstance();
@@ -93,7 +94,7 @@ public DateGenerator(Configuration conf, GregorianCalendar from, GregorianCalend
9394
/*
9495
* Date between from and to
9596
*/
96-
public Long randomDateInMillis(Random random) {
97+
public Long randomPersonCreationDate(Random random) {
9798
long date = (long) (random.nextDouble() * (to_ - from_) + from_);
9899
calendar_.setTime(new Date(date));
99100
return calendar_.getTimeInMillis();
@@ -136,71 +137,67 @@ public static boolean isTravelSeason(long date) {
136137
}
137138
}
138139

139-
/*
140-
* Format date in xsd:dateTime format
141-
*/
142-
/* public String formatDateTime(Long date) {
143-
calendar.setTimeInMillis(date);
144-
String dateString = formatDate(calendar);
145-
return dateString + "T00:00:00";
146-
}
147-
148-
public String formatDateTime(GregorianCalendar date) {
149-
String dateString = formatDate(date);
150-
return dateString + "T00:00:00";
151-
}
152-
*/
153-
154140
public int getNumberOfMonths(long date, int startMonth, int startYear) {
155141
calendar_.setTimeInMillis(date);
156142
int month = calendar_.get(Calendar.MONTH) + 1;
157143
int year = calendar_.get(Calendar.YEAR);
158-
159144
return (year - startYear) * 12 + month - startMonth;
160145
}
161146

162-
public Long randomDateInMillis(Random random, Long from, Long to) {
163-
long date = (long) (random.nextDouble() * (to - from) + from);
164-
calendar_.setTime(new Date(date));
165-
166-
return calendar_.getTimeInMillis();
167-
}
168-
169-
public Long randomThirtyDaysSpan(Random random, Long from) {
170-
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
171-
return (from + randomSpanMilis);
172-
}
173-
174147
public long randomKnowsCreationDate(Random random, Person personA, Person personB) {
175-
long fromDate = Math.max(personA.creationDate(), personB.creationDate());
176-
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
177-
return Math.min(fromDate + randomSpanMilis, getEndDateTime());
148+
long fromDate = Math.max(personA.creationDate(), personB.creationDate()) + DatagenParams.deltaTime;
149+
//long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
150+
//return Math.min(fromDate + randomSpanMilis, getEndDateTime() + DatagenParams.deltaTime);
151+
return randomDate(random, fromDate, fromDate + THIRTY_DAYS);
178152
}
179153

180154
public long numberOfMonths(Person user) {
181155
return numberOfMonths(user.creationDate());
182156
}
183157

184158
public long numberOfMonths(long fromDate) {
185-
return (to_ - fromDate) / THIRTY_DAYS;
159+
return (to_ - fromDate) / THIRTY_DAYS;
186160
}
187161

188-
public long randomDate(Random random, long minDate) {
189-
return (long) (random.nextDouble() * (to_ - minDate) + minDate);
162+
/*public long randomDate(Random random, long minDate) {
163+
return (long) (random.nextDouble() * (to_+ DatagenParams.deltaTime - minDate) + minDate);
190164
}
191165
192-
public long randomSevenDays(Random random) {
193-
return (long) (random.nextDouble() * DateGenerator.SEVEN_DAYS);
194-
}
166+
public long randomDate(Random random, long minDate, long maxDate) {
167+
long to = Math.min(maxDate, to_ + DatagenParams.deltaTime);
168+
return (long) (random.nextDouble() * (to - minDate) + minDate);
169+
}
195170
196-
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
197-
return (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
198-
}
171+
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
172+
long date = (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
173+
return Math.min(to_+DatagenParams.deltaTime,date);
174+
}*/
175+
176+
177+
public long randomDate(Random random, long minDate) {
178+
long to = Math.max(minDate+THIRTY_DAYS, to_);
179+
return (long) (random.nextDouble() * (to - minDate) + minDate);
180+
}
181+
182+
public long randomDate(Random random, long minDate, long maxDate) {
183+
long to = maxDate;
184+
return (long) (random.nextDouble() * (to - minDate) + minDate);
185+
}
186+
187+
public long powerlawCommDateDay(Random random, long lastCommentCreatedDate) {
188+
long date = (long) (powerDist_.getDouble(random) * ONE_DAY + lastCommentCreatedDate);
189+
return date;
190+
}
191+
192+
193+
194+
public long randomSevenDays(Random random) {
195+
return (long) (random.nextDouble() * DateGenerator.SEVEN_DAYS);
196+
}
199197

200198
// The birthday is fixed during 1980 --> 1990
201199
public long getBirthDay(Random random, long userCreatedDate) {
202200
calendar_.setTimeInMillis(((long)(random.nextDouble() * (toBirthDay_ - fromBirthDay_)) + fromBirthDay_));
203-
// GregorianCalendar aux_calendar = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
204201
GregorianCalendar aux_calendar = new GregorianCalendar(calendar_.get(Calendar.YEAR),calendar_.get(Calendar.MONTH), calendar_.get(Calendar.DAY_OF_MONTH),0,0,0);
205202
aux_calendar.setTimeZone(TimeZone.getTimeZone("GMT"));
206203
return aux_calendar.getTimeInMillis();
@@ -247,9 +244,10 @@ public long getEndDateTime() {
247244
return to_;
248245
}
249246

250-
public long getMaxDateTime() {
247+
/* public long getMaxDateTime() {
251248
return to_ + SEVEN_DAYS + deltaTime_;
252249
}
250+
*/
253251

254252
public long getUpdateThreshold() {
255253
return updateThreshold_;

src/main/java/ldbc/snb/datagen/generator/DistanceKnowsGenerator.java

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,4 @@ boolean know( Person personA, Person personB, int dist, ArrayList<Float> percent
4646
return false;
4747
}
4848

49-
void createKnow( Person personA, Person personB ) {
50-
long creationDate = Dictionaries.dates.randomKnowsCreationDate(
51-
randomFarm.get(RandomGeneratorFarm.Aspect.DATE),
52-
personA,
53-
personB);
54-
creationDate = creationDate - personA.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personA.creationDate()));
55-
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
56-
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
57-
float similarity = Person.personSimilarity.Similarity(personA,personB);
58-
personB.knows().add(new Knows(personA, creationDate, similarity));
59-
personA.knows().add(new Knows(personB, creationDate, similarity));
60-
}
61-
}
6249
}

src/main/java/ldbc/snb/datagen/generator/FlashmobPostGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ protected PostGenerator.PostInfo generatePostInfo( Random randomTag, Random rand
129129
}
130130
double prob = dateDistribution_.nextDouble(randomDate);
131131
postInfo.date = flashmobTag.date - flashmobSpan_/2 + (long)(prob * flashmobSpan_);
132-
if( postInfo.date > Dictionaries.dates.getEndDateTime() ) return null;
132+
//if( postInfo.date > Dictionaries.dates.getEndDateTime() ) return null;
133133
return postInfo;
134134
}
135135
}

src/main/java/ldbc/snb/datagen/generator/ForumGenerator.java

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,16 @@ public Forum createWall(RandomGeneratorFarm randomFarm, long forumId, Person per
3838

3939
TreeSet<Knows> knows = person.knows();
4040
for ( Knows k : knows ) {
41-
forum.addMember(new ForumMembership(forum.id(), k.creationDate()+DatagenParams.deltaTime, k.to()));
41+
long date = Math.max(k.creationDate(), forum.creationDate()+DatagenParams.deltaTime);
42+
assert (forum.creationDate() + DatagenParams.deltaTime) <= date : "Forum creation date is larger than knows in wall "+forum.creationDate()+ " " +k.creationDate();
43+
forum.addMember(new ForumMembership(forum.id(), date, k.to()));
4244
}
4345
return forum;
4446
}
4547

4648
public Forum createGroup(RandomGeneratorFarm randomFarm, long forumId, Person person, ArrayList<Person> persons){
4749
long date = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.creationDate() + DatagenParams.deltaTime);
48-
if( date > Dictionaries.dates.getEndDateTime() ) return null;
50+
//if( date > Dictionaries.dates.getEndDateTime() ) return null;
4951

5052
int language = randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE).nextInt(person.languages().size());
5153
Iterator<Integer> iter = person.interests().iterator();
@@ -82,21 +84,23 @@ public Forum createGroup(RandomGeneratorFarm randomFarm, long forumId, Person pe
8284
if (!added.contains(k.to().accountId())) {
8385
Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
8486
date = Dictionaries.dates.randomDate(random,Math.max(forum.creationDate(), k.creationDate()+DatagenParams.deltaTime));
85-
if( date < Dictionaries.dates.getEndDateTime() ) {
87+
assert forum.creationDate() +DatagenParams.deltaTime <= date : "Forum creation date larger than membership date for knows based members";
88+
/*if( date < Dictionaries.dates.getEndDateTime() )*/ {
8689
forum.addMember(new ForumMembership(forum.id(), date, k.to()));
8790
added.add(k.to().accountId());
8891
}
8992
}
9093
} else {
91-
int friendIdx = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(persons.size());
92-
Person member = persons.get(friendIdx);
94+
int candidateIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(persons.size());
95+
Person member = persons.get(candidateIndex);
9396
prob = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP).nextDouble();
9497
if (prob < 0.1) {
9598
if (!added.contains(member.accountId())) {
9699
added.add(member.accountId());
97100
Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
98101
date = Dictionaries.dates.randomDate(random,Math.max(forum.creationDate(), member.creationDate()+DatagenParams.deltaTime));
99-
if( date < Dictionaries.dates.getEndDateTime() ) {
102+
/*if( date < Dictionaries.dates.getEndDateTime() )*/ {
103+
assert forum.creationDate() +DatagenParams.deltaTime <= date : "Forum creation date larger than membership date for block based members";
100104
forum.addMember(new ForumMembership(forum.id(), date, new Person.PersonSummary(member)));
101105
added.add(member.accountId());
102106
}
@@ -110,7 +114,7 @@ public Forum createGroup(RandomGeneratorFarm randomFarm, long forumId, Person pe
110114

111115
public Forum createAlbum(RandomGeneratorFarm randomFarm, long forumId, Person person, int numAlbum) {
112116
long date = Dictionaries.dates.randomDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), person.creationDate() + DatagenParams.deltaTime);
113-
if( date > Dictionaries.dates.getEndDateTime() ) return null;
117+
//if( date > Dictionaries.dates.getEndDateTime() ) return null;
114118
int language = randomFarm.get(RandomGeneratorFarm.Aspect.LANGUAGE).nextInt(person.languages().size());
115119
Forum forum = new Forum(SN.formId(SN.composeId(forumId,date)),
116120
date,
@@ -140,7 +144,7 @@ public Forum createAlbum(RandomGeneratorFarm randomFarm, long forumId, Person pe
140144
if (prob < 0.7) {
141145
Random random = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX);
142146
date = Dictionaries.dates.randomDate(random,Math.max(forum.creationDate(), k.creationDate()+DatagenParams.deltaTime));
143-
if( date < Dictionaries.dates.getEndDateTime() ) {
147+
/*if( date < Dictionaries.dates.getEndDateTime() )*/ {
144148
forum.addMember(new ForumMembership(forum.id(), date, k.to()));
145149
}
146150
}

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,11 @@ public static void main(String[] args) /*throws Exception*/ {
362362
LDBCDatagen datagen = new LDBCDatagen();
363363
LDBCDatagen.init(conf);
364364
datagen.runGenerateJob(conf);
365+
}catch(AssertionError e ) {
366+
System.err.println("Error during execution");
367+
System.err.println(e.getMessage());
368+
e.printStackTrace();
369+
System.exit(1);
365370
}catch(Exception e ) {
366371
System.err.println("Error during execution");
367372
System.err.println(e.getMessage());

src/main/java/ldbc/snb/datagen/generator/LikeGenerator.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,10 @@ public void generateLikes(Random random, final Forum forum, final Message messag
4343
for (int i = 0; i < numLikes; i++) {
4444
ForumMembership membership = memberships.get(startIndex+i);
4545
long minDate = message.creationDate() > memberships.get(startIndex+i).creationDate() ? message.creationDate() : membership.creationDate();
46-
long date = Math.max(Dictionaries.dates.randomSevenDays(random),DatagenParams.deltaTime) + minDate;
47-
if( date <= Dictionaries.dates.getEndDateTime() ) {
46+
//long date = Math.max(Dictionaries.dates.randomSevenDays(random),DatagenParams.deltaTime) + minDate;
47+
long date = Dictionaries.dates.randomDate(random, minDate, Dictionaries.dates.randomSevenDays(random) + minDate);
48+
/*if( date <= Dictionaries.dates.getEndDateTime() )*/ {
49+
assert((membership.person().creationDate() + DatagenParams.deltaTime) < date);
4850
like.user = membership.person().accountId();
4951
like.userCreationDate = membership.person().creationDate();
5052
like.messageId = message.messageId();

src/main/java/ldbc/snb/datagen/generator/PersonActivityGenerator.java

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,16 @@ public PersonActivityGenerator( PersonActivitySerializer serializer, UpdateEvent
4848
}
4949

5050
private void generateActivity( Person person, ArrayList<Person> block ) {
51-
generateWall(person, block);
52-
generateGroups(person, block);
53-
generateAlbums(person, block);
54-
if(person.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
51+
try {
5552
factorTable_.extractFactors(person);
53+
generateWall(person, block);
54+
generateGroups(person, block);
55+
generateAlbums(person, block);
56+
} catch (AssertionError e) {
57+
System.out.println("Assertion error when generating activity!");
58+
System.out.println(e.getMessage());
59+
e.printStackTrace();
60+
System.exit(1);
5661
}
5762
}
5863

0 commit comments

Comments
 (0)