Skip to content

Commit e7d0fbc

Browse files
committed
Merge branch 'bi' of github.com:ldbc/ldbc_snb_datagen into bi
2 parents d4fe499 + 2cfd261 commit e7d0fbc

15 files changed

+146
-52
lines changed

src/main/java/ldbc/snb/datagen/dictionary/Dictionaries.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import ldbc.snb.datagen.generator.DatagenParams;
99
import ldbc.snb.datagen.generator.DateGenerator;
10+
import org.apache.hadoop.conf.Configuration;
1011

1112
import java.util.GregorianCalendar;
1213

@@ -32,11 +33,11 @@ public class Dictionaries {
3233
public static FlashmobTagDictionary flashmobs = null;
3334

3435

35-
public static void loadDictionaries() {
36+
public static void loadDictionaries(Configuration conf) {
3637

3738
browsers = new BrowserDictionary(DatagenParams.probAnotherBrowser);
3839

39-
dates = new DateGenerator( new GregorianCalendar(DatagenParams.startYear,
40+
dates = new DateGenerator( conf, new GregorianCalendar(DatagenParams.startYear,
4041
DatagenParams.startMonth,
4142
DatagenParams.startDate),
4243
new GregorianCalendar(DatagenParams.endYear,

src/main/java/ldbc/snb/datagen/generator/DateGenerator.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737
package ldbc.snb.datagen.generator;
3838

3939
import ldbc.snb.datagen.objects.Person;
40+
import ldbc.snb.datagen.serializer.formatter.DateFormatter;
41+
import org.apache.hadoop.conf.Configuration;
4042

4143
import java.text.SimpleDateFormat;
4244
import java.util.*;
@@ -51,7 +53,6 @@ public class DateGenerator {
5153
public static long TEN_YEARS = 10L * ONE_YEAR;
5254
public static long THIRTY_YEARS = 30L * ONE_YEAR;
5355

54-
private Date date_;
5556
private long from_;
5657
private long to_;
5758
private long fromBirthDay_;
@@ -61,10 +62,11 @@ public class DateGenerator {
6162
private long updateThreshold_;
6263
private PowerDistGenerator powerDist_;
6364
private SimpleDateFormat gmtDateFormatter_;
65+
private DateFormatter dateFormatter_;
6466

6567
// This constructor is used for the generator of friendship creation dates
66-
public DateGenerator(GregorianCalendar from, GregorianCalendar to,
67-
double alpha, long deltaTime) {
68+
public DateGenerator(Configuration conf, GregorianCalendar from, GregorianCalendar to,
69+
double alpha, long deltaTime) {
6870
from_ = from.getTimeInMillis();
6971
to_ = to.getTimeInMillis();
7072
powerDist_ = new PowerDistGenerator(0.0, 1.0, alpha);
@@ -79,9 +81,13 @@ public DateGenerator(GregorianCalendar from, GregorianCalendar to,
7981
calendar_.setTimeZone(TimeZone.getTimeZone("GMT"));
8082
updateThreshold_ = getMaxDateTime() - (long)((getMaxDateTime() - getStartDateTime())*(DatagenParams.updatePortion));
8183

82-
gmtDateFormatter_ = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
83-
gmtDateFormatter_.setTimeZone(TimeZone.getTimeZone("GMT"));
84-
date_ = new Date();
84+
try {
85+
dateFormatter_ = (DateFormatter) Class.forName(conf.get("ldbc.snb.datagen.serializer.dateFormatter")).newInstance();
86+
dateFormatter_.initialize(conf);
87+
} catch(Exception e) {
88+
System.err.println("Error when initializing date formatter");
89+
System.err.println(e.getMessage());
90+
}
8591
}
8692

8793
/*
@@ -97,26 +103,20 @@ public Long randomDateInMillis(Random random) {
97103
* format the date
98104
*/
99105
public String formatDate(long date) {
100-
SimpleDateFormat gmtDateFormatter = new SimpleDateFormat("yyyy-MM-dd");
101-
gmtDateFormatter.setTimeZone(TimeZone.getTimeZone("GMT"));
102-
calendar_.setTimeInMillis(date);
103-
return gmtDateFormatter.format(calendar_.getTime());
106+
return dateFormatter_.formatDate(date);
104107
}
105108

106109
public String formatYear(long date) {
107-
calendar_.setTimeInMillis(date);
108-
int year = calendar_.get(Calendar.YEAR);
109-
return year + "";
110+
calendar_.setTimeInMillis(date);
111+
int year = calendar_.get(Calendar.YEAR);
112+
return year + "";
110113
}
111114

112115
/*
113116
* format the date with hours and minutes
114117
*/
115-
public String formatDateDetail(long d) {
116-
//calendar_.setTimeInMillis(d);
117-
//return gmtDateFormatter_.format(calendar_.getTime());
118-
date_.setTime(d);
119-
return gmtDateFormatter_.format(date_);
118+
public String formatDateTime(long date) {
119+
return dateFormatter_.formatDateTime(date);
120120
}
121121

122122

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public class LDBCDatagen {
5858
public static synchronized void init (Configuration conf) {
5959
if(!initialized) {
6060
DatagenParams.readConf(conf);
61-
Dictionaries.loadDictionaries();
61+
Dictionaries.loadDictionaries(conf);
6262
SN.initialize();
6363
initialized = true;
6464
}

src/main/java/ldbc/snb/datagen/objects/FlashmobTag.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,6 @@ public void copyTo(FlashmobTag t) {
5959
}
6060

6161
public String toString(){
62-
return "Level: "+level+" Date: "+Dictionaries.dates.formatDateDetail(date)+" Tag:"+ Dictionaries.tags.getName(tag);
62+
return "Level: "+level+" Date: "+Dictionaries.dates.formatDateTime(date)+" Tag:"+ Dictionaries.tags.getName(tag);
6363
}
6464
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package ldbc.snb.datagen.serializer.formatter;
2+
3+
import org.apache.hadoop.conf.Configuration;
4+
5+
/**
6+
* Created by aprat on 14/01/16.
7+
*/
8+
9+
public interface DateFormatter {
10+
public void initialize(Configuration config);
11+
public String formatDate(long date);
12+
public String formatDateTime(long date);
13+
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package ldbc.snb.datagen.serializer.formatter;
2+
3+
import org.apache.hadoop.conf.Configuration;
4+
5+
import java.util.Calendar;
6+
import java.util.Date;
7+
import java.util.GregorianCalendar;
8+
import java.util.TimeZone;
9+
10+
/**
11+
* Created by aprat on 14/01/16.
12+
*/
13+
public class LongDateFormatter implements DateFormatter {
14+
private Date date_;
15+
private GregorianCalendar calendar_;
16+
public void initialize(Configuration config) {
17+
date_ = new Date();
18+
calendar_ = new GregorianCalendar(TimeZone.getTimeZone("GMT"));
19+
20+
}
21+
22+
public String formatDate(long date) {
23+
date_.setTime(date);
24+
calendar_.setTime(date_);
25+
int year = calendar_.get(Calendar.YEAR);
26+
int month = calendar_.get(Calendar.MONTH);
27+
int day = calendar_.get(Calendar.DAY_OF_MONTH);
28+
calendar_.clear();
29+
calendar_.set(year, month, day,0,0,0);
30+
return Long.toString(calendar_.getTime().getTime());
31+
}
32+
33+
public String formatDateTime(long date) {
34+
return Long.toString(date);
35+
}
36+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package ldbc.snb.datagen.serializer.formatter;
2+
3+
import org.apache.hadoop.conf.Configuration;
4+
import sun.util.calendar.Gregorian;
5+
6+
import java.text.SimpleDateFormat;
7+
import java.util.Calendar;
8+
import java.util.Date;
9+
import java.util.GregorianCalendar;
10+
import java.util.TimeZone;
11+
12+
/**
13+
* Created by aprat on 14/01/16.
14+
*/
15+
public class StringDateFormatter implements DateFormatter{
16+
17+
private String formatDateTimeString_ = "yyyy-MM-dd'T'HH:mm:ss.SSSZ";
18+
private String formatDateString_ = "yyyy-MM-dd";
19+
20+
private SimpleDateFormat gmtDateTimeFormatter_;
21+
private SimpleDateFormat gmtDateFormatter_;
22+
private GregorianCalendar calendar_;
23+
private Date date_;
24+
public void initialize(Configuration conf) {
25+
26+
formatDateTimeString_ = conf.get("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateTimeFormat", formatDateTimeString_);
27+
gmtDateTimeFormatter_ = new SimpleDateFormat(formatDateTimeString_);
28+
gmtDateTimeFormatter_.setTimeZone(TimeZone.getTimeZone("GMT"));
29+
formatDateString_ = conf.get("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateFormat", formatDateString_);
30+
gmtDateFormatter_ = new SimpleDateFormat(formatDateString_);
31+
gmtDateFormatter_.setTimeZone(TimeZone.getTimeZone("GMT"));
32+
date_ = new Date();
33+
}
34+
35+
public String formatDateTime(long date) {
36+
date_.setTime(date);
37+
return gmtDateTimeFormatter_.format(date_);
38+
}
39+
40+
public String formatDate(long date) {
41+
date_.setTime(date);
42+
return gmtDateFormatter_.format(date_);
43+
}
44+
45+
}

src/main/java/ldbc/snb/datagen/serializer/small/CSVPersonActivitySerializer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ protected void serialize( final Post post ) {
9090

9191
arguments.add(Long.toString(post.messageId()));
9292
arguments.add(post.content());
93-
arguments.add(Dictionaries.dates.formatDateDetail(post.creationDate()));
93+
arguments.add(Dictionaries.dates.formatDateTime(post.creationDate()));
9494
writers[FileNames.MESSAGE.ordinal()].writeEntry(arguments);
9595
arguments.clear();
9696

@@ -110,7 +110,7 @@ protected void serialize( final Post post ) {
110110
protected void serialize( final Comment comment ) {
111111
arguments.add(Long.toString(comment.messageId()));
112112
arguments.add(comment.content());
113-
arguments.add(Dictionaries.dates.formatDateDetail(comment.creationDate()));
113+
arguments.add(Dictionaries.dates.formatDateTime(comment.creationDate()));
114114
writers[FileNames.MESSAGE.ordinal()].writeEntry(arguments);
115115
arguments.clear();
116116

src/main/java/ldbc/snb/datagen/serializer/snb/interactive/CSVMergeForeignPersonActivitySerializer.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ public void close() {
127127

128128
protected void serialize(final Forum forum ) {
129129

130-
String dateString = Dictionaries.dates.formatDateDetail(forum.creationDate());
130+
String dateString = Dictionaries.dates.formatDateTime(forum.creationDate());
131131

132132
arguments.add(Long.toString(forum.id()));
133133
arguments.add(forum.title());
@@ -149,7 +149,7 @@ protected void serialize( final Post post ) {
149149

150150
arguments.add(Long.toString(post.messageId()));
151151
arguments.add(empty);
152-
arguments.add(Dictionaries.dates.formatDateDetail(post.creationDate()));
152+
arguments.add(Dictionaries.dates.formatDateTime(post.creationDate()));
153153
arguments.add(post.ipAddress().toString());
154154
arguments.add(Dictionaries.browsers.getName(post.browserId()));
155155
arguments.add(Dictionaries.languages.getLanguageName(post.language()));
@@ -171,7 +171,7 @@ protected void serialize( final Post post ) {
171171

172172
protected void serialize( final Comment comment ) {
173173
arguments.add(Long.toString(comment.messageId()));
174-
arguments.add(Dictionaries.dates.formatDateDetail(comment.creationDate()));
174+
arguments.add(Dictionaries.dates.formatDateTime(comment.creationDate()));
175175
arguments.add(comment.ipAddress().toString());
176176
arguments.add(Dictionaries.browsers.getName(comment.browserId()));
177177
arguments.add(comment.content());
@@ -200,7 +200,7 @@ protected void serialize(final Photo photo ) {
200200

201201
arguments.add(Long.toString(photo.messageId()));
202202
arguments.add(photo.content());
203-
arguments.add(Dictionaries.dates.formatDateDetail(photo.creationDate()));
203+
arguments.add(Dictionaries.dates.formatDateTime(photo.creationDate()));
204204
arguments.add(photo.ipAddress().toString());
205205
arguments.add(Dictionaries.browsers.getName(photo.browserId()));
206206
arguments.add(empty);
@@ -223,15 +223,15 @@ protected void serialize(final Photo photo ) {
223223
protected void serialize(final ForumMembership membership ) {
224224
arguments.add(Long.toString(membership.forumId()));
225225
arguments.add(Long.toString(membership.person().accountId()));
226-
arguments.add(Dictionaries.dates.formatDateDetail(membership.creationDate()));
226+
arguments.add(Dictionaries.dates.formatDateTime(membership.creationDate()));
227227
writers[FileNames.FORUM_HASMEMBER_PERSON.ordinal()].writeEntry(arguments);
228228
arguments.clear();
229229
}
230230

231231
protected void serialize( final Like like ) {
232232
arguments.add(Long.toString(like.user));
233233
arguments.add(Long.toString(like.messageId));
234-
arguments.add(Dictionaries.dates.formatDateDetail(like.date));
234+
arguments.add(Dictionaries.dates.formatDateTime(like.date));
235235
if( like.type == Like.LikeType.POST || like.type == Like.LikeType.PHOTO ) {
236236
writers[FileNames.PERSON_LIKES_POST.ordinal()].writeEntry(arguments);
237237
arguments.clear();

src/main/java/ldbc/snb/datagen/serializer/snb/interactive/CSVMergeForeignPersonSerializer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ protected void serialize(final Person p) {
120120
String dateString = Dictionaries.dates.formatDate(p.birthDay());
121121
arguments.add(dateString);
122122

123-
dateString = Dictionaries.dates.formatDateDetail(p.creationDate());
123+
dateString = Dictionaries.dates.formatDateTime(p.creationDate());
124124
arguments.add(dateString);
125125
arguments.add(p.ipAddress().toString());
126126
arguments.add(Dictionaries.browsers.getName(p.browserId()));
@@ -177,7 +177,7 @@ protected void serialize(final WorkAt workAt) {
177177
@Override
178178
protected void serialize( final Person p, Knows knows) {
179179
ArrayList<String> arguments = new ArrayList<String>();
180-
String dateString = Dictionaries.dates.formatDateDetail(knows.creationDate());
180+
String dateString = Dictionaries.dates.formatDateTime(knows.creationDate());
181181
arguments.add(Long.toString(p.accountId()));
182182
arguments.add(Long.toString(knows.to().accountId()));
183183
arguments.add(dateString);

0 commit comments

Comments
 (0)