Skip to content

Commit e1cbfa9

Browse files
committed
Changed update stream format
1 parent 3a7ac9d commit e1cbfa9

File tree

2 files changed

+65
-56
lines changed

2 files changed

+65
-56
lines changed

src/main/java/ldbc/socialnet/dbgen/objects/UpdateEvent.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,9 @@ public static void writeEvent( OutputStream os, UpdateEvent event ) {
6464
StringBuffer string = new StringBuffer();
6565
string.append(Long.toString(event.date));
6666
string.append("|");
67-
string.append(event.type.toString());
67+
string.append(Integer.toString(event.type.ordinal()+1));
6868
string.append("|");
6969
string.append(event.eventData);
70-
string.append("|");
7170
string.append("\n");
7271
//fileOutputStream.write(string.toString().getBytes("UTF8"));
7372
os.write(string.toString().getBytes("UTF8"));
@@ -85,7 +84,6 @@ public static void writeEventKeyValue( OutputStream os, UpdateEvent event ) {
8584
string.append(event.type.toString());
8685
string.append("|");
8786
string.append(event.eventData);
88-
string.append("|");
8987
string.append("\n");
9088
//fileOutputStream.write(string.toString().getBytes("UTF8"));
9189
os.write(string.toString().getBytes("UTF8"));

src/main/java/ldbc/socialnet/dbgen/serializer/UpdateEventSerializer.java

Lines changed: 64 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,9 @@ public class UpdateEventSerializer implements Serializer{
6969

7070
private SequenceFile.Writer forumStreamWriter[];
7171
private SequenceFile.Writer personStreamWriter[];
72-
private ArrayList<Object> data;
73-
private ArrayList<Object> list;
72+
private ArrayList<String> data;
73+
private ArrayList<String> list;
74+
private ArrayList<String> tuple;
7475
private UpdateEvent currentEvent;
7576
private GregorianCalendar date;
7677
private BrowserDictionary browserDic;
@@ -81,10 +82,10 @@ public class UpdateEventSerializer implements Serializer{
8182
private Statistics statistics;
8283
private long minDate;
8384
private long maxDate;
84-
private Gson gson;
8585
private long numEvents = 0;
8686
private int numPartitions = 1;
8787
private int nextPartition = 0;
88+
private StringBuffer stringBuffer;
8889

8990
public UpdateEventSerializer( String outputDir,
9091
String outputFileName,
@@ -95,8 +96,10 @@ public UpdateEventSerializer( String outputDir,
9596
LanguageDictionary languageDic,
9697
IPAddressDictionary ipDic,
9798
Statistics statistics) {
98-
gson = new GsonBuilder().disableHtmlEscaping().create();
99-
this.data = new ArrayList<Object>();
99+
this.stringBuffer = new StringBuffer(512);
100+
this.data = new ArrayList<String>();
101+
this.list = new ArrayList<String>();
102+
this.tuple = new ArrayList<String>();
100103
this.currentEvent = new UpdateEvent(-1, UpdateEvent.UpdateEventType.NO_EVENT,new String(""));
101104
this.date = new GregorianCalendar();
102105
this.browserDic = browserDic;
@@ -220,10 +223,9 @@ public void writeKeyValue( UpdateEvent event, Stream s ) {
220223
StringBuffer string = new StringBuffer();
221224
string.append(Long.toString(event.date));
222225
string.append("|");
223-
string.append(event.type.toString());
226+
string.append(Integer.toString(event.type.ordinal()+1));
224227
string.append("|");
225228
string.append(event.eventData);
226-
string.append("|");
227229
string.append("\n");
228230
switch (s) {
229231
case FORUM_STREAM:
@@ -253,19 +255,28 @@ enum Stream {
253255
PERSON_STREAM
254256
}
255257

258+
private String formatStringArray(ArrayList<String> array, String separator) {
259+
if( array.size() == 0 ) return "";
260+
stringBuffer.setLength(0);
261+
for( String s : array) {
262+
stringBuffer.append(s);
263+
stringBuffer.append(separator);
264+
}
265+
return stringBuffer.substring(0,stringBuffer.length()-1);
266+
}
267+
256268
private void endEvent( Stream s ) {
257269
numEvents++;
258-
currentEvent.eventData = gson.toJson(data);
270+
currentEvent.eventData = formatStringArray(data,"|");
259271
writeKeyValue(currentEvent, s);
260272
}
261273

262274
private void beginList() {
263-
list = new ArrayList<Object>();
264275
list.clear();
265276
}
266277

267278
private void endList() {
268-
data.add(list);
279+
data.add(formatStringArray(list,";"));
269280
}
270281

271282

@@ -332,7 +343,7 @@ public Long unitsGenerated() {
332343
public void serialize(UserInfo info) {
333344

334345
beginEvent(info.user.getCreationDate(), UpdateEvent.UpdateEventType.ADD_PERSON);
335-
data.add(info.user.getAccountId());
346+
data.add(Long.toString(info.user.getAccountId()));
336347
data.add(info.extraInfo.getFirstName());
337348
data.add(info.extraInfo.getLastName());
338349
data.add(info.extraInfo.getGender());
@@ -354,13 +365,13 @@ public void serialize(UserInfo info) {
354365
String empty = "";
355366
data.add(empty);
356367
}
357-
data.add(info.extraInfo.getLocationId());
358-
ArrayList<Object> languages = new ArrayList<Object>();
368+
data.add(Integer.toString(info.extraInfo.getLocationId()));
369+
ArrayList<String> languages = new ArrayList<String>();
359370
Vector<Integer> userLang = info.extraInfo.getLanguages();
360371
for (int i = 0; i < languages.size(); i++) {
361372
languages.add(languageDic.getLanguagesName(userLang.get(i)));
362373
}
363-
data.add(languages);
374+
data.add(formatStringArray(languages,";"));
364375

365376
beginList();
366377
Iterator<String> itString = info.extraInfo.getEmail().iterator();
@@ -373,7 +384,7 @@ public void serialize(UserInfo info) {
373384
Iterator<Integer> itInteger = info.user.getSetOfTags().iterator();
374385
while (itInteger.hasNext()){
375386
Integer interestIdx = itInteger.next();
376-
list.add(interestIdx);
387+
list.add(Integer.toString(interestIdx));
377388
}
378389
endList();
379390

@@ -382,12 +393,12 @@ public void serialize(UserInfo info) {
382393
long universityId = info.extraInfo.getUniversity();
383394
if ( universityId != -1){
384395
if (info.extraInfo.getClassYear() != -1 ) {
385-
ArrayList<Object> studyAtData = new ArrayList<Object>();
396+
ArrayList<String> studyAtData = new ArrayList<String>();
386397
date.setTimeInMillis(info.extraInfo.getClassYear());
387398
dateString = DateGenerator.formatYear(date);
388-
studyAtData.add(universityId);
389-
studyAtData.add(Integer.parseInt(dateString));
390-
list.add(studyAtData);
399+
studyAtData.add(Long.toString(universityId));
400+
studyAtData.add(dateString);
401+
list.add(formatStringArray(studyAtData,","));
391402
}
392403
}
393404
endList();
@@ -397,11 +408,11 @@ public void serialize(UserInfo info) {
397408
while (it.hasNext()) {
398409
long companyId = it.next();
399410
date.setTimeInMillis(info.extraInfo.getWorkFrom(companyId));
400-
ArrayList<Object> workAtData = new ArrayList<Object>();
411+
ArrayList<String> workAtData = new ArrayList<String>();
401412
dateString = DateGenerator.formatYear(date);
402-
workAtData.add(companyId);
403-
workAtData.add(Integer.parseInt(dateString));
404-
list.add(workAtData);
413+
workAtData.add(Long.toString(companyId));
414+
workAtData.add(dateString);
415+
list.add(formatStringArray(workAtData,","));
405416
}
406417
endList();
407418
endEvent(Stream.PERSON_STREAM);
@@ -411,8 +422,8 @@ public void serialize(UserInfo info) {
411422
public void serialize(Friend friend) {
412423
if (friend != null && friend.getCreatedTime() != -1){
413424
beginEvent(friend.getCreatedTime(), UpdateEvent.UpdateEventType.ADD_FRIENDSHIP);
414-
data.add(friend.getUserAcc());
415-
data.add(friend.getFriendAcc());
425+
data.add(Long.toString(friend.getUserAcc()));
426+
data.add(Long.toString(friend.getFriendAcc()));
416427
date.setTimeInMillis(friend.getCreatedTime());
417428
data.add(DateGenerator.formatDateDetail(date));
418429
endEvent(Stream.PERSON_STREAM);
@@ -423,7 +434,7 @@ public void serialize(Friend friend) {
423434
public void serialize(Post post) {
424435
beginEvent(post.getCreationDate(), UpdateEvent.UpdateEventType.ADD_POST);
425436
String empty = "";
426-
data.add(Long.parseLong(SN.formId(post.getMessageId())));
437+
data.add(SN.formId(post.getMessageId()));
427438
data.add(empty);
428439
date.setTimeInMillis(post.getCreationDate());
429440
String dateString = DateGenerator.formatDateDetail(date);
@@ -448,16 +459,16 @@ public void serialize(Post post) {
448459
} else {
449460
data.add(empty);
450461
}
451-
data.add(post.getTextSize());
452-
data.add(post.getAuthorId());
453-
data.add(Long.parseLong(SN.formId(post.getGroupId())));
454-
data.add(ipDic.getLocation(post.getIpAddress()));
462+
data.add(Long.toString(post.getTextSize()));
463+
data.add(Long.toString(post.getAuthorId()));
464+
data.add(SN.formId(post.getGroupId()));
465+
data.add(Long.toString(ipDic.getLocation(post.getIpAddress())));
455466

456467
beginList();
457468
Iterator<Integer> it = post.getTags().iterator();
458469
while (it.hasNext()) {
459470
Integer tagId = it.next();
460-
list.add(tagId);
471+
list.add(Integer.toString(tagId));
461472
}
462473
endList();
463474
endEvent(Stream.FORUM_STREAM);
@@ -472,8 +483,8 @@ public void serialize(Like like) {
472483
}
473484
date.setTimeInMillis(like.date);
474485
String dateString = DateGenerator.formatDateDetail(date);
475-
data.add(like.user);
476-
data.add(Long.parseLong(SN.formId(like.messageId)));
486+
data.add(Long.toString(like.user));
487+
data.add(SN.formId(like.messageId));
477488
data.add(dateString);
478489
endEvent(Stream.FORUM_STREAM);
479490
}
@@ -483,7 +494,7 @@ public void serialize(Photo photo) {
483494

484495
beginEvent(photo.getCreationDate(), UpdateEvent.UpdateEventType.ADD_POST);
485496
String empty = "";
486-
data.add(Long.parseLong(SN.formId(photo.getMessageId())));
497+
data.add(SN.formId(photo.getMessageId()));
487498
data.add(photo.getContent());
488499
date.setTimeInMillis(photo.getCreationDate());
489500
String dateString = DateGenerator.formatDateDetail(date);
@@ -500,16 +511,16 @@ public void serialize(Photo photo) {
500511
}
501512
data.add(empty);
502513
data.add(empty);
503-
data.add(0);
504-
data.add(photo.getAuthorId());
505-
data.add(Long.parseLong(SN.formId(photo.getGroupId())));
506-
data.add(ipDic.getLocation(photo.getIpAddress()));
514+
data.add("0");
515+
data.add(Long.toString(photo.getAuthorId()));
516+
data.add(SN.formId(photo.getGroupId()));
517+
data.add(Long.toString(ipDic.getLocation(photo.getIpAddress())));
507518

508519
beginList();
509520
Iterator<Integer> it = photo.getTags().iterator();
510521
while (it.hasNext()) {
511522
Integer tagId = it.next();
512-
list.add(tagId);
523+
list.add(Integer.toString(tagId));
513524
}
514525
endList();
515526
endEvent(Stream.FORUM_STREAM);
@@ -521,7 +532,7 @@ public void serialize(Comment comment) {
521532
beginEvent(comment.getCreationDate(), UpdateEvent.UpdateEventType.ADD_COMMENT);
522533
date.setTimeInMillis(comment.getCreationDate());
523534
String dateString = DateGenerator.formatDateDetail(date);
524-
data.add(Long.parseLong(SN.formId(comment.getMessageId())));
535+
data.add(SN.formId(comment.getMessageId()));
525536
data.add(dateString);
526537
if (comment.getIpAddress() != null) {
527538
data.add(comment.getIpAddress().toString());
@@ -541,21 +552,21 @@ public void serialize(Comment comment) {
541552
else {
542553
data.add("");
543554
}
544-
data.add(comment.getTextSize());
545-
data.add(comment.getAuthorId());
546-
data.add(ipDic.getLocation(comment.getIpAddress()));
555+
data.add(Integer.toString(comment.getTextSize()));
556+
data.add(Long.toString(comment.getAuthorId()));
557+
data.add(Long.toString(ipDic.getLocation(comment.getIpAddress())));
547558
if (comment.getReplyOf() == comment.getPostId()) {
548-
data.add(Long.parseLong(SN.formId(comment.getPostId())));
549-
data.add(new Long(-1));
559+
data.add(SN.formId(comment.getPostId()));
560+
data.add("-1");
550561
} else {
551-
data.add(new Long(-1));
552-
data.add(Long.parseLong(SN.formId(comment.getReplyOf())));
562+
data.add("-1");
563+
data.add(SN.formId(comment.getReplyOf()));
553564
}
554565
beginList();
555566
Iterator<Integer> it = comment.getTags().iterator();
556567
while (it.hasNext()) {
557568
Integer tagId = it.next();
558-
list.add(tagId);
569+
list.add(Integer.toString(tagId));
559570
}
560571
endList();
561572
endEvent(Stream.FORUM_STREAM);
@@ -567,15 +578,15 @@ public void serialize(Group group) {
567578
date.setTimeInMillis(group.getCreatedDate());
568579
String dateString = DateGenerator.formatDateDetail(date);
569580

570-
data.add(Long.parseLong(SN.formId(group.getGroupId())));
581+
data.add(SN.formId(group.getGroupId()));
571582
data.add(group.getGroupName());
572583
data.add(dateString);
573-
data.add(group.getModeratorId());
584+
data.add(Long.toString(group.getModeratorId()));
574585

575586
beginList();
576587
Integer groupTags[] = group.getTags();
577588
for (int i = 0; i < groupTags.length; i ++) {
578-
list.add(groupTags[i]);
589+
list.add(Integer.toString(groupTags[i]));
579590
}
580591
endList();
581592
endEvent(Stream.FORUM_STREAM);
@@ -586,8 +597,8 @@ public void serialize(GroupMemberShip membership) {
586597
beginEvent(membership.getJoinDate(), UpdateEvent.UpdateEventType.ADD_FORUM_MEMBERSHIP);
587598
date.setTimeInMillis(membership.getJoinDate());
588599
String dateString = DateGenerator.formatDateDetail(date);
589-
data.add(Long.parseLong(SN.formId(membership.getGroupId())));
590-
data.add(membership.getUserId());
600+
data.add(SN.formId(membership.getGroupId()));
601+
data.add(Long.toString(membership.getUserId()));
591602
data.add(dateString);
592603
endEvent(Stream.FORUM_STREAM);
593604
}

0 commit comments

Comments
 (0)