@@ -69,8 +69,9 @@ public class UpdateEventSerializer implements Serializer{
69
69
70
70
private SequenceFile .Writer forumStreamWriter [];
71
71
private SequenceFile .Writer personStreamWriter [];
72
- private ArrayList <Object > data ;
73
- private ArrayList <Object > list ;
72
+ private ArrayList <String > data ;
73
+ private ArrayList <String > list ;
74
+ private ArrayList <String > tuple ;
74
75
private UpdateEvent currentEvent ;
75
76
private GregorianCalendar date ;
76
77
private BrowserDictionary browserDic ;
@@ -81,10 +82,10 @@ public class UpdateEventSerializer implements Serializer{
81
82
private Statistics statistics ;
82
83
private long minDate ;
83
84
private long maxDate ;
84
- private Gson gson ;
85
85
private long numEvents = 0 ;
86
86
private int numPartitions = 1 ;
87
87
private int nextPartition = 0 ;
88
+ private StringBuffer stringBuffer ;
88
89
89
90
public UpdateEventSerializer ( String outputDir ,
90
91
String outputFileName ,
@@ -95,8 +96,10 @@ public UpdateEventSerializer( String outputDir,
95
96
LanguageDictionary languageDic ,
96
97
IPAddressDictionary ipDic ,
97
98
Statistics statistics ) {
98
- gson = new GsonBuilder ().disableHtmlEscaping ().create ();
99
- this .data = new ArrayList <Object >();
99
+ this .stringBuffer = new StringBuffer (512 );
100
+ this .data = new ArrayList <String >();
101
+ this .list = new ArrayList <String >();
102
+ this .tuple = new ArrayList <String >();
100
103
this .currentEvent = new UpdateEvent (-1 , UpdateEvent .UpdateEventType .NO_EVENT ,new String ("" ));
101
104
this .date = new GregorianCalendar ();
102
105
this .browserDic = browserDic ;
@@ -220,10 +223,9 @@ public void writeKeyValue( UpdateEvent event, Stream s ) {
220
223
StringBuffer string = new StringBuffer ();
221
224
string .append (Long .toString (event .date ));
222
225
string .append ("|" );
223
- string .append (event .type .toString ( ));
226
+ string .append (Integer . toString ( event .type .ordinal ()+ 1 ));
224
227
string .append ("|" );
225
228
string .append (event .eventData );
226
- string .append ("|" );
227
229
string .append ("\n " );
228
230
switch (s ) {
229
231
case FORUM_STREAM :
@@ -253,19 +255,28 @@ enum Stream {
253
255
PERSON_STREAM
254
256
}
255
257
258
+ private String formatStringArray (ArrayList <String > array , String separator ) {
259
+ if ( array .size () == 0 ) return "" ;
260
+ stringBuffer .setLength (0 );
261
+ for ( String s : array ) {
262
+ stringBuffer .append (s );
263
+ stringBuffer .append (separator );
264
+ }
265
+ return stringBuffer .substring (0 ,stringBuffer .length ()-1 );
266
+ }
267
+
256
268
private void endEvent ( Stream s ) {
257
269
numEvents ++;
258
- currentEvent .eventData = gson . toJson (data );
270
+ currentEvent .eventData = formatStringArray (data , "|" );
259
271
writeKeyValue (currentEvent , s );
260
272
}
261
273
262
274
private void beginList () {
263
- list = new ArrayList <Object >();
264
275
list .clear ();
265
276
}
266
277
267
278
private void endList () {
268
- data .add (list );
279
+ data .add (formatStringArray ( list , ";" ) );
269
280
}
270
281
271
282
@@ -332,7 +343,7 @@ public Long unitsGenerated() {
332
343
public void serialize (UserInfo info ) {
333
344
334
345
beginEvent (info .user .getCreationDate (), UpdateEvent .UpdateEventType .ADD_PERSON );
335
- data .add (info .user .getAccountId ());
346
+ data .add (Long . toString ( info .user .getAccountId () ));
336
347
data .add (info .extraInfo .getFirstName ());
337
348
data .add (info .extraInfo .getLastName ());
338
349
data .add (info .extraInfo .getGender ());
@@ -354,13 +365,13 @@ public void serialize(UserInfo info) {
354
365
String empty = "" ;
355
366
data .add (empty );
356
367
}
357
- data .add (info .extraInfo .getLocationId ());
358
- ArrayList <Object > languages = new ArrayList <Object >();
368
+ data .add (Integer . toString ( info .extraInfo .getLocationId () ));
369
+ ArrayList <String > languages = new ArrayList <String >();
359
370
Vector <Integer > userLang = info .extraInfo .getLanguages ();
360
371
for (int i = 0 ; i < languages .size (); i ++) {
361
372
languages .add (languageDic .getLanguagesName (userLang .get (i )));
362
373
}
363
- data .add (languages );
374
+ data .add (formatStringArray ( languages , ";" ) );
364
375
365
376
beginList ();
366
377
Iterator <String > itString = info .extraInfo .getEmail ().iterator ();
@@ -373,7 +384,7 @@ public void serialize(UserInfo info) {
373
384
Iterator <Integer > itInteger = info .user .getSetOfTags ().iterator ();
374
385
while (itInteger .hasNext ()){
375
386
Integer interestIdx = itInteger .next ();
376
- list .add (interestIdx );
387
+ list .add (Integer . toString ( interestIdx ) );
377
388
}
378
389
endList ();
379
390
@@ -382,12 +393,12 @@ public void serialize(UserInfo info) {
382
393
long universityId = info .extraInfo .getUniversity ();
383
394
if ( universityId != -1 ){
384
395
if (info .extraInfo .getClassYear () != -1 ) {
385
- ArrayList <Object > studyAtData = new ArrayList <Object >();
396
+ ArrayList <String > studyAtData = new ArrayList <String >();
386
397
date .setTimeInMillis (info .extraInfo .getClassYear ());
387
398
dateString = DateGenerator .formatYear (date );
388
- studyAtData .add (universityId );
389
- studyAtData .add (Integer . parseInt ( dateString ) );
390
- list .add (studyAtData );
399
+ studyAtData .add (Long . toString ( universityId ) );
400
+ studyAtData .add (dateString );
401
+ list .add (formatStringArray ( studyAtData , "," ) );
391
402
}
392
403
}
393
404
endList ();
@@ -397,11 +408,11 @@ public void serialize(UserInfo info) {
397
408
while (it .hasNext ()) {
398
409
long companyId = it .next ();
399
410
date .setTimeInMillis (info .extraInfo .getWorkFrom (companyId ));
400
- ArrayList <Object > workAtData = new ArrayList <Object >();
411
+ ArrayList <String > workAtData = new ArrayList <String >();
401
412
dateString = DateGenerator .formatYear (date );
402
- workAtData .add (companyId );
403
- workAtData .add (Integer . parseInt ( dateString ) );
404
- list .add (workAtData );
413
+ workAtData .add (Long . toString ( companyId ) );
414
+ workAtData .add (dateString );
415
+ list .add (formatStringArray ( workAtData , "," ) );
405
416
}
406
417
endList ();
407
418
endEvent (Stream .PERSON_STREAM );
@@ -411,8 +422,8 @@ public void serialize(UserInfo info) {
411
422
public void serialize (Friend friend ) {
412
423
if (friend != null && friend .getCreatedTime () != -1 ){
413
424
beginEvent (friend .getCreatedTime (), UpdateEvent .UpdateEventType .ADD_FRIENDSHIP );
414
- data .add (friend .getUserAcc ());
415
- data .add (friend .getFriendAcc ());
425
+ data .add (Long . toString ( friend .getUserAcc () ));
426
+ data .add (Long . toString ( friend .getFriendAcc () ));
416
427
date .setTimeInMillis (friend .getCreatedTime ());
417
428
data .add (DateGenerator .formatDateDetail (date ));
418
429
endEvent (Stream .PERSON_STREAM );
@@ -423,7 +434,7 @@ public void serialize(Friend friend) {
423
434
public void serialize (Post post ) {
424
435
beginEvent (post .getCreationDate (), UpdateEvent .UpdateEventType .ADD_POST );
425
436
String empty = "" ;
426
- data .add (Long . parseLong ( SN .formId (post .getMessageId () )));
437
+ data .add (SN .formId (post .getMessageId ()));
427
438
data .add (empty );
428
439
date .setTimeInMillis (post .getCreationDate ());
429
440
String dateString = DateGenerator .formatDateDetail (date );
@@ -448,16 +459,16 @@ public void serialize(Post post) {
448
459
} else {
449
460
data .add (empty );
450
461
}
451
- data .add (post .getTextSize ());
452
- data .add (post .getAuthorId ());
453
- data .add (Long . parseLong ( SN .formId (post .getGroupId () )));
454
- data .add (ipDic .getLocation (post .getIpAddress ()));
462
+ data .add (Long . toString ( post .getTextSize () ));
463
+ data .add (Long . toString ( post .getAuthorId () ));
464
+ data .add (SN .formId (post .getGroupId ()));
465
+ data .add (Long . toString ( ipDic .getLocation (post .getIpAddress () )));
455
466
456
467
beginList ();
457
468
Iterator <Integer > it = post .getTags ().iterator ();
458
469
while (it .hasNext ()) {
459
470
Integer tagId = it .next ();
460
- list .add (tagId );
471
+ list .add (Integer . toString ( tagId ) );
461
472
}
462
473
endList ();
463
474
endEvent (Stream .FORUM_STREAM );
@@ -472,8 +483,8 @@ public void serialize(Like like) {
472
483
}
473
484
date .setTimeInMillis (like .date );
474
485
String dateString = DateGenerator .formatDateDetail (date );
475
- data .add (like .user );
476
- data .add (Long . parseLong ( SN .formId (like .messageId ) ));
486
+ data .add (Long . toString ( like .user ) );
487
+ data .add (SN .formId (like .messageId ));
477
488
data .add (dateString );
478
489
endEvent (Stream .FORUM_STREAM );
479
490
}
@@ -483,7 +494,7 @@ public void serialize(Photo photo) {
483
494
484
495
beginEvent (photo .getCreationDate (), UpdateEvent .UpdateEventType .ADD_POST );
485
496
String empty = "" ;
486
- data .add (Long . parseLong ( SN .formId (photo .getMessageId () )));
497
+ data .add (SN .formId (photo .getMessageId ()));
487
498
data .add (photo .getContent ());
488
499
date .setTimeInMillis (photo .getCreationDate ());
489
500
String dateString = DateGenerator .formatDateDetail (date );
@@ -500,16 +511,16 @@ public void serialize(Photo photo) {
500
511
}
501
512
data .add (empty );
502
513
data .add (empty );
503
- data .add (0 );
504
- data .add (photo .getAuthorId ());
505
- data .add (Long . parseLong ( SN .formId (photo .getGroupId () )));
506
- data .add (ipDic .getLocation (photo .getIpAddress ()));
514
+ data .add ("0" );
515
+ data .add (Long . toString ( photo .getAuthorId () ));
516
+ data .add (SN .formId (photo .getGroupId ()));
517
+ data .add (Long . toString ( ipDic .getLocation (photo .getIpAddress () )));
507
518
508
519
beginList ();
509
520
Iterator <Integer > it = photo .getTags ().iterator ();
510
521
while (it .hasNext ()) {
511
522
Integer tagId = it .next ();
512
- list .add (tagId );
523
+ list .add (Integer . toString ( tagId ) );
513
524
}
514
525
endList ();
515
526
endEvent (Stream .FORUM_STREAM );
@@ -521,7 +532,7 @@ public void serialize(Comment comment) {
521
532
beginEvent (comment .getCreationDate (), UpdateEvent .UpdateEventType .ADD_COMMENT );
522
533
date .setTimeInMillis (comment .getCreationDate ());
523
534
String dateString = DateGenerator .formatDateDetail (date );
524
- data .add (Long . parseLong ( SN .formId (comment .getMessageId () )));
535
+ data .add (SN .formId (comment .getMessageId ()));
525
536
data .add (dateString );
526
537
if (comment .getIpAddress () != null ) {
527
538
data .add (comment .getIpAddress ().toString ());
@@ -541,21 +552,21 @@ public void serialize(Comment comment) {
541
552
else {
542
553
data .add ("" );
543
554
}
544
- data .add (comment .getTextSize ());
545
- data .add (comment .getAuthorId ());
546
- data .add (ipDic .getLocation (comment .getIpAddress ()));
555
+ data .add (Integer . toString ( comment .getTextSize () ));
556
+ data .add (Long . toString ( comment .getAuthorId () ));
557
+ data .add (Long . toString ( ipDic .getLocation (comment .getIpAddress () )));
547
558
if (comment .getReplyOf () == comment .getPostId ()) {
548
- data .add (Long . parseLong ( SN .formId (comment .getPostId () )));
549
- data .add (new Long (- 1 ) );
559
+ data .add (SN .formId (comment .getPostId ()));
560
+ data .add ("-1" );
550
561
} else {
551
- data .add (new Long (- 1 ) );
552
- data .add (Long . parseLong ( SN .formId (comment .getReplyOf () )));
562
+ data .add ("-1" );
563
+ data .add (SN .formId (comment .getReplyOf ()));
553
564
}
554
565
beginList ();
555
566
Iterator <Integer > it = comment .getTags ().iterator ();
556
567
while (it .hasNext ()) {
557
568
Integer tagId = it .next ();
558
- list .add (tagId );
569
+ list .add (Integer . toString ( tagId ) );
559
570
}
560
571
endList ();
561
572
endEvent (Stream .FORUM_STREAM );
@@ -567,15 +578,15 @@ public void serialize(Group group) {
567
578
date .setTimeInMillis (group .getCreatedDate ());
568
579
String dateString = DateGenerator .formatDateDetail (date );
569
580
570
- data .add (Long . parseLong ( SN .formId (group .getGroupId () )));
581
+ data .add (SN .formId (group .getGroupId ()));
571
582
data .add (group .getGroupName ());
572
583
data .add (dateString );
573
- data .add (group .getModeratorId ());
584
+ data .add (Long . toString ( group .getModeratorId () ));
574
585
575
586
beginList ();
576
587
Integer groupTags [] = group .getTags ();
577
588
for (int i = 0 ; i < groupTags .length ; i ++) {
578
- list .add (groupTags [i ]);
589
+ list .add (Integer . toString ( groupTags [i ]) );
579
590
}
580
591
endList ();
581
592
endEvent (Stream .FORUM_STREAM );
@@ -586,8 +597,8 @@ public void serialize(GroupMemberShip membership) {
586
597
beginEvent (membership .getJoinDate (), UpdateEvent .UpdateEventType .ADD_FORUM_MEMBERSHIP );
587
598
date .setTimeInMillis (membership .getJoinDate ());
588
599
String dateString = DateGenerator .formatDateDetail (date );
589
- data .add (Long . parseLong ( SN .formId (membership .getGroupId () )));
590
- data .add (membership .getUserId ());
600
+ data .add (SN .formId (membership .getGroupId ()));
601
+ data .add (Long . toString ( membership .getUserId () ));
591
602
data .add (dateString );
592
603
endEvent (Stream .FORUM_STREAM );
593
604
}
0 commit comments