67
67
*/
68
68
public class UpdateEventSerializer implements Serializer {
69
69
70
- private OutputStream fileOutputStream ;
71
- private SequenceFile .Writer hdfsWriter ;
70
+ private SequenceFile . Writer forumStreamWriter [] ;
71
+ private SequenceFile .Writer personStreamWriter [] ;
72
72
private ArrayList <Object > data ;
73
73
private ArrayList <Object > list ;
74
74
private UpdateEvent currentEvent ;
@@ -83,8 +83,18 @@ public class UpdateEventSerializer implements Serializer{
83
83
private long maxDate ;
84
84
private Gson gson ;
85
85
private long numEvents = 0 ;
86
-
87
- public UpdateEventSerializer ( String outputDir , String outputFileName ,boolean exportText , boolean compress , TagDictionary tagDic , BrowserDictionary browserDic , LanguageDictionary languageDic , IPAddressDictionary ipDic , Statistics statistics ) {
86
+ private int numPartitions = 1 ;
87
+ private int nextPartition = 0 ;
88
+
89
+ public UpdateEventSerializer ( String outputDir ,
90
+ String outputFileName ,
91
+ boolean exportText ,
92
+ int numPartitions ,
93
+ TagDictionary tagDic ,
94
+ BrowserDictionary browserDic ,
95
+ LanguageDictionary languageDic ,
96
+ IPAddressDictionary ipDic ,
97
+ Statistics statistics ) {
88
98
gson = new GsonBuilder ().disableHtmlEscaping ().create ();
89
99
this .data = new ArrayList <Object >();
90
100
this .currentEvent = new UpdateEvent (-1 , UpdateEvent .UpdateEventType .NO_EVENT ,new String ("" ));
@@ -93,22 +103,22 @@ public UpdateEventSerializer( String outputDir, String outputFileName,boolean ex
93
103
this .languageDic = languageDic ;
94
104
this .ipDic = ipDic ;
95
105
this .exportText = exportText ;
106
+ this .numPartitions = numPartitions ;
96
107
this .tagDic = tagDic ;
97
108
this .statistics = statistics ;
98
109
this .minDate = Long .MAX_VALUE ;
99
110
this .maxDate = Long .MIN_VALUE ;
100
111
try {
112
+ this .forumStreamWriter = new SequenceFile .Writer [this .numPartitions ];
113
+ this .personStreamWriter = new SequenceFile .Writer [this .numPartitions ];
101
114
Configuration conf = new Configuration ();
102
115
FileSystem fs = FileSystem .get (conf );
103
- /*if( compress ) {
104
- this.fileOutputStream = new GZIPOutputStream(new FileOutputStream(outputDir + "/" + outputFileName +".gz"));
105
- } else {
106
- this.fileOutputStream = new FileOutputStream(outputDir + "/" + outputFileName );
116
+ for ( int i = 0 ; i < numPartitions ; ++i ) {
117
+ Path outFile = new Path (outputDir + "/" + outputFileName +"_" +i +"_forum" );
118
+ forumStreamWriter [i ] = new SequenceFile .Writer (fs , conf , outFile , LongWritable .class , Text .class );
119
+ outFile = new Path (outputDir + "/" + outputFileName +"_" +i +"_person" );
120
+ personStreamWriter [i ] = new SequenceFile .Writer (fs , conf , outFile , LongWritable .class , Text .class );
107
121
}
108
- hdfsOutput = new FSDataOutputStream(this.fileOutputStream, new FileSystem.Statistics(null));
109
- */
110
- Path outFile = new Path (outputDir + "/" + outputFileName );
111
- hdfsWriter = new SequenceFile .Writer (fs , conf ,outFile , LongWritable .class ,Text .class );
112
122
} catch (IOException e ){
113
123
System .err .println (e .getMessage ());
114
124
System .exit (-1 );
@@ -201,8 +211,11 @@ public UpdateEventSerializer( String outputDir, String outputFileName,boolean ex
201
211
// statistics.eventParams.add(params);
202
212
}
203
213
214
+ public void changePartition () {
215
+ nextPartition = (++nextPartition ) % numPartitions ;
216
+ }
204
217
205
- public void writeKeyValue ( UpdateEvent event ) {
218
+ public void writeKeyValue ( UpdateEvent event , Stream s ) {
206
219
try {
207
220
StringBuffer string = new StringBuffer ();
208
221
string .append (Long .toString (event .date ));
@@ -212,8 +225,14 @@ public void writeKeyValue( UpdateEvent event ) {
212
225
string .append (event .eventData );
213
226
string .append ("|" );
214
227
string .append ("\n " );
215
- //fileOutputStream.write(string.toString().getBytes("UTF8"));
216
- hdfsWriter .append (new LongWritable (event .date ),new Text (string .toString ()));
228
+ switch (s ) {
229
+ case FORUM_STREAM :
230
+ forumStreamWriter [nextPartition ].append (new LongWritable (event .date ),new Text (string .toString ()));
231
+ break ;
232
+ case PERSON_STREAM :
233
+ personStreamWriter [nextPartition ].append (new LongWritable (event .date ),new Text (string .toString ()));
234
+ break ;
235
+ }
217
236
} catch (IOException e ){
218
237
System .err .println (e .getMessage ());
219
238
System .exit (-1 );
@@ -229,10 +248,15 @@ private void beginEvent( long date, UpdateEvent.UpdateEventType type ) {
229
248
data .clear ();
230
249
}
231
250
232
- private void endEvent () {
251
/**
 * Identifies which family of output writers an update event is routed to.
 * {@code writeKeyValue} switches on this value to choose between the
 * forum and person writer arrays.
 */
enum Stream {
    /** Routes the event to {@code forumStreamWriter}. */
    FORUM_STREAM,

    /** Routes the event to {@code personStreamWriter}. */
    PERSON_STREAM
}
255
+
256
+ private void endEvent ( Stream s ) {
233
257
numEvents ++;
234
258
currentEvent .eventData = gson .toJson (data );
235
- writeKeyValue (currentEvent );
259
+ writeKeyValue (currentEvent , s );
236
260
}
237
261
238
262
private void beginList () {
@@ -256,7 +280,10 @@ public void close() {
256
280
System .out .println ("Number of update events serialized " +numEvents );
257
281
258
282
try {
259
- hdfsWriter .close ();
283
+ for ( int i = 0 ; i < numPartitions ; ++i ) {
284
+ forumStreamWriter [i ].close ();
285
+ personStreamWriter [i ].close ();
286
+ }
260
287
} catch (IOException e ){
261
288
System .err .println (e .getMessage ());
262
289
System .exit (-1 );
@@ -377,7 +404,7 @@ public void serialize(UserInfo info) {
377
404
list .add (workAtData );
378
405
}
379
406
endList ();
380
- endEvent ();
407
+ endEvent (Stream . PERSON_STREAM );
381
408
}
382
409
383
410
@ Override
@@ -388,7 +415,7 @@ public void serialize(Friend friend) {
388
415
data .add (friend .getFriendAcc ());
389
416
date .setTimeInMillis (friend .getCreatedTime ());
390
417
data .add (DateGenerator .formatDateDetail (date ));
391
- endEvent ();
418
+ endEvent (Stream . PERSON_STREAM );
392
419
}
393
420
}
394
421
@@ -433,7 +460,7 @@ public void serialize(Post post) {
433
460
list .add (tagId );
434
461
}
435
462
endList ();
436
- endEvent ();
463
+ endEvent (Stream . FORUM_STREAM );
437
464
}
438
465
439
466
@ Override
@@ -448,7 +475,7 @@ public void serialize(Like like) {
448
475
data .add (like .user );
449
476
data .add (Long .parseLong (SN .formId (like .messageId )));
450
477
data .add (dateString );
451
- endEvent ();
478
+ endEvent (Stream . FORUM_STREAM );
452
479
}
453
480
454
481
@ Override
@@ -485,7 +512,7 @@ public void serialize(Photo photo) {
485
512
list .add (tagId );
486
513
}
487
514
endList ();
488
- endEvent ();
515
+ endEvent (Stream . FORUM_STREAM );
489
516
}
490
517
491
518
@ Override
@@ -531,7 +558,7 @@ public void serialize(Comment comment) {
531
558
list .add (tagId );
532
559
}
533
560
endList ();
534
- endEvent ();
561
+ endEvent (Stream . FORUM_STREAM );
535
562
}
536
563
537
564
@ Override
@@ -551,7 +578,7 @@ public void serialize(Group group) {
551
578
list .add (groupTags [i ]);
552
579
}
553
580
endList ();
554
- endEvent ();
581
+ endEvent (Stream . FORUM_STREAM );
555
582
}
556
583
557
584
@ Override
@@ -562,7 +589,7 @@ public void serialize(GroupMemberShip membership) {
562
589
data .add (Long .parseLong (SN .formId (membership .getGroupId ())));
563
590
data .add (membership .getUserId ());
564
591
data .add (dateString );
565
- endEvent ();
592
+ endEvent (Stream . FORUM_STREAM );
566
593
}
567
594
568
595
@ Override
0 commit comments