Skip to content

Commit 0637f75

Browse files
committed
Added tuplekey partitioner to partition during merge phase
1 parent a036583 commit 0637f75

File tree

2 files changed

+20
-0
lines changed

2 files changed

+20
-0
lines changed

src/main/java/ldbc/snb/datagen/hadoop/HadoopMergeFriendshipFiles.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ public void run( String outputFileName, ArrayList<String> friendshipFileNames )
8181
job.setNumReduceTasks(numThreads);
8282
job.setInputFormatClass(SequenceFileInputFormat.class);
8383
job.setOutputFormatClass(SequenceFileOutputFormat.class);
84+
job.setPartitionerClass(HadoopTuplePartitioner.class);
8485

8586
for ( String s : friendshipFileNames ) {
8687
FileInputFormat.addInputPath(job, new Path(s));
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package ldbc.snb.datagen.hadoop;
2+
3+
import ldbc.snb.datagen.objects.Person;
4+
import org.apache.hadoop.mapreduce.Partitioner;
5+
6+
/**
7+
* Created by aprat on 25/08/15.
8+
*/
9+
public class HadoopTuplePartitioner extends Partitioner<TupleKey, Person> {
10+
11+
public HadoopTuplePartitioner() {
12+
super();
13+
}
14+
15+
@Override
16+
public int getPartition(TupleKey key, Person person, int numReduceTasks) {
17+
return (int)(key.key % numReduceTasks);
18+
}
19+
}

0 commit comments

Comments
 (0)