Commit 0dfdcf4

Re-enabled sorting prior to person serialization.
Fixed a bug in the TTL serializer that was producing repeated knows, studyAt, and workAt ids.
1 parent 0b28e62 commit 0dfdcf4
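
The bug and the fix are easiest to see together in the diff below: the reducer now receives persons grouped by BlockKey and sets SN.machineId to the block before serializing, so ids minted while serializing a block are drawn from that block's range. Here is a minimal sketch of the underlying idea, assuming an id scheme that composes a per-block (machine) id with a local counter; the names nextId, BLOCK_SIZE, and the formula are illustrative, not the actual SN API:

// Hypothetical sketch of why ids collided: if every reducer composes edge ids
// from the same machineId and a local counter, two reducers emit the same id
// sequence. Setting the machine id per block makes the ranges disjoint.
public final class IdSketch {
    public static long machineId = 0;           // set per reduce group, as in SN.machineId = key.block
    private static final long BLOCK_SIZE = 10_000_000L;  // assumed range width, illustrative
    private static long localCounter = 0;

    /** Compose a globally unique id: the block (machine) id selects a
     *  disjoint range, the local counter enumerates within it. */
    public static long nextId() {
        return machineId * BLOCK_SIZE + localCounter++;
    }

    public static void main(String[] args) {
        machineId = 3;                          // e.g. SN.machineId = key.block
        System.out.println(nextId());           // 30000000
        System.out.println(nextId());           // 30000001
    }
}

Without the block grouping, SN.machineId is never updated per block, every reducer mints ids from the same range, and the TTL serializer emits colliding knows, studyAt, and workAt ids across reducers.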

1 file changed: +13 -13 lines changed
src/main/java/ldbc/snb/datagen/hadoop/HadoopPersonSerializer.java

Lines changed: 13 additions & 13 deletions

@@ -7,6 +7,7 @@
 import ldbc.snb.datagen.objects.Person;
 import ldbc.snb.datagen.serializer.PersonSerializer;
 import ldbc.snb.datagen.serializer.UpdateEventSerializer;
+import ldbc.snb.datagen.vocabulary.SN;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -25,7 +26,7 @@
  */
 public class HadoopPersonSerializer {
 
-    public static class HadoopPersonSerializerReducer extends Reducer<TupleKey, Person, LongWritable, Person> {
+    public static class HadoopPersonSerializerReducer extends Reducer<BlockKey, Person, LongWritable, Person> {
 
         private int reducerId; /** The id of the reducer. **/
         private PersonSerializer personSerializer_; /** The person serializer. **/
@@ -47,8 +48,9 @@ protected void setup(Context context) {
         }
 
         @Override
-        public void reduce(TupleKey key, Iterable<Person> valueSet, Context context)
+        public void reduce(BlockKey key, Iterable<Person> valueSet, Context context)
             throws IOException, InterruptedException {
+            SN.machineId = key.block;
             personSerializer_.reset();
             for( Person p : valueSet ) {
                 if( p.creationDate() < Dictionaries.dates.getUpdateThreshold() || !DatagenParams.updateStreams ) {
@@ -85,42 +87,40 @@ public void run( String inputFileName ) throws Exception {
 
         FileSystem fs = FileSystem.get(conf);
 
-        /*String rankedFileName = conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/ranked";
+        String rankedFileName = conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/ranked";
         HadoopFileRanker hadoopFileRanker = new HadoopFileRanker( conf, TupleKey.class, Person.class, null );
         hadoopFileRanker.run(inputFileName, rankedFileName);
-        */
 
         int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
         Job job = Job.getInstance(conf, "Person Serializer");
-        //job.setMapOutputKeyClass(BlockKey.class);
-        job.setMapOutputKeyClass(TupleKey.class);
+        job.setMapOutputKeyClass(BlockKey.class);
+        //job.setMapOutputKeyClass(TupleKey.class);
         job.setMapOutputValueClass(Person.class);
         job.setOutputKeyClass(LongWritable.class);
         job.setOutputValueClass(Person.class);
         job.setJarByClass(HadoopBlockMapper.class);
-        //job.setMapperClass(HadoopBlockMapper.class);
+        job.setMapperClass(HadoopBlockMapper.class);
         job.setReducerClass(HadoopPersonSerializerReducer.class);
         job.setNumReduceTasks(numThreads);
         job.setInputFormatClass(SequenceFileInputFormat.class);
         job.setOutputFormatClass(SequenceFileOutputFormat.class);
 
         job.setPartitionerClass(HadoopTuplePartitioner.class);
 
-        /*job.setSortComparatorClass(BlockKeyComparator.class);
+        job.setSortComparatorClass(BlockKeyComparator.class);
         job.setGroupingComparatorClass(BlockKeyGroupComparator.class);
         job.setPartitionerClass(HadoopBlockPartitioner.class);
-        */
-
-        //FileInputFormat.setInputPaths(job, new Path(rankedFileName));
-        FileInputFormat.setInputPaths(job, new Path(inputFileName));
+
+        FileInputFormat.setInputPaths(job, new Path(rankedFileName));
+        //FileInputFormat.setInputPaths(job, new Path(inputFileName));
         FileOutputFormat.setOutputPath(job, new Path(conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/aux"));
         if(!job.waitForCompletion(true)) {
             throw new Exception();
         }
 
         try {
-            // fs.delete(new Path(rankedFileName), true);
+            fs.delete(new Path(rankedFileName), true);
             fs.delete(new Path(conf.get("ldbc.snb.datagen.serializer.hadoopDir") + "/aux"), true);
         } catch(IOException e) {
            System.err.println(e.getMessage());
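
For context, the re-enabled comparator and partitioner wiring is the standard Hadoop secondary-sort pattern: map output is sorted by (block, key), the grouping comparator collapses everything with the same block into a single reduce() call, and the partitioner routes each block to one reducer. Below is a minimal, self-contained sketch of that pattern, assuming a key shaped like the datagen's BlockKey (two longs); the real BlockKey, BlockKeyComparator, and BlockKeyGroupComparator in the repository may differ in detail. Here the key's own compareTo provides the sort order, standing in for BlockKeyComparator:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Illustrative reconstruction of the block-grouping pattern the job relies on.
public class BlockKeySketch implements WritableComparable<BlockKeySketch> {
    public long block;   // the block a person belongs to
    public long key;     // order of the person within the block

    public BlockKeySketch() { }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(block);
        out.writeLong(key);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        block = in.readLong();
        key = in.readLong();
    }

    /** Sort by block first, then by key, so each reducer sees one
     *  contiguous, ordered run of persons per block. */
    @Override
    public int compareTo(BlockKeySketch o) {
        if (block != o.block) return Long.compare(block, o.block);
        return Long.compare(key, o.key);
    }

    /** Grouping comparator: all keys with the same block reach a single
     *  reduce() call, so SN.machineId is set exactly once per block. */
    public static class GroupComparator extends WritableComparator {
        public GroupComparator() { super(BlockKeySketch.class, true); }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            return Long.compare(((BlockKeySketch) a).block,
                                ((BlockKeySketch) b).block);
        }
    }
}

Wired into a job the same way the diff does it: job.setMapOutputKeyClass(BlockKeySketch.class) and job.setGroupingComparatorClass(BlockKeySketch.GroupComparator.class), with a partitioner that hashes only the block field.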
