Skip to content

Commit 4a02d03

Browse files
committed
Fixed error with deterministic output
1 parent 20c570c commit 4a02d03

13 files changed

+175
-118
lines changed

pom.xml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,21 +37,21 @@
3737
</plugins>
3838
</build>
3939
<dependencies>
40-
<dependency>
41-
<groupId>xerces</groupId>
42-
<artifactId>xercesImpl</artifactId>
43-
<version>2.9.1</version>
44-
</dependency>
45-
<dependency>
46-
<groupId>xalan</groupId>
47-
<artifactId>xalan</artifactId>
48-
<version>2.7.1</version>
49-
</dependency>
50-
<dependency>
51-
<groupId>org.jdom</groupId>
52-
<artifactId>jdom</artifactId>
53-
<version>1.1.3</version>
54-
</dependency>
40+
<dependency>
41+
<groupId>xerces</groupId>
42+
<artifactId>xercesImpl</artifactId>
43+
<version>2.9.1</version>
44+
</dependency>
45+
<dependency>
46+
<groupId>xalan</groupId>
47+
<artifactId>xalan</artifactId>
48+
<version>2.7.1</version>
49+
</dependency>
50+
<dependency>
51+
<groupId>org.jdom</groupId>
52+
<artifactId>jdom</artifactId>
53+
<version>1.1.3</version>
54+
</dependency>
5555
<dependency>
5656
<groupId>org.apache.hadoop</groupId>
5757
<artifactId>hadoop-tools</artifactId>

src/main/java/ldbc/socialnet/dbgen/dictionary/FlashmobTagDictionary.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,15 @@ private int searchEarliestIndex( long fromDate ) {
135135
/** @brief Makes a decision of selecting or not a flashmob tag.
136136
* @param[in] index The index of the flashmob tag to select.
137137
* @return true if the flashmob tag is selected. false otherwise.*/
138-
private boolean selectFlashmobTag( int index ) {
139-
return random.nextDouble() > (1-probRandomPerLevel*flashmobTagCumDist[index].level);
138+
private boolean selectFlashmobTag(Random rand, int index ) {
139+
return rand.nextDouble() > (1-probRandomPerLevel*flashmobTagCumDist[index].level);
140140
}
141141

142142
/** @brief Given a set of interests and a date, generates a set of flashmob tags.
143143
* @param[in] interests The set of interests.
144144
* @param[in] fromDate The date from which to consider the flashmob tags.
145145
* @return A vector containing the selected flashmob tags.*/
146-
public Vector<FlashmobTag> generateFlashmobTags( TreeSet<Integer> interests, long fromDate ) {
146+
public Vector<FlashmobTag> generateFlashmobTags( Random rand, TreeSet<Integer> interests, long fromDate ) {
147147
Vector<FlashmobTag> result = new Vector<FlashmobTag>();
148148
Iterator<Integer> it = interests.iterator();
149149
while(it.hasNext()) {
@@ -154,7 +154,7 @@ public Vector<FlashmobTag> generateFlashmobTags( TreeSet<Integer> interests, lon
154154
while( it2.hasNext()){
155155
FlashmobTag instance = it2.next();
156156
if( instance.date >= fromDate ) {
157-
if(random.nextDouble() > 1 - probInterestFlashmobTag){
157+
if(rand.nextDouble() > 1 - probInterestFlashmobTag){
158158
result.add(instance);
159159
}
160160
}
@@ -164,7 +164,7 @@ public Vector<FlashmobTag> generateFlashmobTags( TreeSet<Integer> interests, lon
164164
int numberOfMonths = (int)(dateGen.numberOfMonths(fromDate));
165165
int earliestIndex = searchEarliestIndex(fromDate);
166166
for( int i = earliestIndex; i < flashmobTagCumDist.length; ++i ) {
167-
if(selectFlashmobTag(i)) {
167+
if(selectFlashmobTag(rand, i)) {
168168
result.add(flashmobTagCumDist[i]);
169169
}
170170
}

src/main/java/ldbc/socialnet/dbgen/generator/FBSocialDegreeGenerator.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,11 @@ public short getSocialDegree(){
159159

160160
//Assign new ID to user based on the percentile that he/she belongs to
161161
public int getIDByPercentile(){
162-
int id = percenttileIDCounter[percentileIdx] * 100 + percentileIdx;
162+
/* int id = percenttileIDCounter[percentileIdx] * 100 + percentileIdx;
163163
percenttileIDCounter[percentileIdx]++;
164164
return id;
165+
*/
166+
return percentileIdx;
165167
}
166168

167169
public void printNewBucketRange(String filename){

src/main/java/ldbc/socialnet/dbgen/generator/FlashmobPostGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ protected PostInfo generatePostInfo( Random randomTag, Random randomDate, Group
212212

213213
@Override
214214
protected int generateNumOfPost(Random randomNumPost, ReducedUserProfile user) {
215-
Vector<FlashmobTag> temp = flashmobTagDictionary.generateFlashmobTags( user.getSetOfTags(), user.getCreationDate());
215+
Vector<FlashmobTag> temp = flashmobTagDictionary.generateFlashmobTags(randomNumPost, user.getSetOfTags(), user.getCreationDate());
216216
userFlashmobTags = new FlashmobTag[temp.size()];
217217
int index = 0;
218218
int sumLevels = 0;
@@ -250,7 +250,7 @@ protected int generateNumOfPost(Random randomNumPost, Group group) {
250250
for( int i = 0; i < groupTags.length; ++i ) {
251251
tags.add(groupTags[i]);
252252
}
253-
Vector<FlashmobTag> temp = flashmobTagDictionary.generateFlashmobTags( tags, group.getCreatedDate() );
253+
Vector<FlashmobTag> temp = flashmobTagDictionary.generateFlashmobTags( randomNumPost, tags, group.getCreatedDate() );
254254
groupFlashmobTags = new FlashmobTag[temp.size()];
255255
Iterator<FlashmobTag> it = temp.iterator();
256256
int index = 0;

src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ protected void cleanup(Context context){
147147
}
148148
}
149149

150-
public static class GenerateUsersMapper extends Mapper <LongWritable, Text, LongWritable, ReducedUserProfile> {
150+
public static class GenerateUsersMapper extends Mapper <LongWritable, Text, TupleKey, ReducedUserProfile> {
151151

152152
private int fileIdx;
153153

@@ -168,33 +168,32 @@ public void map(LongWritable key, Text value, Context context)
168168

169169
}
170170

171-
public static class DimensionReducer extends Reducer<ComposedKey, ReducedUserProfile, LongWritable, ReducedUserProfile>{
171+
public static class DimensionReducer extends Reducer<ComposedKey, ReducedUserProfile, TupleKey, ReducedUserProfile>{
172172

173173
public static ScalableGenerator friendGenerator;
174174
private int attempTaskId;
175175
private int dimension;
176176
private int pass;
177+
private int numCalls = 0;
177178

178179
@Override
179180
protected void setup(Context context){
180181
Configuration conf = context.getConfiguration();
181182
dimension = Integer.parseInt(conf.get("dimension"));
182183
pass = Integer.parseInt(conf.get("pass"));
183-
184-
String strTaskId = context.getTaskAttemptID().getTaskID().toString();
185-
attempTaskId = Integer.parseInt(strTaskId.substring(strTaskId.length() - 3));
184+
attempTaskId = context.getTaskAttemptID().getTaskID().getId();
186185
friendGenerator = new ScalableGenerator(attempTaskId, conf);
187186
friendGenerator.init();
188187
}
189188

190189
@Override
191190
public void reduce(ComposedKey key, Iterable<ReducedUserProfile> valueSet,
192191
Context context) throws IOException, InterruptedException{
192+
numCalls++;
193193
friendGenerator.resetState((int)key.block);
194194
int counter = 0;
195195
System.out.println("Start University group: "+key.block);
196196
for (ReducedUserProfile user:valueSet){
197-
// System.out.println(user.getAccountId());
198197
friendGenerator.pushUserProfile(user, pass, dimension, context);
199198
counter++;
200199
}
@@ -207,6 +206,7 @@ protected void cleanup(Context context){
207206
System.out.println("Number of user profile read " + friendGenerator.totalNumUserProfilesRead);
208207
System.out.println("Number of exact user profile out " + friendGenerator.exactOutput);
209208
System.out.println("Number of exact friend added " + friendGenerator.friendshipNum);
209+
System.out.println("Number of reducer calls "+numCalls);
210210
}
211211
}
212212

@@ -220,7 +220,7 @@ public static class UserActivityReducer extends Reducer <ComposedKey, ReducedUse
220220
protected void setup(Context context){
221221
Configuration conf = context.getConfiguration();
222222
String strTaskId = context.getTaskAttemptID().getTaskID().toString();
223-
attempTaskId = Integer.parseInt(strTaskId.substring(strTaskId.length() - 3));
223+
attempTaskId = context.getTaskAttemptID().getTaskID().getId();
224224
friendGenerator = new ScalableGenerator(attempTaskId, conf);
225225
friendGenerator.init();
226226
friendGenerator.openSerializer();
@@ -313,9 +313,9 @@ public int runGenerateJob(Configuration conf) throws Exception {
313313
printProgress("Starting: Person generation");
314314
conf.set("pass",Integer.toString(0));
315315
Job job = new Job(conf,"SIB Generate Users & 1st Dimension");
316-
job.setMapOutputKeyClass(LongWritable.class);
316+
job.setMapOutputKeyClass(TupleKey.class);
317317
job.setMapOutputValueClass(ReducedUserProfile.class);
318-
job.setOutputKeyClass(LongWritable.class);
318+
job.setOutputKeyClass(TupleKey.class);
319319
job.setOutputValueClass(ReducedUserProfile.class);
320320
job.setJarByClass(GenerateUsersMapper.class);
321321
job.setMapperClass(GenerateUsersMapper.class);
@@ -329,7 +329,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
329329

330330
/// --------------- Sorting by first dimension ----------------
331331
printProgress("Starting: Sorting by first dimension");
332-
HadoopFileRanker fileRanker = new HadoopFileRanker(conf,LongWritable.class,ReducedUserProfile.class);
332+
HadoopFileRanker fileRanker = new HadoopFileRanker(conf,TupleKey.class,ReducedUserProfile.class);
333333
fileRanker.run(hadoopDir+"/sib",hadoopDir+"/sibSorting");
334334
fs.delete(new Path(hadoopDir + "/sib"),true);
335335

@@ -340,7 +340,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
340340
job = new Job(conf,"SIB Generate Friendship - Interest");
341341
job.setMapOutputKeyClass(ComposedKey.class);
342342
job.setMapOutputValueClass(ReducedUserProfile.class);
343-
job.setOutputKeyClass(LongWritable.class);
343+
job.setOutputKeyClass(TupleKey.class);
344344
job.setOutputValueClass(ReducedUserProfile.class);
345345
job.setJarByClass(HadoopBlockMapper.class);
346346
job.setMapperClass(HadoopBlockMapper.class);
@@ -359,7 +359,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
359359

360360
/// --------------- Sorting phase 2 ----------------
361361
printProgress("Starting: Sorting by second dimension");
362-
fileRanker = new HadoopFileRanker(conf,LongWritable.class,ReducedUserProfile.class);
362+
fileRanker = new HadoopFileRanker(conf,TupleKey.class,ReducedUserProfile.class);
363363
fileRanker.run(hadoopDir+"/sib2",hadoopDir+"/sibSorting2");
364364
fs.delete(new Path(hadoopDir + "/sib2"),true);
365365

@@ -370,7 +370,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
370370
job = new Job(conf,"SIB Generate Friendship - Interest");
371371
job.setMapOutputKeyClass(ComposedKey.class);
372372
job.setMapOutputValueClass(ReducedUserProfile.class);
373-
job.setOutputKeyClass(LongWritable.class);
373+
job.setOutputKeyClass(TupleKey.class);
374374
job.setOutputValueClass(ReducedUserProfile.class);
375375
job.setJarByClass(HadoopBlockMapper.class);
376376
job.setMapperClass(HadoopBlockMapper.class);
@@ -388,7 +388,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
388388

389389
/// --------------- Sorting phase 3--------------
390390
printProgress("Starting: Sorting by third dimension");
391-
fileRanker = new HadoopFileRanker(conf,LongWritable.class,ReducedUserProfile.class);
391+
fileRanker = new HadoopFileRanker(conf,TupleKey.class,ReducedUserProfile.class);
392392
fileRanker.run(hadoopDir+"/sib3",hadoopDir+"/sibSorting3");
393393
fs.delete(new Path(hadoopDir + "/sib3"),true);
394394

@@ -399,7 +399,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
399399
job = new Job(conf,"SIB Generate Friendship - Random");
400400
job.setMapOutputKeyClass(ComposedKey.class);
401401
job.setMapOutputValueClass(ReducedUserProfile.class);
402-
job.setOutputKeyClass(LongWritable.class);
402+
job.setOutputKeyClass(TupleKey.class);
403403
job.setOutputValueClass(ReducedUserProfile.class);
404404
job.setJarByClass(HadoopBlockMapper.class);
405405
job.setMapperClass(HadoopBlockMapper.class);
@@ -418,7 +418,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
418418
/// --------------- Sorting phase 3--------------
419419

420420
printProgress("Starting: Sorting by third dimension (for activity generation)");
421-
fileRanker = new HadoopFileRanker(conf,LongWritable.class,ReducedUserProfile.class);
421+
fileRanker = new HadoopFileRanker(conf,TupleKey.class,ReducedUserProfile.class);
422422
fileRanker.run(hadoopDir+"/sib4",hadoopDir+"/sibSorting4");
423423
fs.delete(new Path(hadoopDir + "/sib4"),true);
424424

@@ -429,7 +429,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
429429
job = new Job(conf,"Generate user activity");
430430
job.setMapOutputKeyClass(ComposedKey.class);
431431
job.setMapOutputValueClass(ReducedUserProfile.class);
432-
job.setOutputKeyClass(LongWritable.class);
432+
job.setOutputKeyClass(TupleKey.class);
433433
job.setOutputValueClass(ReducedUserProfile.class);
434434
job.setJarByClass(HadoopBlockMapper.class);
435435
job.setMapperClass(HadoopBlockMapper.class);
@@ -543,7 +543,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
543543
Job job6 = new Job(conf,"Dump the friends lists");
544544
job6.setMapOutputKeyClass(ComposedKey.class);
545545
job6.setMapOutputValueClass(ReducedUserProfile.class);
546-
job6.setOutputKeyClass(LongWritable.class);
546+
job6.setOutputKeyClass(ComposedKey.class);
547547
job6.setOutputValueClass(ReducedUserProfile.class);
548548
job6.setJarByClass(HadoopBlockMapper.class);
549549
job6.setMapperClass(HadoopBlockMapper.class);

src/main/java/ldbc/socialnet/dbgen/generator/MRWriter.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@
4040
import java.io.ObjectOutputStream;
4141

4242
import ldbc.socialnet.dbgen.objects.ReducedUserProfile;
43+
import ldbc.socialnet.dbgen.util.ComposedKey;
4344
import ldbc.socialnet.dbgen.util.MapReduceKey;
4445

46+
import ldbc.socialnet.dbgen.util.TupleKey;
4547
import org.apache.hadoop.io.IntWritable;
4648
import org.apache.hadoop.io.LongWritable;
4749
import org.apache.hadoop.mapreduce.Reducer;
@@ -66,12 +68,7 @@ public void writeReducedUserProfiles(int from, int to, int pass,
6668
try {
6769
to = to % windowSize;
6870
for (int i = from; i != to; i = (i+1)%windowSize) {
69-
/* int key = userProfiles[i].getDicElementId(pass);
70-
int block = 0;
71-
long id = userProfiles[i].getAccountId();
72-
MapReduceKey mpk = new MapReduceKey(block,key,id);
73-
context.write(mpk, userProfiles[i]);*/
74-
context.write(new LongWritable(userProfiles[i].getDicElementId(pass)), userProfiles[i]);
71+
context.write(new TupleKey(userProfiles[i].getDicElementId(pass),userProfiles[i].getAccountId()), userProfiles[i]);
7572
numberSerializedObject++;
7673
}
7774
}

0 commit comments

Comments
 (0)