
Commit 14454db

Added parameter to disable user activity generation
1 parent: 88ce855
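In practice, the new flag is read from the params.ini file that run_local.sh passes to the generator. Below is a hypothetical sketch of such a file: only the activity key is introduced by this commit, and the other keys and all values shown simply mirror the defaults registered in ConfigParser.

# hypothetical params.ini sketch, values are illustrative
serializer:csv
compressed:false
updateStreams:false
numUpdatePartitions:1
outputDir:./
# new in this commit: set to false to skip per-user activity (posts, photos, groups)
activity:false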

File tree

run_local.sh
src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java
src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java
src/main/java/ldbc/socialnet/dbgen/util/ConfigParser.java

4 files changed: +130 -94 lines

run_local.sh

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+#!/bin/bash
+DEFAULT_HADOOP_HOME=/home/aprat/Programs/hadoop-1.2.1 #change to your hadoop folder
+DEFAULT_LDBC_SNB_DATAGEN_HOME=/home/aprat/Projects/LDBC/generator/ldbc_snb_datagen #change to your ldbc_socialnet_dbgen folder
+
+# allow overriding configuration from outside via environment variables
+# i.e. you can do
+# HADOOP_HOME=/foo/bar LDBC_SNB_DATAGEN_HOME=/baz/quux ./run.sh
+# instead of changing the contents of this file
+HADOOP_HOME=${HADOOP_HOME:-$DEFAULT_HADOOP_HOME}
+LDBC_SNB_DATAGEN_HOME=${LDBC_SNB_DATAGEN_HOME:-$DEFAULT_LDBC_SNB_DATAGEN_HOME}
+
+export HADOOP_HOME
+export LDBC_SNB_DATAGEN_HOME
+
+mvn clean
+mvn assembly:assembly
+
+cp $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/
+rm $LDBC_SNB_DATAGEN_HOME/target/ldbc_snb_datagen.jar
+
+$HADOOP_HOME/bin/hadoop jar $LDBC_SNB_DATAGEN_HOME/ldbc_snb_datagen.jar $LDBC_SNB_DATAGEN_HOME/params.ini
+
+#parameter generation
+PARAM_GENERATION=0
+
+if [ $PARAM_GENERATION -eq 1 ]
+then
+  mkdir -p substitution_parameters
+  python paramgenerator/generateparams.py $LDBC_SNB_DATAGEN_HOME substitution_parameters/
+  rm -f m0factors.txt
+  rm -f m0friendList*
+fi
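The script's own header comment suggests overriding the two hard-coded defaults from the environment instead of editing the file, for example (the paths here are placeholders):

HADOOP_HOME=/opt/hadoop-1.2.1 LDBC_SNB_DATAGEN_HOME=$HOME/ldbc_snb_datagen ./run_local.sh

Note that the comment refers to ./run.sh while the committed file is named run_local.sh; the invocation above assumes the committed name.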

src/main/java/ldbc/socialnet/dbgen/generator/MRGenerateUsers.java

Lines changed: 90 additions & 90 deletions

@@ -547,101 +547,101 @@ public int runGenerateJob(Configuration conf) throws Exception {
 long min = Long.MAX_VALUE;
 long max = Long.MIN_VALUE;

The commit re-enables a previously disabled block: the 90 removed lines are the code below prefixed with //, and the 90 added lines are the identical code uncommented. Only the re-enabled version is shown.

+for( int i =0; i < numThreads; ++i ) {
+    int numPartitions = conf.getInt("numUpdatePartitions", 1);
+    for( int j = 0; j < numPartitions; ++j ) {
+        /// --------------- Fifth job: Sort update streams ----------------
+        conf.setInt("mapred.line.input.format.linespermap", 1000000);
+        conf.setInt("partitionId",j);
+        conf.set("streamType","forum");
+        Job jobForum = new Job(conf, "Soring update streams "+j+" of reducer "+i);
+        jobForum.setMapOutputKeyClass(LongWritable.class);
+        jobForum.setMapOutputValueClass(Text.class);
+        jobForum.setOutputKeyClass(LongWritable.class);
+        jobForum.setOutputValueClass(Text.class);
+        jobForum.setJarByClass(UpdateEventMapper.class);
+        jobForum.setMapperClass(UpdateEventMapper.class);
+        jobForum.setReducerClass(UpdateEventReducer.class);
+        jobForum.setNumReduceTasks(1);
+        jobForum.setInputFormatClass(SequenceFileInputFormat.class);
+        jobForum.setOutputFormatClass(SequenceFileOutputFormat.class);
+        jobForum.setPartitionerClass(UpdateEventPartitioner.class);
+        FileInputFormat.addInputPath(jobForum, new Path(socialNetDir + "/temp_updateStream_" + i+"_"+j+"_forum"));
+        FileOutputFormat.setOutputPath(jobForum, new Path(hadoopDir + "/sibEnd"));
+        printProgress("Starting: Sorting update streams");
+        jobForum.waitForCompletion(true);
+        fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i+"_"+j+"_forum"), false);
+        fs.delete(new Path(hadoopDir + "/sibEnd"), true);
+
+        conf.setInt("mapred.line.input.format.linespermap", 1000000);
+        conf.setInt("partitionId",j);
+        conf.set("streamType","person");
+        Job jobPerson = new Job(conf, "Soring update streams "+j+" of reducer "+i);
+        jobPerson.setMapOutputKeyClass(LongWritable.class);
+        jobPerson.setMapOutputValueClass(Text.class);
+        jobPerson.setOutputKeyClass(LongWritable.class);
+        jobPerson.setOutputValueClass(Text.class);
+        jobPerson.setJarByClass(UpdateEventMapper.class);
+        jobPerson.setMapperClass(UpdateEventMapper.class);
+        jobPerson.setReducerClass(UpdateEventReducer.class);
+        jobPerson.setNumReduceTasks(1);
+        jobPerson.setInputFormatClass(SequenceFileInputFormat.class);
+        jobPerson.setOutputFormatClass(SequenceFileOutputFormat.class);
+        jobPerson.setPartitionerClass(UpdateEventPartitioner.class);
+        FileInputFormat.addInputPath(jobPerson, new Path(socialNetDir + "/temp_updateStream_" + i+"_"+j+"_person"));
+        FileOutputFormat.setOutputPath(jobPerson, new Path(hadoopDir + "/sibEnd"));
+        printProgress("Starting: Sorting update streams");
+        jobPerson.waitForCompletion(true);
+        fs.delete(new Path(socialNetDir + "/temp_updateStream_" + i+"_"+j+"_person"), false);
+        fs.delete(new Path(hadoopDir + "/sibEnd"), true);
+
+        if(conf.getBoolean("updateStreams",false)) {
+            Properties properties = new Properties();
+            properties.load(fs.open(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j + "_person.properties")));
+            Long auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
+            min = auxMin < min ? auxMin : min;
+            Long auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
+            max = auxMax > max ? auxMax : max;
+            numEvents += Long.parseLong(properties.getProperty("num_events"));
+
+            properties.load(fs.open(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j + "_forum.properties")));
+
+            auxMin = Long.parseLong(properties.getProperty("min_write_event_start_time"));
+            min = auxMin < min ? auxMin : min;
+            auxMax = Long.parseLong(properties.getProperty("max_write_event_start_time"));
+            max = auxMax > max ? auxMax : max;
+            numEvents += Long.parseLong(properties.getProperty("num_events"));
+
+            fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j + "_person.properties"),true);
+            fs.delete(new Path(conf.get("outputDir") + "/social_network/updateStream_" + i + "_" + j + "_forum.properties"),true);
+        }
+    }
+}
+
+if(conf.getBoolean("updateStreams",false)) {
+    OutputStream output = fs.create(new Path(conf.get("outputDir") + "/social_network/updateStream.properties"));
+    output.write(new String("ldbc.snb.interactive.gct_delta_duration:"+conf.get("deltaTime")+"\n").getBytes());
+    output.write(new String("ldbc.snb.interactive.min_write_event_start_time:"+min+"\n").getBytes());
+    output.write(new String("ldbc.snb.interactive.max_write_event_start_time:"+max+"\n").getBytes());
+    output.write(new String("ldbc.snb.interactive.update_interleave:"+(max-min)/numEvents+"\n").getBytes());
+    output.write(new String("ldbc.snb.interactive.num_events:"+numEvents).getBytes());
+    output.close();
+}
+
+printProgress("Starting: Materialize friends for substitution parameters");
+int resMaterializeFriends = job6.waitForCompletion(true) ? 0 : 1;
+fs.delete(new Path(hadoopDir + "/sibSorting3"),true);

 long end = System.currentTimeMillis();
 System.out.println(((end - start) / 1000)
 + " total seconds");
-// for( int i = 0; i < numThreads; ++i ) {
-//     fs.copyToLocalFile(new Path(socialNetDir + "/m"+i+"factors.txt"), new Path("./"));
-//     fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList"+i+".csv"), new Path("./"));
-// }
+for( int i = 0; i < numThreads; ++i ) {
+    fs.copyToLocalFile(new Path(socialNetDir + "/m"+i+"factors.txt"), new Path("./"));
+    fs.copyToLocalFile(new Path(socialNetDir + "/m0friendList"+i+".csv"), new Path("./"));
+}
 return res;
 }
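The re-enabled aggregation above writes per-run stream metadata to updateStream.properties. As a sanity check of the keys involved, here is a minimal, hypothetical reader sketch: it assumes the standard Hadoop FileSystem API and the property keys written by the block above; the class name and argument handling are illustrative, not part of the commit.

import java.io.InputStream;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical consumer of the metadata file written by runGenerateJob above.
public class ReadUpdateStreamProperties {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Properties p = new Properties();
        // args[0] is the generator's outputDir
        try (InputStream in = fs.open(new Path(args[0] + "/social_network/updateStream.properties"))) {
            p.load(in);
        }
        long min = Long.parseLong(p.getProperty("ldbc.snb.interactive.min_write_event_start_time"));
        long max = Long.parseLong(p.getProperty("ldbc.snb.interactive.max_write_event_start_time"));
        long numEvents = Long.parseLong(p.getProperty("ldbc.snb.interactive.num_events"));
        // update_interleave is (max - min) / numEvents, mirroring the writer's computation
        System.out.println(numEvents + " events spanning " + (max - min) + " ms, interleave " + ((max - min) / numEvents) + " ms");
    }
}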

src/main/java/ldbc/socialnet/dbgen/generator/ScalableGenerator.java

Lines changed: 6 additions & 4 deletions

@@ -794,10 +794,12 @@ public void generateUserActivity( ReducedUserProfile userProfile, Reducer<MapRed
 int nameCount = firstNameCount.containsKey(extraInfo.getFirstName())? firstNameCount.get(extraInfo.getFirstName()):0;
 firstNameCount.put(extraInfo.getFirstName(), nameCount+1);
 long init = System.currentTimeMillis();
-//generatePosts(uniformPostGenerator,reducedUserProfiles[index], extraInfo);
-//generatePosts(flashmobPostGenerator, reducedUserProfiles[index], extraInfo);
-//generatePhotos(reducedUserProfiles[index], extraInfo);
-//generateUserGroups(reducedUserProfiles[index], extraInfo);
+if(conf.getBoolean("activity",true)) {
+    generatePosts(uniformPostGenerator, reducedUserProfiles[index], extraInfo);
+    generatePosts(flashmobPostGenerator, reducedUserProfiles[index], extraInfo);
+    generatePhotos(reducedUserProfiles[index], extraInfo);
+    generateUserGroups(reducedUserProfiles[index], extraInfo);
+}
 if( numUserProfilesRead % 100 == 0) context.setStatus("Generated post and photo for "+numUserProfilesRead+" users");
 dataExporter.changePartition();
 }
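Reading of this hunk: the four activity-generation calls were previously commented out unconditionally; the commit re-enables them behind the new activity flag, which defaults to true. Setting activity to false therefore still builds the social graph itself but skips per-user posts, photos, and group memberships (an inference from the guarded calls, not documented behavior).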

src/main/java/ldbc/socialnet/dbgen/util/ConfigParser.java

Lines changed: 2 additions & 0 deletions

@@ -18,6 +18,7 @@ public static Configuration GetConfig( String paramsFile ) {
 conf.set("serializer","csv");
 conf.set("compressed",Boolean.toString(false));
 conf.set("updateStreams",Boolean.toString(false));
+conf.set("activity",Boolean.toString(true));
 conf.set("outputDir","./");
 conf.set("deltaTime","10000");
 conf.set("numUpdatePartitions","1");
@@ -36,6 +37,7 @@ public static Configuration GetConfig( String paramsFile ) {
 CheckOption(conf, "numYears", properties);
 CheckOption(conf,"startYear",properties);
 CheckOption(conf,"numUpdatePartitions",properties);
+CheckOption(conf,"activity",properties);
 if(conf.get("fs.default.name").compareTo("file:///") == 0 ) {
 System.out.println("Running in standalone mode. Setting numThreads to 1");
 conf.set("numThreads","1");
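CheckOption's body is not part of this diff; presumably it copies a key from the loaded params file into the Hadoop Configuration when the key is present, and otherwise leaves the default set above. A minimal sketch of that assumed behavior:

// Assumed behavior of CheckOption (its body is not shown in this commit):
// override the configured default only when the params file defines the key.
private static void CheckOption(Configuration conf, String option, Properties properties) {
    String value = properties.getProperty(option);
    if (value != null) {
        conf.set(option, value);
    }
}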
