Skip to content

Commit f83ea9d

Browse files
committed
Implemented clustering-coefficient-based edge generation. Working with one step.
1 parent de06a6b commit f83ea9d

File tree

8 files changed

+471
-191
lines changed

8 files changed

+471
-191
lines changed

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 403 additions & 152 deletions
Large diffs are not rendered by default.

src/main/java/ldbc/snb/datagen/generator/DateGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ public Long randomThirtyDaysSpan(Random random, Long from) {
170170
public long randomKnowsCreationDate(Random random, Person personA, Person personB) {
171171
long fromDate = Math.max(personA.creationDate(), personB.creationDate());
172172
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
173-
return (fromDate + randomSpanMilis);
173+
return Math.min(fromDate + randomSpanMilis, getEndDateTime());
174174
}
175175

176176
public long numberOfMonths(Person user) {

src/main/java/ldbc/snb/datagen/generator/DistanceKnowsGenerator.java

Lines changed: 4 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
2222
randomFarm.resetRandomGenerators(seed);
2323
for( int i = 0; i < persons.size(); ++i ) {
2424
Person p = persons.get(i);
25-
for( int j = i+1; ( target_edges(p, percentages, step_index) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
25+
for( int j = i+1; ( Knows.target_edges(p, percentages, step_index) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
2626
if( know(p, persons.get(j), j - i, percentages, step_index)) {
27-
createKnow(p, persons.get(j));
27+
Knows.createKnow(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), p, persons.get(j));
2828
}
2929
}
3030
}
3131
}
3232

3333
boolean know( Person personA, Person personB, int dist, ArrayList<Float> percentages, int step_index ) {
34-
if( personA.knows().size() >= target_edges( personA, percentages, step_index) ||
35-
personB.knows().size() >= target_edges( personB, percentages, step_index) ) return false;
34+
if( personA.knows().size() >= Knows.target_edges( personA, percentages, step_index) ||
35+
personB.knows().size() >= Knows.target_edges( personB, percentages, step_index) ) return false;
3636
double randProb = randomFarm.get(RandomGeneratorFarm.Aspect.UNIFORM).nextDouble();
3737
double prob = Math.pow(DatagenParams.baseProbCorrelated, dist);
3838
if ((randProb < prob) || (randProb < DatagenParams.limitProCorrelated)) {
@@ -41,27 +41,5 @@ boolean know( Person personA, Person personB, int dist, ArrayList<Float> percent
4141
return false;
4242
}
4343

44-
void createKnow( Person personA, Person personB ) {
45-
long creationDate = Dictionaries.dates.randomKnowsCreationDate(
46-
randomFarm.get(RandomGeneratorFarm.Aspect.DATE),
47-
personA,
48-
personB);
49-
creationDate = creationDate - personA.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personA.creationDate()));
50-
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
51-
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
52-
float similarity = Person.Similarity(personA,personB);
53-
personB.knows().add(new Knows(personA, creationDate, similarity));
54-
personA.knows().add(new Knows(personB, creationDate, similarity));
55-
}
56-
}
5744

58-
long target_edges(Person person, ArrayList<Float> percentages, int step_index ) {
59-
int generated_edges = 0;
60-
for (int i = 0; i < step_index; ++i) {
61-
generated_edges += Math.ceil(percentages.get(i)*person.maxNumKnows());
62-
}
63-
generated_edges = Math.min(generated_edges, (int)person.maxNumKnows());
64-
int to_generate = Math.min( (int)person.maxNumKnows() - generated_edges, (int)Math.ceil(percentages.get(step_index)*person.maxNumKnows()));
65-
return to_generate;
66-
}
6745
}

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
7878
percentages.add(0.45f);
7979
percentages.add(0.1f);
8080

81+
8182
long start = System.currentTimeMillis();
8283
printProgress("Starting: Person generation");
8384
long startPerson = System.currentTimeMillis();
@@ -97,6 +98,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
9798
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/interestEdges");
9899
long endInterest = System.currentTimeMillis();
99100

101+
100102
printProgress("Creating random correlated edges");
101103
long startRandom= System.currentTimeMillis();
102104
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 2);
@@ -242,8 +244,8 @@ public int runGenerateJob(Configuration conf) throws Exception {
242244
+ " total seconds");
243245
System.out.println("Person generation time: "+((endPerson - startPerson) / 1000));
244246
System.out.println("University correlated edge generation time: "+((endUniversity - startUniversity) / 1000));
245-
System.out.println("Interest correlated edge generation time: "+((endInterest - startInterest) / 1000));
246-
System.out.println("Random correlated edge generation time: "+((endRandom - startRandom) / 1000));
247+
//System.out.println("Interest correlated edge generation time: "+((endInterest - startInterest) / 1000));
248+
//System.out.println("Random correlated edge generation time: "+((endRandom - startRandom) / 1000));
247249
System.out.println("Edges merge time: "+((endMerge - startMerge) / 1000));
248250
System.out.println("Person serialization time: "+((endPersonSerializing - startPersonSerializing) / 1000));
249251
System.out.println("Person activity generation and serialization time: "+((endPersonActivity - startPersonActivity) / 1000));

src/main/java/ldbc/snb/datagen/hadoop/HadoopKnowsGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ public static class HadoopKnowsGeneratorReducer extends Reducer<BlockKey, Perso
3333
private int step_index;
3434

3535
protected void setup(Context context) {
36-
this.knowsGenerator = new DistanceKnowsGenerator();
37-
// this.knowsGenerator = new ClusteringKnowsGenerator();
36+
//this.knowsGenerator = new DistanceKnowsGenerator();
37+
this.knowsGenerator = new ClusteringKnowsGenerator();
3838
this.percentages = new ArrayList<Float>();
3939
this.conf = context.getConfiguration();
4040
this.step_index = conf.getInt("stepIndex",0);

src/main/java/ldbc/snb/datagen/hadoop/HadoopMergeFriendshipFiles.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@
1515

1616
import java.io.IOException;
1717
import java.util.ArrayList;
18+
import java.util.Collections;
19+
import java.util.Comparator;
1820

1921
/**
2022
* Created by aprat on 29/07/15.
2123
*/
2224
public class HadoopMergeFriendshipFiles {
2325

26+
2427
public static class HadoopMergeFriendshipFilesReducer extends Reducer<TupleKey, Person, TupleKey, Person> {
2528

2629
private Configuration conf;
@@ -40,18 +43,32 @@ protected void setup(Context context) {
4043
public void reduce(TupleKey key, Iterable<Person> valueSet,Context context)
4144
throws IOException, InterruptedException {
4245

46+
ArrayList<Knows> knows = new ArrayList<Knows>();
4347
Person person = null;
4448
int index = 0;
4549
for ( Person p : valueSet) {
4650
if( index == 0 ) {
4751
person = new Person(p);
48-
} else {
49-
for ( Knows k : p.knows() ) {
50-
person.knows().add(k);
51-
}
52+
}
53+
for(Knows k : p.knows()) {
54+
knows.add(k);
5255
}
5356
index++;
5457
}
58+
person.knows().clear();
59+
Collections.sort(knows);
60+
if(knows.size() > 0 ) {
61+
long currentTo = knows.get(0).to().accountId();
62+
person.knows().add(knows.get(0));
63+
for (index = 1; index < knows.size(); ++index) {
64+
Knows nextKnows = knows.get(index);
65+
if(currentTo != knows.get(index).to().accountId()) {
66+
person.knows().add(nextKnows);
67+
currentTo = nextKnows.to().accountId();
68+
}
69+
}
70+
}
71+
5572
//System.out.println("Num persons "+index);
5673
context.write(keySetter.getKey(person),person);
5774
}

src/main/java/ldbc/snb/datagen/objects/Knows.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,16 @@
3636
*/
3737
package ldbc.snb.datagen.objects;
3838

39+
import ldbc.snb.datagen.dictionary.Dictionaries;
40+
import ldbc.snb.datagen.generator.DatagenParams;
41+
import ldbc.snb.datagen.util.RandomGeneratorFarm;
3942
import org.apache.hadoop.io.Writable;
4043

4144
import java.io.DataInput;
4245
import java.io.DataOutput;
4346
import java.io.IOException;
44-
47+
import java.util.ArrayList;
48+
import java.util.Random;
4549

4650

4751
public class Knows implements Writable, Comparable<Knows> {
@@ -103,11 +107,38 @@ public void write(DataOutput arg0) throws IOException {
103107
}
104108

105109
public int compareTo(Knows k) {
106-
int res = (int)(to_.accountId() - k.to().accountId());
107-
if( res != 0 ) return res;
110+
long res = (to_.accountId() - k.to().accountId());
111+
if( res > 0 ) return 1;
112+
if( res < 0 ) return -1;
108113
long res2 = creationDate_ - k.creationDate();
109114
if( res2 > 0 ) return 1;
110115
if( res2 < 0 ) return -1;
111116
return 0;
112117
}
118+
119+
public static int num = 0;
120+
121+
public static void createKnow( Random random, Person personA, Person personB ) {
122+
long creationDate = Dictionaries.dates.randomKnowsCreationDate(
123+
random,
124+
personA,
125+
personB);
126+
creationDate = creationDate - personA.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personA.creationDate()));
127+
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
128+
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
129+
float similarity = Person.Similarity(personA,personB);
130+
personB.knows().add(new Knows(personA, creationDate, similarity));
131+
personA.knows().add(new Knows(personB, creationDate, similarity));
132+
}
133+
}
134+
135+
public static long target_edges(Person person, ArrayList<Float> percentages, int step_index ) {
136+
int generated_edges = 0;
137+
for (int i = 0; i < step_index; ++i) {
138+
generated_edges += Math.ceil(percentages.get(i)*person.maxNumKnows());
139+
}
140+
generated_edges = Math.min(generated_edges, (int)person.maxNumKnows());
141+
int to_generate = Math.min( (int)person.maxNumKnows() - generated_edges, (int)Math.ceil(percentages.get(step_index)*person.maxNumKnows()));
142+
return to_generate;
143+
}
113144
}

src/main/java/ldbc/snb/datagen/serializer/graphalytics/CSVPersonSerializer.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
public class CSVPersonSerializer extends PersonSerializer {
5252

5353
private HDFSCSVWriter [] writers;
54+
private String separator_ = " ";
5455

5556
private enum FileNames {
5657
PERSON_KNOWS_PERSON("person_knows_person");
@@ -72,7 +73,7 @@ public void initialize(Configuration conf, int reducerId) {
7273
int numFiles = FileNames.values().length;
7374
writers = new HDFSCSVWriter[numFiles];
7475
for( int i = 0; i < numFiles; ++i) {
75-
writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"), FileNames.values()[i].toString()+"_"+reducerId,conf.getInt("ldbc.snb.datagen.numPartitions",1),conf.getBoolean("ldbc.snb.datagen.serializer.compressed",false),"|", false);
76+
writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"), FileNames.values()[i].toString()+"_"+reducerId,conf.getInt("ldbc.snb.datagen.numPartitions",1),conf.getBoolean("ldbc.snb.datagen.serializer.compressed",false),separator_, false);
7677
}
7778

7879
ArrayList<String> arguments = new ArrayList<String>();

0 commit comments

Comments
 (0)