Skip to content

Commit f46c7d7

Browse files
committed
Fixed error with distributions not being properly generated
1 parent cec5323 commit f46c7d7

File tree

5 files changed

+46
-25
lines changed

5 files changed

+46
-25
lines changed

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,13 +72,13 @@ public int compareTo(PersonTuple b) {
7272
}
7373
}
7474

75-
public void generateKnows( ArrayList<Person> persons, int seed, float upperBound, boolean firstStep ) {
75+
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index ) {
7676
Random random = new Random();
7777
Map<Long, Integer> personPosition = new HashMap<Long, Integer>();
7878
for( int i = 0; i < persons.size(); ++i ) {
7979
personPosition.put(persons.get(i).accountId(), i);
8080
}
81-
distanceKnowsGenerator_.generateKnows(persons,seed,upperBound, firstStep);
81+
distanceKnowsGenerator_.generateKnows(persons,seed, percentages, step_index);
8282
PersonGraph bestGraph = new PersonGraph(persons);
8383
MinHash minHash = new MinHash(numMinHashes_, 0);
8484
double bestCC = GraphUtils.ClusteringCoefficient(bestGraph);

src/main/java/ldbc/snb/datagen/generator/DistanceKnowsGenerator.java

Lines changed: 13 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -18,21 +18,21 @@ public DistanceKnowsGenerator() {
1818
this.randomFarm = new RandomGeneratorFarm();
1919
}
2020

21-
public void generateKnows( ArrayList<Person> persons, int seed, float percentage, boolean firstStep ) {
21+
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index ) {
2222
randomFarm.resetRandomGenerators(seed);
2323
for( int i = 0; i < persons.size(); ++i ) {
2424
Person p = persons.get(i);
25-
for( int j = i+1; ( target_edges(p, percentage, firstStep) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
26-
if( know(p, persons.get(j), j - i, percentage, firstStep)) {
25+
for( int j = i+1; ( target_edges(p, percentages, step_index) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
26+
if( know(p, persons.get(j), j - i, percentages, step_index)) {
2727
createKnow(p, persons.get(j));
2828
}
2929
}
3030
}
3131
}
3232

33-
boolean know( Person personA, Person personB, int dist, float percentage, boolean firstStep ) {
34-
if((float)(personA.knows().size()) >= target_edges(personA,percentage, firstStep) ||
35-
personB.knows().size() >= target_edges(personB,percentage, firstStep) ) return false;
33+
boolean know( Person personA, Person personB, int dist, ArrayList<Float> percentages, int step_index ) {
34+
if( personA.knows().size() >= target_edges( personA, percentages, step_index) ||
35+
personB.knows().size() >= target_edges( personB, percentages, step_index) ) return false;
3636
double randProb = randomFarm.get(RandomGeneratorFarm.Aspect.UNIFORM).nextDouble();
3737
double prob = Math.pow(DatagenParams.baseProbCorrelated, dist);
3838
if ((randProb < prob) || (randProb < DatagenParams.limitProCorrelated)) {
@@ -55,11 +55,13 @@ void createKnow( Person personA, Person personB ) {
5555
}
5656
}
5757

58-
long target_edges(Person person, float percentage, boolean firstStep) {
59-
long max = (long) (person.maxNumKnows() * percentage);
60-
if(max == 0 && firstStep ) {
61-
return person.maxNumKnows();
58+
long target_edges(Person person, ArrayList<Float> percentages, int step_index ) {
59+
int generated_edges = 0;
60+
for (int i = 0; i < step_index; ++i) {
61+
generated_edges += Math.ceil(percentages.get(i)*person.maxNumKnows());
6262
}
63-
return max;
63+
generated_edges = Math.min(generated_edges, (int)person.maxNumKnows());
64+
int to_generate = Math.min( (int)person.maxNumKnows() - generated_edges, (int)Math.ceil(percentages.get(step_index)*person.maxNumKnows()));
65+
return to_generate;
6466
}
6567
}

src/main/java/ldbc/snb/datagen/generator/KnowsGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,5 +8,5 @@
88
* Created by aprat on 11/06/15.
99
*/
1010
public interface KnowsGenerator {
11-
public void generateKnows( ArrayList<Person> persons, int seed, float upperBound, boolean firstStep );
11+
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index );
1212
}

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,10 @@ public int runGenerateJob(Configuration conf) throws Exception {
7373

7474
String hadoopPrefix = conf.get("ldbc.snb.datagen.serializer.hadoopDir");
7575
FileSystem fs = FileSystem.get(conf);
76+
ArrayList<Float> percentages = new ArrayList<Float>();
77+
percentages.add(0.45f);
78+
percentages.add(0.45f);
79+
percentages.add(0.1f);
7680

7781
long start = System.currentTimeMillis();
7882
printProgress("Starting: Person generation");
@@ -83,19 +87,19 @@ public int runGenerateJob(Configuration conf) throws Exception {
8387

8488
printProgress("Creating university location correlated edges");
8589
long startUniversity = System.currentTimeMillis();
86-
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.UniversityKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", 0.45f,true);
90+
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.UniversityKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 0);
8791
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/universityEdges");
8892
long endUniversity = System.currentTimeMillis();
8993

9094
printProgress("Creating main interest correlated edges");
9195
long startInterest= System.currentTimeMillis();
92-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.InterestKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", 0.45f,false);
96+
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.InterestKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 1);
9397
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/interestEdges");
9498
long endInterest = System.currentTimeMillis();
9599

96100
printProgress("Creating random correlated edges");
97101
long startRandom= System.currentTimeMillis();
98-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", 0.1f,false);
102+
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 2);
99103
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/randomEdges");
100104
long endRandom= System.currentTimeMillis();
101105

src/main/java/ldbc/snb/datagen/hadoop/HadoopKnowsGenerator.java

Lines changed: 23 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -28,11 +28,22 @@ public static class HadoopKnowsGeneratorReducer extends Reducer<BlockKey, Perso
2828
private KnowsGenerator knowsGenerator; /** The knows generator applied to each block of persons **/
2929
private Configuration conf;
3030
private HadoopFileKeyChanger.KeySetter<TupleKey> keySetter = null;
31+
private ArrayList<Float> percentages;
32+
private int step_index;
3133

3234
protected void setup(Context context) {
3335
this.knowsGenerator = new DistanceKnowsGenerator();
3436
// this.knowsGenerator = new ClusteringKnowsGenerator();
37+
this.percentages = new ArrayList<Float>();
3538
this.conf = context.getConfiguration();
39+
this.step_index = conf.getInt("stepIndex",0);
40+
float p = conf.getFloat("percentage0",0.0f);
41+
int index = 1;
42+
while(p != 0.0f) {
43+
this.percentages.add(p);
44+
p = conf.getFloat("percentage"+index,0.0f);
45+
++index;
46+
}
3647
try {
3748
this.keySetter = (HadoopFileKeyChanger.KeySetter) Class.forName(conf.get("postKeySetterName")).newInstance();
3849
}catch(Exception e) {
@@ -48,26 +59,26 @@ public void reduce(BlockKey key, Iterable<Person> valueSet,Context context)
4859
for( Person p : valueSet ) {
4960
persons.add(new Person(p));
5061
}
51-
this.knowsGenerator.generateKnows(persons, (int)key.block, conf.getFloat("upperBound", 0.1f), conf.getBoolean("firstStep",false));
62+
this.knowsGenerator.generateKnows(persons, (int)key.block, percentages, step_index);
5263
for( Person p : persons ) {
5364
context.write(keySetter.getKey(p), p);
5465
}
5566
}
5667
}
5768

5869
private Configuration conf;
59-
private double upperBound;
6070
private String preKeySetterName;
6171
private String postKeySetterName;
62-
private boolean firstStep;
72+
private ArrayList<Float> percentages;
73+
private int step_index;
6374

6475

65-
public HadoopKnowsGenerator( Configuration conf, String preKeySetterName, String postKeySetterName, float upperBound, boolean firstStep ) {
76+
public HadoopKnowsGenerator( Configuration conf, String preKeySetterName, String postKeySetterName, ArrayList<Float> percentages, int step_index ) {
6677
this.conf = conf;
67-
this.upperBound = upperBound;
6878
this.preKeySetterName = preKeySetterName;
6979
this.postKeySetterName = postKeySetterName;
70-
this.firstStep = firstStep;
80+
this.percentages = percentages;
81+
this.step_index = step_index;
7182
}
7283

7384
public void run( String inputFileName, String outputFileName ) throws Exception {
@@ -95,8 +106,12 @@ public void run( String inputFileName, String outputFileName ) throws Exception
95106
}
96107
System.out.println("... Time to rank persons: "+ (System.currentTimeMillis() - start)+" ms");
97108

98-
conf.set("upperBound",Double.toString(upperBound));
99-
conf.setBoolean("firstStep",firstStep);
109+
conf.setInt("stepIndex",step_index);
110+
int index = 0;
111+
for( float p : percentages ) {
112+
conf.setFloat("percentage"+index, p);
113+
++index;
114+
}
100115
conf.set("postKeySetterName",postKeySetterName);
101116
int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
102117
Job job = Job.getInstance(conf, "Knows generator");

0 commit comments

Comments
 (0)