Skip to content

Commit b65842a

Browse files
committed
Fixed a bug in the histogram bucketizing function (Bucket.bucketizeHistogram)
1 parent 96f57a2 commit b65842a

File tree

5 files changed

+19
-27
lines changed

5 files changed

+19
-27
lines changed

src/main/java/ldbc/snb/datagen/generator/distribution/AltmannDistribution.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
public class AltmannDistribution extends BucketedDistribution {
1414

15-
private int NUM_BUCKETS_ = 100;
15+
private int NUM_BUCKETS_ = 10000;
1616
private int POPULATION_ = 10000;
1717
private double normalization_factor_ = 0.0;
1818
private double GAMMA_ = 0.4577;
@@ -21,14 +21,9 @@ public class AltmannDistribution extends BucketedDistribution {
2121
private ArrayList<Bucket> buckets_ = null;
2222

2323
@Override
24-
public ArrayList<Bucket> getBuckets() {
25-
return buckets_;
26-
}
27-
28-
@Override
29-
public void initialize(Configuration conf) {
30-
throw new UnsupportedOperationException("Distribution not implemented");
31-
/*POPULATION_ = DatagenParams.numPersons;
24+
public ArrayList<Bucket> getBuckets() {
25+
//throw new UnsupportedOperationException("Distribution not implemented");
26+
POPULATION_ = DatagenParams.numPersons;
3227
for( int i = 1; i <= POPULATION_; ++i ) {
3328
normalization_factor_+= Math.pow(i,-GAMMA_)*Math.exp(-DELTA_*i);
3429
}
@@ -40,9 +35,10 @@ public void initialize(Configuration conf) {
4035
double scale_factor = DatagenParams.numPersons / POPULATION_;
4136
buckets_ = Bucket.bucketizeHistogram(histogram, NUM_BUCKETS_);
4237
for( Bucket e : buckets_) {
38+
System.out.println((e.min()*scale_factor)+" "+(e.max()*scale_factor));
4339
e.max(e.max()*scale_factor);
4440
e.min(e.min()*scale_factor);
4541
}
46-
*/
42+
return buckets_;
4743
}
4844
}

src/main/java/ldbc/snb/datagen/generator/distribution/WeibullDistribution.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,6 @@ public class WeibullDistribution extends BucketedDistribution {
2020

2121
@Override
2222
public ArrayList<Bucket> getBuckets() {
23-
return buckets_;
24-
}
25-
26-
@Override
27-
public void initialize(Configuration conf) {
2823
weibull_ = new org.apache.commons.math3.distribution.WeibullDistribution(LAMBDA_,K_);
2924

3025
ArrayList<Double> histogram = new ArrayList<Double>();
@@ -34,5 +29,6 @@ public void initialize(Configuration conf) {
3429
}
3530

3631
buckets_ = Bucket.bucketizeHistogram(histogram,100);
32+
return buckets_;
3733
}
3834
}

src/main/java/ldbc/snb/datagen/generator/distribution/ZetaDistribution.java renamed to src/main/java/ldbc/snb/datagen/generator/distribution/ZipfDistribution.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,20 @@
11
package ldbc.snb.datagen.generator.distribution;
22

33
import ldbc.snb.datagen.generator.DatagenParams;
4-
import ldbc.snb.datagen.generator.distribution.utils.Bucket;
5-
import ldbc.snb.datagen.generator.distribution.utils.BucketedDistribution;
6-
import org.apache.commons.math3.distribution.ZipfDistribution;
74
import org.apache.hadoop.conf.Configuration;
85

96
import java.util.ArrayList;
107

118
/**
129
* Created by aprat on 5/03/15.
1310
*/
14-
public class ZetaDistribution implements DegreeDistribution {
11+
public class ZipfDistribution implements DegreeDistribution {
1512

16-
private ZipfDistribution zipf_;
13+
private org.apache.commons.math3.distribution.ZipfDistribution zipf_;
1714
private double ALPHA_ = 1.7;
1815

1916
public void initialize(Configuration conf) {
20-
zipf_ = new ZipfDistribution(DatagenParams.numPersons, ALPHA_);
17+
zipf_ = new org.apache.commons.math3.distribution.ZipfDistribution(10000, ALPHA_);
2118
}
2219

2320
public void reset (long seed) {

src/main/java/ldbc/snb/datagen/generator/distribution/utils/Bucket.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,13 @@ public static ArrayList<Bucket> bucketizeHistogram(ArrayList<Double> histogram,
1414
for( Double d : histogram ) {
1515
population+=d;
1616
}
17-
int percentile_size = population.intValue() / num_buckets;
17+
double percentile = 1 / (double)num_buckets;
1818
int current_histogram_index = 0;
1919
int current_histogram_counter = histogram.get(current_histogram_index).intValue();
2020
for( int i = 0; i < num_buckets; ++i ) {
21-
double min = population.intValue();
21+
double min = population;
2222
double max = 0;
23-
for( int j = 0; j < percentile_size; ++j ) {
23+
for( int j = 0; j/(double)population < percentile; ++j ) {
2424
min = min > (current_histogram_index+1) ? (current_histogram_index+1) : min;
2525
max = max < (current_histogram_index+1) ? (current_histogram_index+1) : max;
2626
if(--current_histogram_counter <= 0) {

src/main/java/ldbc/snb/datagen/generator/distribution/utils/BucketedDistribution.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,11 @@ public void reset(long seed) {
3737
public long nextDegree() {
3838
int idx = randomPercentile_.nextInt(buckets_.size());
3939
int minRange = (int)(buckets_.get(idx).min());
40-
int maxRange = (int)(Math.floor(buckets_.get(idx).max()));
41-
if( maxRange < minRange ) maxRange = minRange;
42-
return randomDegree_.get(idx).nextInt( maxRange - minRange + 1) + minRange;
40+
int maxRange = (int)(buckets_.get(idx).max());
41+
if( maxRange < minRange ) {
42+
maxRange = minRange;
43+
}
44+
long ret= randomDegree_.get(idx).nextInt( maxRange - minRange + 1) + minRange;
45+
return ret;
4346
}
4447
}

0 commit comments

Comments
 (0)