Skip to content

Commit b9ffa10

Browse files
committed
Added DiscreteWeibullDistribution
Added AltmannDistribution. Added MOEZipfDistribution. Added graphalytics.1000 scale factor.
1 parent 98df6d5 commit b9ffa10

File tree

10 files changed

+141
-81
lines changed

10 files changed

+141
-81
lines changed

src/main/java/ldbc/snb/datagen/generator/distribution/AltmannDistribution.java

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,35 +10,32 @@
1010
/**
1111
* Created by aprat on 26/02/15.
1212
*/
13-
public class AltmannDistribution extends BucketedDistribution {
13+
public class AltmannDistribution extends CumulativeBasedDegreeDistribution {
1414

15-
private int NUM_BUCKETS_ = 10000;
1615
private int POPULATION_ = 10000;
1716
private double normalization_factor_ = 0.0;
18-
private double GAMMA_ = 0.4577;
19-
private double DELTA_ = 0.0162;
17+
private double ALPHA_ = 0.4577;
18+
private double BETA_ = 0.0162;
2019

21-
private ArrayList<Bucket> buckets_ = null;
2220

23-
@Override
24-
public ArrayList<Bucket> getBuckets() {
21+
public ArrayList<CumulativeEntry> cumulativeProbability( Configuration conf ) {
2522
//throw new UnsupportedOperationException("Distribution not implemented");
23+
ALPHA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.AltmannDistribution.alpha",ALPHA_);
24+
BETA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.AltmannDistribution.beta",BETA_);
25+
2626
POPULATION_ = DatagenParams.numPersons;
2727
for( int i = 1; i <= POPULATION_; ++i ) {
28-
normalization_factor_+= Math.pow(i,-GAMMA_)*Math.exp(-DELTA_*i);
28+
normalization_factor_+= Math.pow(i,-ALPHA_)*Math.exp(-BETA_*i);
2929
}
30-
ArrayList<Double> histogram = new ArrayList<Double>();
30+
ArrayList<CumulativeEntry> cumulative = new ArrayList<CumulativeEntry>();
3131
for( int i = 1; i <= POPULATION_; ++i) {
32-
histogram.add(POPULATION_*Math.pow(i,-GAMMA_)*Math.exp(-DELTA_*i) / normalization_factor_);
33-
}
34-
35-
double scale_factor = DatagenParams.numPersons / POPULATION_;
36-
buckets_ = Bucket.bucketizeHistogram(histogram, NUM_BUCKETS_);
37-
for( Bucket e : buckets_) {
38-
System.out.println((e.min()*scale_factor)+" "+(e.max()*scale_factor));
39-
e.max(e.max()*scale_factor);
40-
e.min(e.min()*scale_factor);
32+
double prob = Math.pow(i,-ALPHA_)*Math.exp(-BETA_*i) / normalization_factor_;
33+
prob += cumulative.size() > 0 ? cumulative.get(i-2).prob_ : 0.0;
34+
CumulativeEntry entry = new CumulativeEntry();
35+
entry.prob_ = prob;
36+
entry.value_ = i;
37+
cumulative.add(entry);
4138
}
42-
return buckets_;
39+
return cumulative;
4340
}
4441
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
package ldbc.snb.datagen.generator.distribution;
2+
3+
import org.apache.hadoop.conf.Configuration;
4+
5+
import java.util.ArrayList;
6+
import java.util.Random;
7+
8+
/**
9+
* Created by aprat on 12/05/15.
10+
*/
11+
public abstract class CumulativeBasedDegreeDistribution implements DegreeDistribution{
12+
13+
public class CumulativeEntry {
14+
double prob_;
15+
int value_;
16+
}
17+
18+
private ArrayList<CumulativeEntry> cumulativeProbability_;
19+
private Random random_;
20+
21+
public void initialize( Configuration conf ) {
22+
cumulativeProbability_ = cumulativeProbability( conf );
23+
random_ = new Random();
24+
}
25+
26+
public void reset (long seed){
27+
random_.setSeed(seed);
28+
}
29+
30+
public long nextDegree() {
31+
double prob = random_.nextDouble();
32+
int index = binarySearch(cumulativeProbability_,prob);
33+
return cumulativeProbability_.get(index).value_;
34+
}
35+
36+
private int binarySearch( ArrayList<CumulativeEntry> cumulative, double prob ) {
37+
int upperBound = cumulative.size()-1;
38+
int lowerBound = 0;
39+
int midPoint = (upperBound + lowerBound) / 2;
40+
while (upperBound > (lowerBound+1)){
41+
if (cumulative.get(midPoint).prob_ > prob ){
42+
upperBound = midPoint;
43+
} else{
44+
lowerBound = midPoint;
45+
}
46+
midPoint = (upperBound + lowerBound) / 2;
47+
}
48+
return midPoint;
49+
}
50+
51+
public abstract ArrayList<CumulativeEntry> cumulativeProbability( Configuration conf );
52+
}

src/main/java/ldbc/snb/datagen/generator/distribution/DiscreteWeibullDistribution.java

Lines changed: 21 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,27 @@
1111
/**
1212
* Created by aprat on 5/03/15.
1313
*/
14-
public class DiscreteWeibullDistribution extends BucketedDistribution {
15-
16-
private ArrayList<Bucket> buckets_;
17-
private double BETA_ = 0.7787;
18-
private double P_ = 0.07;
19-
20-
@Override
21-
public ArrayList<Bucket> getBuckets() {
22-
23-
ArrayList<Double> histogram = new ArrayList<Double>();
24-
for( int i = 1; i <= DatagenParams.numPersons; ++i ) {
25-
double prob = Math.pow(1.0-P_,Math.pow(i,BETA_))-Math.pow((1.0-P_),Math.pow(i+1,BETA_));
26-
histogram.add(DatagenParams.numPersons * prob);
27-
//System.out.println(DatagenParams.numPersons * prob);
28-
}
29-
30-
buckets_ = Bucket.bucketizeHistogram(histogram,10000);
31-
32-
/*for( Bucket e : buckets_) {
33-
System.out.println((e.min())+" "+e.max());
14+
public class DiscreteWeibullDistribution extends CumulativeBasedDegreeDistribution {
15+
16+
//private double BETA_ = 0.7787;
17+
//private double BETA_ = 0.8211;
18+
private double BETA_ = 0.8505;
19+
//private double P_ = 0.062;
20+
//private double P_ = 0.0448;
21+
private double P_ = 0.0205;
22+
23+
public ArrayList<CumulativeEntry> cumulativeProbability( Configuration conf ) {
24+
BETA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.DiscreteWeibullDistribution.beta",BETA_);
25+
P_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.DiscreteWeibullDistribution.p",P_);
26+
ArrayList<CumulativeEntry> cumulative = new ArrayList<CumulativeEntry>();
27+
for( int i = 0; i < DatagenParams.numPersons; ++i ) {
28+
//double prob = Math.pow(1.0-P_,Math.pow(i,BETA_))-Math.pow((1.0-P_),Math.pow(i+1,BETA_));
29+
double prob = 1.0-Math.pow((1.0-P_),Math.pow(i+1,BETA_));
30+
CumulativeEntry entry = new CumulativeEntry();
31+
entry.prob_ = prob;
32+
entry.value_ = i+1;
33+
cumulative.add(entry);
3434
}
35-
*/
36-
return buckets_;
35+
return cumulative;
3736
}
3837
}

src/main/java/ldbc/snb/datagen/generator/distribution/GeoDistribution.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ public class GeoDistribution implements DegreeDistribution {
1818
private double ALPHA_ = 0.12;
1919

2020
public void initialize(Configuration conf) {
21+
ALPHA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.GeoDistribution.alpha",ALPHA_);
2122
geo_ = new GeometricDistribution(ALPHA_);
2223
}
2324

src/main/java/ldbc/snb/datagen/generator/distribution/MOEZipfDistribution.java

Lines changed: 20 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -6,53 +6,35 @@
66
import org.apache.hadoop.conf.Configuration;
77

88
import java.util.ArrayList;
9+
import java.util.Random;
910

1011
/**
1112
* Created by aprat on 4/03/15.
1213
*/
13-
public class MOEZipfDistribution extends BucketedDistribution {
14+
public class MOEZipfDistribution implements DegreeDistribution {
1415

15-
private int NUM_BUCKETS_ = 100;
16-
private int POPULATION_ = 10000;
17-
private double normalization_factor_1 = 0.0;
18-
private double normalization_factor_2 = 0.0;
19-
private double ALFA_ = 2.2767;
20-
private double BETA_ = 4.8613;
16+
private org.apache.commons.math3.distribution.ZipfDistribution zipf_;
17+
private double ALPHA_ = 1.7;
18+
private double DELTA_ = 1.5;
19+
private Random random_;
2120

22-
private ArrayList<Bucket> buckets_ = null;
21+
public void initialize(Configuration conf) {
22+
ALPHA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.MOEZipfDistribution.alpha",ALPHA_);
23+
DELTA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.MOEZipfDistribution.delta",DELTA_);
24+
zipf_ = new org.apache.commons.math3.distribution.ZipfDistribution(10000, ALPHA_);
25+
random_ = new Random();
26+
}
2327

24-
@Override
25-
public ArrayList<Bucket> getBuckets() {
26-
return buckets_;
28+
public void reset (long seed) {
29+
zipf_.reseedRandomGenerator(seed);
2730
}
2831

29-
@Override
30-
public void initialize(Configuration conf) {
31-
throw new UnsupportedOperationException("Distribution not implemented");
32-
/* POPULATION_ = DatagenParams.numPersons;
33-
for( int i = 1; i <= POPULATION_; ++i ) {
34-
normalization_factor_1+= Math.pow(i,-ALFA_);
35-
normalization_factor_2+= Math.pow(i,-(ALFA_+1));
36-
}
37-
ArrayList<Double> histogram = new ArrayList<Double>(POPULATION_);
38-
for( int i = 0; i< POPULATION_; ++i) {
39-
histogram.add(0.0);
40-
}
41-
42-
Double hurwitz = 0.0;
43-
for( int i = POPULATION_; i > 0; --i) {
44-
hurwitz+= Math.pow(i+1,-ALFA_);
45-
histogram.set(i-1, POPULATION_ * (1.0 - (BETA_ * hurwitz / (normalization_factor_1 - (1.0 - BETA_) * normalization_factor_2))));
46-
// System.out.println(histogram.get(i-1));
47-
}
48-
49-
double scale_factor = DatagenParams.numPersons / POPULATION_;
50-
buckets_ = Bucket.bucketizeHistogram(histogram, NUM_BUCKETS_);
51-
for( Bucket e : buckets_) {
52-
e.max(e.max()*scale_factor);
53-
e.min(e.min()*scale_factor);
54-
}
55-
*/
32+
public long nextDegree(){
33+
double prob = random_.nextDouble();
34+
double prime = (prob * DELTA_)/(1 + prob*(DELTA_ - 1));
35+
long ret = zipf_.inverseCumulativeProbability(prime);
36+
return ret;
5637
}
38+
5739
}
5840

src/main/java/ldbc/snb/datagen/generator/distribution/ZipfDistribution.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ public class ZipfDistribution implements DegreeDistribution {
1414
private double ALPHA_ = 1.7;
1515

1616
public void initialize(Configuration conf) {
17+
ALPHA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.MOEZipfDistribution.alpha",ALPHA_);
1718
zipf_ = new org.apache.commons.math3.distribution.ZipfDistribution(10000, ALPHA_);
1819
}
1920

src/main/java/ldbc/snb/datagen/hadoop/HadoopBlockMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public class HadoopBlockMapper extends Mapper<LongWritable, Person, BlockKey, Pe
1818
public void setup(Mapper.Context context) {
1919
Configuration conf = context.getConfiguration();
2020
mapId = context.getTaskAttemptID().getId();
21-
blockSize = conf.getInt("ldbc.snb.datagen.generator.blockSize", 10000);
21+
blockSize = conf.getInt("ldbc.snb.datagen.generator.blockSize", 10000);
2222
}
2323

2424
@Override

src/main/java/ldbc/snb/datagen/hadoop/RandomKeySetter.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ public class RandomKeySetter implements HadoopFileKeyChanger.KeySetter<TupleKey>
1010
public TupleKey getKey(Object object ) {
1111
Person person = (Person)object;
1212
return new TupleKey(person.randomId(),person.accountId());
13+
// return new TupleKey(person.maxNumKnows(),person.accountId());
1314
}
1415
}
1516

src/main/java/ldbc/snb/datagen/serializer/PersonSerializer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ public PersonSerializer() {
1818

1919
}
2020

21-
2221
public void export(Person person) {
22+
// System.out.println(person.maxNumKnows());
2323

2424
serialize(person);
2525

src/main/resources/scale_factors.xml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,4 +240,31 @@
240240
<value>false</value>
241241
</property>
242242
</scale_factor>
243+
244+
<scale_factor name="graphalytics.1000" >
245+
<property>
246+
<name>ldbc.snb.datagen.generator.numPersons</name>
247+
<value>14400000</value>
248+
</property>
249+
<property>
250+
<name>ldbc.snb.datagen.generator.startYear</name>
251+
<value>2010</value>
252+
</property>
253+
<property>
254+
<name>ldbc.snb.datagen.generator.numYears</name>
255+
<value>1</value>
256+
</property>
257+
<property>
258+
<name>ldbc.snb.datagen.generator.distribution.degreeDistribution</name>
259+
<value>ldbc.snb.datagen.generator.distribution.FacebookDegreeDistribution</value>
260+
</property>
261+
<property>
262+
<name>ldbc.snb.datagen.generator.activity</name>
263+
<value>false</value>
264+
</property>
265+
<property>
266+
<name>ldbc.snb.datagen.serializer.updateStreams</name>
267+
<value>false</value>
268+
</property>
269+
</scale_factor>
243270
</scale_factors>

0 commit comments

Comments
 (0)