Skip to content

Commit 7c8fa3e

Browse files
committed
Merge branch 'clustering_coefficient'
2 parents 5588e07 + 6d84111 commit 7c8fa3e

File tree

13 files changed

+764
-214
lines changed

13 files changed

+764
-214
lines changed

params.ini

Lines changed: 5 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,11 @@
11

2-
ldbc.snb.datagen.generator.scaleFactor:graphalytics.1
32

4-
ldbc.snb.datagen.serializer.compressed:false
5-
6-
#ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
7-
#
8-
#ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
9-
#
10-
#ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
11-
12-
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializer
3+
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
134

14-
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.empty.EmptyInvariantSerializer
15-
16-
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.empty.EmptyPersonActivitySerializer
17-
18-
ldbc.snb.datagen.generator.numThreads:1
5+
ldbc.snb.datagen.serializer.compressed:false
196

7+
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
208

9+
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
2110

11+
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 605 additions & 154 deletions
Large diffs are not rendered by default.

src/main/java/ldbc/snb/datagen/generator/DateGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ public Long randomThirtyDaysSpan(Random random, Long from) {
170170
public long randomKnowsCreationDate(Random random, Person personA, Person personB) {
171171
long fromDate = Math.max(personA.creationDate(), personB.creationDate());
172172
long randomSpanMilis = (long) (random.nextDouble() * (THIRTY_DAYS));
173-
return (fromDate + randomSpanMilis);
173+
return Math.min(fromDate + randomSpanMilis, getEndDateTime());
174174
}
175175

176176
public long numberOfMonths(Person user) {

src/main/java/ldbc/snb/datagen/generator/DistanceKnowsGenerator.java

Lines changed: 10 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import ldbc.snb.datagen.objects.Knows;
55
import ldbc.snb.datagen.objects.Person;
66
import ldbc.snb.datagen.util.RandomGeneratorFarm;
7+
import org.apache.hadoop.conf.Configuration;
78

89
import java.util.ArrayList;
910

@@ -22,17 +23,21 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
2223
randomFarm.resetRandomGenerators(seed);
2324
for( int i = 0; i < persons.size(); ++i ) {
2425
Person p = persons.get(i);
25-
for( int j = i+1; ( target_edges(p, percentages, step_index) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
26+
for( int j = i+1; ( Knows.target_edges(p, percentages, step_index) > p.knows().size() ) && ( j < persons.size() ); ++j ) {
2627
if( know(p, persons.get(j), j - i, percentages, step_index)) {
27-
createKnow(p, persons.get(j));
28+
Knows.createKnow(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), p, persons.get(j));
2829
}
2930
}
3031
}
3132
}
3233

34+
public void initialize( Configuration conf ) {
35+
36+
}
37+
3338
boolean know( Person personA, Person personB, int dist, ArrayList<Float> percentages, int step_index ) {
34-
if( personA.knows().size() >= target_edges( personA, percentages, step_index) ||
35-
personB.knows().size() >= target_edges( personB, percentages, step_index) ) return false;
39+
if( personA.knows().size() >= Knows.target_edges( personA, percentages, step_index) ||
40+
personB.knows().size() >= Knows.target_edges( personB, percentages, step_index) ) return false;
3641
double randProb = randomFarm.get(RandomGeneratorFarm.Aspect.UNIFORM).nextDouble();
3742
double prob = Math.pow(DatagenParams.baseProbCorrelated, dist);
3843
if ((randProb < prob) || (randProb < DatagenParams.limitProCorrelated)) {
@@ -41,27 +46,6 @@ boolean know( Person personA, Person personB, int dist, ArrayList<Float> percent
4146
return false;
4247
}
4348

44-
void createKnow( Person personA, Person personB ) {
45-
long creationDate = Dictionaries.dates.randomKnowsCreationDate(
46-
randomFarm.get(RandomGeneratorFarm.Aspect.DATE),
47-
personA,
48-
personB);
49-
creationDate = creationDate - personA.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personA.creationDate()));
50-
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
51-
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
52-
float similarity = Person.Similarity(personA,personB);
53-
personB.knows().add(new Knows(personA, creationDate, similarity));
54-
personA.knows().add(new Knows(personB, creationDate, similarity));
55-
}
56-
}
5749

58-
long target_edges(Person person, ArrayList<Float> percentages, int step_index ) {
59-
int generated_edges = 0;
60-
for (int i = 0; i < step_index; ++i) {
61-
generated_edges += Math.ceil(percentages.get(i)*person.maxNumKnows());
62-
}
63-
generated_edges = Math.min(generated_edges, (int)person.maxNumKnows());
64-
int to_generate = Math.min( (int)person.maxNumKnows() - generated_edges, (int)Math.ceil(percentages.get(step_index)*person.maxNumKnows()));
65-
return to_generate;
66-
}
50+
6751
}

src/main/java/ldbc/snb/datagen/generator/KnowsGenerator.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package ldbc.snb.datagen.generator;
22

33
import ldbc.snb.datagen.objects.Person;
4+
import org.apache.hadoop.conf.Configuration;
45

56
import java.util.ArrayList;
67

@@ -9,4 +10,6 @@
910
*/
1011
public interface KnowsGenerator {
1112
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index );
13+
14+
public void initialize( Configuration conf );
1215
}

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 29 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
7878
percentages.add(0.45f);
7979
percentages.add(0.1f);
8080

81+
8182
long start = System.currentTimeMillis();
8283
printProgress("Starting: Person generation");
8384
long startPerson = System.currentTimeMillis();
@@ -87,23 +88,48 @@ public int runGenerateJob(Configuration conf) throws Exception {
8788

8889
printProgress("Creating university location correlated edges");
8990
long startUniversity = System.currentTimeMillis();
90-
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.UniversityKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 0);
91+
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,
92+
"ldbc.snb.datagen.hadoop.UniversityKeySetter",
93+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
94+
percentages,
95+
0,
96+
conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
97+
9198
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/universityEdges");
9299
long endUniversity = System.currentTimeMillis();
93100

101+
94102
printProgress("Creating main interest correlated edges");
95103
long startInterest= System.currentTimeMillis();
96-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.InterestKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 1);
104+
105+
knowsGenerator = new HadoopKnowsGenerator( conf,
106+
"ldbc.snb.datagen.hadoop.InterestKeySetter",
107+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
108+
percentages,
109+
1,
110+
conf.get("ldbc.snb.datagen.generator.knowsGenerator"));
111+
97112
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/interestEdges");
98113
long endInterest = System.currentTimeMillis();
99114

115+
116+
100117
printProgress("Creating random correlated edges");
101118
long startRandom= System.currentTimeMillis();
102-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 2);
119+
120+
knowsGenerator = new HadoopKnowsGenerator( conf,
121+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
122+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
123+
percentages,
124+
2,
125+
"ldbc.snb.datagen.generator.RandomKnowsGenerator");
126+
103127
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/randomEdges");
104128
long endRandom= System.currentTimeMillis();
105129

106130

131+
132+
107133
fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);
108134
printProgress("Merging the different edge files");
109135
ArrayList<String> edgeFileNames = new ArrayList<String>();

src/main/java/ldbc/snb/datagen/generator/RandomKnowsGenerator.java

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
package ldbc.snb.datagen.generator;
2+
3+
import ldbc.snb.datagen.objects.Knows;
4+
import ldbc.snb.datagen.objects.Person;
5+
import org.apache.hadoop.conf.Configuration;
6+
7+
import java.util.ArrayList;
8+
import java.util.Collections;
9+
import java.util.Comparator;
10+
import java.util.Random;
11+
12+
13+
/**
14+
* Created by aprat on 11/15/14.
15+
*/
16+
public class RandomKnowsGenerator implements KnowsGenerator {
17+
18+
Random rand;
19+
20+
21+
public RandomKnowsGenerator() {
22+
rand = new Random();
23+
}
24+
25+
26+
27+
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index ) {
28+
29+
rand.setSeed(seed);
30+
ArrayList<Integer> stubs = new ArrayList<Integer>();
31+
int index = 0;
32+
for(Person p : persons ) {
33+
long degree = Knows.target_edges(p, percentages, step_index);
34+
for( int i =0; i < degree; ++i ) {
35+
stubs.add(index);
36+
}
37+
++index;
38+
}
39+
Collections.shuffle(stubs,rand);
40+
while(!stubs.isEmpty()) {
41+
int first = rand.nextInt(stubs.size());
42+
int first_index = stubs.get(first);
43+
stubs.remove(first);
44+
if(!stubs.isEmpty()) {
45+
int second = rand.nextInt(stubs.size());
46+
int second_index = stubs.get(second);
47+
stubs.remove(second);
48+
if(first_index != second_index ) {
49+
Person p1 = persons.get(first_index);
50+
Person p2 = persons.get(second_index);
51+
Knows.createKnow(rand, p1, p2);
52+
}
53+
}
54+
}
55+
}
56+
57+
public void initialize( Configuration conf ) {
58+
59+
}
60+
}

src/main/java/ldbc/snb/datagen/generator/distribution/MOEZipfDistribution.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ public class MOEZipfDistribution implements DegreeDistribution {
2121
public void initialize(Configuration conf) {
2222
ALPHA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.MOEZipfDistribution.alpha",ALPHA_);
2323
DELTA_ = conf.getDouble("ldbc.snb.datagen.generator.distribution.MOEZipfDistribution.delta",DELTA_);
24-
zipf_ = new org.apache.commons.math3.distribution.ZipfDistribution(10000, ALPHA_);
24+
zipf_ = new org.apache.commons.math3.distribution.ZipfDistribution(5000, ALPHA_);
2525
random_ = new Random();
2626
}
2727

src/main/java/ldbc/snb/datagen/generator/tools/GraphUtils.java

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ public class GraphUtils {
1212

1313
public static double ClusteringCoefficient( PersonGraph graph ) {
1414
double CC = 0.0;
15+
int numEdges = 0;
1516
for( Long l : graph.persons()) {
1617
int triangles = 0;
1718
Set<Long> neighbors = graph.neighbors(l);
@@ -20,11 +21,35 @@ public static double ClusteringCoefficient( PersonGraph graph ) {
2021
Set<Long> aux = new HashSet<Long>(neighbors);
2122
aux.retainAll(neighbors2);
2223
triangles+=aux.size();
24+
numEdges++;
2325
}
2426
int degree = neighbors.size();
25-
if(triangles > 0)
27+
if(degree > 1)
2628
CC+=triangles / (double)(degree*(degree-1));
2729
}
2830
return CC / graph.persons().size();
2931
}
32+
33+
public static ArrayList<Double> ClusteringCoefficientList( PersonGraph graph ) {
34+
ArrayList<Double> CC = new ArrayList<Double>();
35+
int numEdges = 0;
36+
for( Long l : graph.persons()) {
37+
int triangles = 0;
38+
Set<Long> neighbors = graph.neighbors(l);
39+
for( Long n : neighbors){
40+
Set<Long> neighbors2 = graph.neighbors(n);
41+
Set<Long> aux = new HashSet<Long>(neighbors);
42+
aux.retainAll(neighbors2);
43+
triangles+=aux.size();
44+
numEdges++;
45+
}
46+
int degree = neighbors.size();
47+
double localCC = 0;
48+
if(degree > 1)
49+
localCC=triangles / (double)(degree*(degree-1));
50+
CC.add(localCC);
51+
52+
}
53+
return CC;
54+
}
3055
}

src/main/java/ldbc/snb/datagen/generator/tools/PersonGraph.java

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,17 +14,11 @@
1414
public class PersonGraph {
1515
private HashMap<Long,HashSet<Long>> adjacencies_;
1616
public PersonGraph(ArrayList<Person> persons) {
17-
HashSet<Long> exists = new HashSet<Long>();
18-
for( Person p : persons) {
19-
exists.add(p.accountId());
20-
}
2117
adjacencies_ = new HashMap<Long,HashSet<Long>>();
2218
for( Person p : persons) {
2319
HashSet<Long> neighbors = new HashSet<Long>();
2420
for (Knows k: p.knows()) {
25-
if(exists.contains(k.to().accountId())) {
26-
neighbors.add(k.to().accountId());
27-
}
21+
neighbors.add(k.to().accountId());
2822
}
2923
adjacencies_.put(p.accountId(),neighbors);
3024
}

0 commit comments

Comments
 (0)