Skip to content

Commit 6de51f3

Browse files
committed
Full graph generation with tunable clustering coefficient
1 parent 4a24b96 commit 6de51f3

File tree

3 files changed

+66
-56
lines changed

3 files changed

+66
-56
lines changed

params.ini

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11

2-
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
2+
ldbc.snb.datagen.generator.numPersons:10000
3+
ldbc.snb.datagen.generator.startYear:2010
4+
ldbc.snb.datagen.generator.numYears:1
35

46
ldbc.snb.datagen.serializer.compressed:false
57

6-
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
8+
ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializer
79

8-
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
10+
ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.empty.EmptyInvariantSerializer
911

10-
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
12+
ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.empty.EmptyPersonActivitySerializer
13+
14+
ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient:0.1
1115

1216
ldbc.snb.datagen.generator.numThreads:1
1317

14-
ldbc.snb.datagen.serializer.updateStreams:true
18+
ldbc.snb.datagen.serializer.updateStreams:false
19+
20+
ldbc.snb.datagen.generator.activity:false
1521

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 50 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ private class ClusteringInfo {
5656
public ArrayList<Long> community_core_stubs_ = new ArrayList<Long>();
5757
public ArrayList<Float> community_core_probs_ = new ArrayList<Float>();
5858
public ArrayList<Integer> core_nodes_ = new ArrayList<Integer>();
59+
public ArrayList<Integer> community_id_ = new ArrayList<Integer>();
5960
public float sumProbs = 0.0f;
6061
public int numCommunities = 0;
6162

@@ -67,17 +68,26 @@ private class ClusteringInfo {
6768
core_node_expected_external_degree_.add(0.0);
6869
is_core_.add(false);
6970
clustering_coefficient_.add(0.0);
71+
community_id_.add(0);
7072
}
7173
for( int i = 0; i < communities.size(); ++i) {
7274
community_core_stubs_.add(0L);
7375
community_core_probs_.add(0.0f);
7476
}
7577

78+
int index = 0;
7679
for( Community c: communities) {
7780
for( PersonInfo pI : c.core_) {
7881
core_nodes_.add(pI.index_);
7982
is_core_.set(pI.index_, true);
83+
community_id_.set(pI.index_,index );
8084
}
85+
86+
for( PersonInfo pI : c.periphery_) {
87+
is_core_.set(pI.index_, false);
88+
community_id_.set(pI.index_,index );
89+
}
90+
index++;
8191
}
8292

8393
numCommunities = communities.size();
@@ -97,7 +107,7 @@ private Community findSolution( ArrayList<Person> persons, int begin, int last)
97107
PersonInfo pInfo = new PersonInfo();
98108
pInfo.index_ = i;
99109
pInfo.degree_ = Knows.target_edges(p,percentages,stepIndex);
100-
pInfo.original_degree_ = (long)(p.maxNumKnows()*0.9);
110+
pInfo.original_degree_ = (long)(p.maxNumKnows());
101111
nodes.add(pInfo);
102112
}
103113

@@ -151,6 +161,12 @@ private Community checkBudget(ArrayList<Person> persons, ArrayList<PersonInfo> c
151161
return community;
152162
}
153163

164+
private void testCommunity(Community c) {
165+
for(PersonInfo pI : c.core_ ) {
166+
if(pI.degree_ < (c.core_.size() - 1)) System.out.println("Error in building communities\n");
167+
}
168+
}
169+
154170
private ArrayList<Community> generateCommunities( ArrayList<Person> persons) {
155171
ArrayList<Community> communities = new ArrayList<Community>();
156172
int last = 0;
@@ -173,6 +189,7 @@ private ArrayList<Community> generateCommunities( ArrayList<Person> persons) {
173189
}
174190
bestCommunity.id_ = communities.size();
175191
communities.add(bestCommunity);
192+
testCommunity(bestCommunity);
176193

177194
last = best + 1;
178195
begin = last;
@@ -246,21 +263,23 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
246263
float probTwoConnected = 0.0f;
247264
for( Integer i : cInfo.core_nodes_ ) {
248265
double degree1 = cInfo.core_node_expected_external_degree_.get(i);
249-
if(degree1 > 0) {
266+
if(degree1 >= 1) {
250267
for (Integer ii : cInfo.core_nodes_) {
251-
double degree2 = cInfo.core_node_expected_external_degree_.get(ii);
252-
if(degree2 > 0)
253-
probTwoConnected += degree1 * degree2 / (float) (2 * sumStubs * sumStubs);
268+
// Only pairs of core nodes that belong to different communities contribute.
// The previous condition compared the community of i with itself (always
// false); compare i against ii, and by value since the list holds boxed Integers.
if(!cInfo.community_id_.get(i).equals(cInfo.community_id_.get(ii))) {
269+
double degree2 = cInfo.core_node_expected_external_degree_.get(ii);
270+
if (degree2 >= 1)
271+
probTwoConnected += degree1 * degree2 / (float) (2 * sumStubs * sumStubs);
272+
}
254273
}
255274
}
256275
}
257276

258277
// Computing clustering coefficient of periphery nodes
259278
for (PersonInfo pI: c.periphery_) {
260279
if(pI.degree_ > 1) {
261-
//cInfo.clustering_coefficient_.set(pI.index_, (double)pI.degree_*(pI.degree_-1)*prob/(pI.original_degree_*(pI.original_degree_-1)));
280+
cInfo.clustering_coefficient_.set(pI.index_, (double)pI.degree_*(pI.degree_-1)*prob/(pI.original_degree_*(pI.original_degree_-1)));
262281
//cInfo.clustering_coefficient_.set(pI.index_, (double)prob);
263-
cInfo.clustering_coefficient_.set(pI.index_, 0.0);
282+
//cInfo.clustering_coefficient_.set(pI.index_, 0.0);
264283
}
265284
}
266285

@@ -278,32 +297,11 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
278297
// core core triangles
279298
double internalTriangles = 0.0;
280299
double internalDegree = cInfo.core_node_expected_core_degree_.get(pI.index_);
281-
/*if(size > 2) {
282-
internalTriangles = ((size - 1) * (size - 2) * Math.pow(prob, 3));
283-
}*/
284-
if(internalDegree > 1.0) {
300+
301+
if(internalDegree >= 2.0) {
285302
internalTriangles = (internalDegree * (internalDegree - 1) * prob);
286303
}
287304
boolean enteredOffset = false;
288-
/*if( internalDegree >= 2.0 ) {
289-
internalTriangles = (internalDegree * (internalDegree - 1) * prob);
290-
} else if( internalDegree > 1.0) {
291-
double offset = internalDegree - 1.0;
292-
double p = rand.nextDouble();
293-
if(p < offset) {
294-
internalTriangles = prob;
295-
enteredOffset = true;
296-
}
297-
}*/
298-
/*long finalInternalDegree = (long)internalDegree;
299-
if( internalDegree > 1 ) {
300-
double offset = internalDegree - 1.0;
301-
double p = rand.nextDouble();
302-
if(p < offset) {
303-
finalInternalDegree++;
304-
}
305-
}*/
306-
//internalTriangles = (finalInternalDegree * (finalInternalDegree - 1) * prob);
307305

308306
// core periphery triangles
309307
double peripheryTriangles = 0;
@@ -325,21 +323,18 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
325323
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * (1 - probSameCommunity) * probTwoConnected;
326324
}
327325

326+
328327
//double degree = finalInternalDegree;
329-
double degree = (cInfo.core_node_expected_core_degree_.get(pI.index_) /*+
328+
/*double degree = (cInfo.core_node_expected_core_degree_.get(pI.index_) +
330329
cInfo.core_node_expected_periphery_degree_.get(pI.index_) +
331-
cInfo.core_node_expected_external_degree_.get(pI.index_)*/);
330+
cInfo.core_node_expected_external_degree_.get(pI.index_));*/
332331

333-
//double degree = pI.original_degree_;
332+
double degree = pI.original_degree_;
334333

335334
//System.out.println("Internal Triangles: "+internalTriangles+" , degree: "+degree);
336-
if( degree > 1.0 ) {
337-
cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles/*+peripheryTriangles+external_triangles*/)/(degree*(degree-1)));
335+
if( degree >= 2.0 ) {
336+
cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles+peripheryTriangles+external_triangles)/(degree*(degree-1)));
338337
}
339-
//else if( degree > 1.0 && enteredOffset ) {
340-
// degree = 2.0;
341-
// cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles+peripheryTriangles+external_triangles)/(double)(degree*(degree-1)));
342-
//}
343338
}
344339
}
345340
}
@@ -377,7 +372,7 @@ void refineCommunities( ClusteringInfo cInfo, ArrayList<Community> communities,
377372
int lookAhead = 5;
378373
int tries = 0;
379374
while( Math.abs(currentCC - targetCC) > 0.001 && tries <= lookAhead) {
380-
System.out.println(currentCC);
375+
// System.out.println(currentCC);
381376
boolean found = false;
382377
tries+=1;
383378
if( currentCC < targetCC ) {
@@ -537,6 +532,7 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
537532

538533
for( Community c : communities ) {
539534
c.p_ = 1.0f;//rand.nextFloat();
535+
//c.p_ = rand.nextFloat();
540536
estimateCCCommunity(cInfo, c, c.p_ );
541537
}
542538

@@ -551,12 +547,12 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
551547
System.out.println("Creating graph");
552548
for(Community c : communities ) {
553549
createEdgesCommunityCore(persons, c);
554-
//createEdgesCommunityPeriphery(cInfo, persons,c);
550+
createEdgesCommunityPeriphery(cInfo, persons,c);
555551
}
556-
//fillGraphWithRemainingEdges(cInfo, communities, persons);
552+
fillGraphWithRemainingEdges(cInfo, communities, persons);
557553
graph = new PersonGraph(persons);
558554
System.out.println("Computing clustering coefficient");
559-
/*double finalCC = 0;
555+
double finalCC = 0;
560556
ArrayList<Double> clusteringCoefficient = GraphUtils.ClusteringCoefficientList(graph);
561557
int i = 0;
562558
for( Person p : persons) {
@@ -567,16 +563,16 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
567563
i++;
568564
}
569565
finalCC /= persons.size();
570-
*/
571-
double finalCC = GraphUtils.ClusteringCoefficient(graph);
566+
//double finalCC = GraphUtils.ClusteringCoefficient(graph);
572567

573568
System.out.println("Clustering coefficient of the generated graph: "+finalCC);
574569
double delta = targetCC - finalCC;
575570
if( Math.abs( delta ) > 0.001 ) {
571+
resetStatistics();
576572
for(Person person: persons) {
577573
person.knows().clear();
578574
}
579-
fakeTargetCC += delta;
575+
fakeTargetCC += delta*0.8f;
580576
System.out.println("New Fake targetCC: "+fakeTargetCC );
581577
iterate = true;
582578
}
@@ -617,7 +613,14 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
617613
/**
 * Reads the target clustering coefficient from the configuration key
 * "ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient"
 * (default 0.1), prints it, and then halves the stored value.
 *
 * NOTE(review): the printed value is the configured one, not the halved value
 * that is actually kept in targetCC; the reason for halving is not visible
 * here and should be confirmed.
 *
 * @param conf configuration object the coefficient is read from
 */
public void initialize( Configuration conf ) {
618614
targetCC = conf.getFloat("ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient", 0.1f);
619615
System.out.println("Initialized clustering coefficient to "+targetCC);
620-
//targetCC /= 2.0f;
616+
targetCC /= 2.0f;
617+
}
618+
619+
public void resetStatistics() {
620+
numCoreCoreEdges = 0;
621+
numCorePeripheryEdges = 0;
622+
numCoreExternalEdges = 0;
623+
numMisses = 0;
621624
}
622625

623626
public void printStatistics() {

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
9898
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/universityEdges");
9999
long endUniversity = System.currentTimeMillis();
100100

101-
/*
101+
102102
printProgress("Creating main interest correlated edges");
103103
long startInterest= System.currentTimeMillis();
104104

@@ -113,6 +113,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
113113
long endInterest = System.currentTimeMillis();
114114

115115

116+
116117
printProgress("Creating random correlated edges");
117118
long startRandom= System.currentTimeMillis();
118119

@@ -125,16 +126,16 @@ public int runGenerateJob(Configuration conf) throws Exception {
125126

126127
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/randomEdges");
127128
long endRandom= System.currentTimeMillis();
128-
*/
129+
129130

130131

131132

132133
fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);
133134
printProgress("Merging the different edge files");
134135
ArrayList<String> edgeFileNames = new ArrayList<String>();
135136
edgeFileNames.add(hadoopPrefix+"/universityEdges");
136-
//edgeFileNames.add(hadoopPrefix+"/interestEdges");
137-
//edgeFileNames.add(hadoopPrefix+"/randomEdges");
137+
edgeFileNames.add(hadoopPrefix+"/interestEdges");
138+
edgeFileNames.add(hadoopPrefix+"/randomEdges");
138139
long startMerge = System.currentTimeMillis();
139140
HadoopMergeFriendshipFiles merger = new HadoopMergeFriendshipFiles(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter");
140141
merger.run(hadoopPrefix+"/mergedPersons", edgeFileNames);

0 commit comments

Comments
 (0)