Skip to content

Commit 4a24b96

Browse files
committed
.
1 parent ec2b39b commit 4a24b96

File tree

3 files changed

+85
-38
lines changed

3 files changed

+85
-38
lines changed

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 59 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public class ClusteringKnowsGenerator implements KnowsGenerator {
2121
private int numCoreCoreEdges = 0;
2222
private int numCorePeripheryEdges = 0;
2323
private int numCoreExternalEdges = 0;
24-
private float step_ = 0.10f;
24+
// private float step_ = 0.10f;
2525
private float min_community_prob_ = 0.0f;
2626

2727
private class PersonInfo {
@@ -174,18 +174,6 @@ private ArrayList<Community> generateCommunities( ArrayList<Person> persons) {
174174
bestCommunity.id_ = communities.size();
175175
communities.add(bestCommunity);
176176

177-
/*System.out.print("Core: ");
178-
for( PersonInfo pI : bestCommunity.core_ ) {
179-
System.out.print(pI.degree_+" ");
180-
}
181-
System.out.println();
182-
System.out.print("Periphery: ");
183-
for( PersonInfo pI : bestCommunity.periphery_ ) {
184-
System.out.print(pI.degree_+" ");
185-
}
186-
System.out.println();
187-
*/
188-
189177
last = best + 1;
190178
begin = last;
191179
}
@@ -270,8 +258,9 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
270258
// Computing clustering coefficient of periphery nodes
271259
for (PersonInfo pI: c.periphery_) {
272260
if(pI.degree_ > 1) {
273-
cInfo.clustering_coefficient_.set(pI.index_, (double)pI.degree_*(pI.degree_-1)*prob/(pI.original_degree_*(pI.original_degree_-1)));
261+
//cInfo.clustering_coefficient_.set(pI.index_, (double)pI.degree_*(pI.degree_-1)*prob/(pI.original_degree_*(pI.original_degree_-1)));
274262
//cInfo.clustering_coefficient_.set(pI.index_, (double)prob);
263+
cInfo.clustering_coefficient_.set(pI.index_, 0.0);
275264
}
276265
}
277266

@@ -292,8 +281,11 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
292281
/*if(size > 2) {
293282
internalTriangles = ((size - 1) * (size - 2) * Math.pow(prob, 3));
294283
}*/
284+
if(internalDegree > 1.0) {
285+
internalTriangles = (internalDegree * (internalDegree - 1) * prob);
286+
}
295287
boolean enteredOffset = false;
296-
if( internalDegree >= 2.0 ) {
288+
/*if( internalDegree >= 2.0 ) {
297289
internalTriangles = (internalDegree * (internalDegree - 1) * prob);
298290
} else if( internalDegree > 1.0) {
299291
double offset = internalDegree - 1.0;
@@ -302,7 +294,17 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
302294
internalTriangles = prob;
303295
enteredOffset = true;
304296
}
305-
}
297+
}*/
298+
/*long finalInternalDegree = (long)internalDegree;
299+
if( internalDegree > 1 ) {
300+
double offset = internalDegree - 1.0;
301+
double p = rand.nextDouble();
302+
if(p < offset) {
303+
finalInternalDegree++;
304+
}
305+
}*/
306+
//internalTriangles = (finalInternalDegree * (finalInternalDegree - 1) * prob);
307+
306308
// core periphery triangles
307309
double peripheryTriangles = 0;
308310
long remainingDegree = pI.degree_;
@@ -323,15 +325,16 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
323325
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * (1 - probSameCommunity) * probTwoConnected;
324326
}
325327

326-
/*double degree = (cInfo.core_node_expected_core_degree_.get(pI.index_) +
328+
//double degree = finalInternalDegree;
329+
double degree = (cInfo.core_node_expected_core_degree_.get(pI.index_) /*+
327330
cInfo.core_node_expected_periphery_degree_.get(pI.index_) +
328-
cInfo.core_node_expected_external_degree_.get(pI.index_));*/
331+
cInfo.core_node_expected_external_degree_.get(pI.index_)*/);
329332

330-
double degree = pI.original_degree_;
333+
//double degree = pI.original_degree_;
331334

332335
//System.out.println("Internal Triangles: "+internalTriangles+" , degree: "+degree);
333336
if( degree > 1.0 ) {
334-
cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles+peripheryTriangles/*+external_triangles*/)/(degree*(degree-1)));
337+
cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles/*+peripheryTriangles+external_triangles*/)/(degree*(degree-1)));
335338
}
336339
//else if( degree > 1.0 && enteredOffset ) {
337340
// degree = 2.0;
@@ -374,6 +377,7 @@ void refineCommunities( ClusteringInfo cInfo, ArrayList<Community> communities,
374377
int lookAhead = 5;
375378
int tries = 0;
376379
while( Math.abs(currentCC - targetCC) > 0.001 && tries <= lookAhead) {
380+
System.out.println(currentCC);
377381
boolean found = false;
378382
tries+=1;
379383
if( currentCC < targetCC ) {
@@ -383,13 +387,16 @@ void refineCommunities( ClusteringInfo cInfo, ArrayList<Community> communities,
383387
}
384388
if( found ) {
385389
currentCC = clusteringCoefficient(communities, cInfo);
386-
//System.out.println(currentCC);
387390
tries = 0;
388391
}
389392
}
390393
System.out.println("Clustering Coefficient after refinement: " + currentCC);
391394
}
392395

396+
/**
 * Returns the reciprocal of {@code n} as a float ({@code 1.0f / n}).
 *
 * In this diff, improveCC and worsenCC call it with {@code c.core_.size()} and use
 * the result as the amount added to / subtracted from the community probability
 * {@code c.p_}, replacing the former fixed {@code step_} field.
 *
 * NOTE(review): for n == 0 the float division yields Infinity rather than throwing;
 * the visible callers clamp the adjusted probability, but confirm that a community
 * with an empty core is acceptable here.
 *
 * @param n divisor
 * @return {@code 1.0f / (float) n}
 */
float step(int n) {
397+
return 1.0f/(float)n;
398+
}
399+
393400
boolean improveCC(ClusteringInfo cInfo, ArrayList<Community> communities) {
394401
ArrayList<Community> filtered = new ArrayList<Community>();
395402
for(Community c : communities ) {
@@ -398,7 +405,9 @@ boolean improveCC(ClusteringInfo cInfo, ArrayList<Community> communities) {
398405
if(filtered.size() == 0) return false;
399406
int index = rand.nextInt(filtered.size());
400407
Community c = filtered.get(index);
401-
c.p_ = c.p_ + step_ > 1.0f ? 1.0f : c.p_ + step_;
408+
float step = step(c.core_.size());
409+
c.p_ = c.p_ + step > 1.0f ? 1.0f : c.p_ + step;
410+
//c.p_ = c.p_ + step_ > 1.0f ? 1.0f : c.p_ + step_;
402411
cInfo.sumProbs+=0.01;
403412
estimateCCCommunity(cInfo, c, c.p_);
404413
return true;
@@ -412,7 +421,9 @@ boolean worsenCC(ClusteringInfo cInfo, ArrayList<Community> communities) {
412421
if(filtered.size() == 0) return false;
413422
int index = rand.nextInt(filtered.size());
414423
Community c = filtered.get(index);
415-
c.p_ = c.p_ - step_ < min_community_prob_ ? min_community_prob_ : c.p_ - step_ ;
424+
float step = step(c.core_.size());
425+
c.p_ = c.p_ - step < min_community_prob_ ? min_community_prob_ : c.p_ - step ;
426+
//c.p_ = c.p_ - step_ < min_community_prob_ ? min_community_prob_ : c.p_ - step_ ;
416427
cInfo.sumProbs-=0.01;
417428
estimateCCCommunity(cInfo, c, c.p_ );
418429
return true;
@@ -525,40 +536,51 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
525536
System.out.println("maxCC: "+maxCC);
526537

527538
for( Community c : communities ) {
528-
estimateCCCommunity(cInfo, c, rand.nextFloat());
539+
c.p_ = 1.0f;//rand.nextFloat();
540+
estimateCCCommunity(cInfo, c, c.p_ );
529541
}
530542

531543
PersonGraph graph;
532544
boolean iterate;
533545
float fakeTargetCC = targetCC;
546+
int numIterations = 0;
534547
do {
535548
System.out.println("Starting refinement iteration");
536549
iterate = false;
537550
refineCommunities(cInfo, communities, fakeTargetCC);
538551
System.out.println("Creating graph");
539552
for(Community c : communities ) {
540553
createEdgesCommunityCore(persons, c);
541-
createEdgesCommunityPeriphery(cInfo, persons,c);
554+
//createEdgesCommunityPeriphery(cInfo, persons,c);
542555
}
543-
fillGraphWithRemainingEdges(cInfo, communities, persons);
556+
//fillGraphWithRemainingEdges(cInfo, communities, persons);
544557
graph = new PersonGraph(persons);
545558
System.out.println("Computing clustering coefficient");
546-
double clusteringCoefficient = GraphUtils.ClusteringCoefficient(graph);
547-
System.out.println("Clustering coefficient of the generated graph: "+clusteringCoefficient);
548-
double delta = targetCC - clusteringCoefficient;
559+
/*double finalCC = 0;
560+
ArrayList<Double> clusteringCoefficient = GraphUtils.ClusteringCoefficientList(graph);
561+
int i = 0;
562+
for( Person p : persons) {
563+
long degree = graph.neighbors(p.accountId()).size();
564+
long originalDegree = p.maxNumKnows();
565+
if(originalDegree > 1)
566+
finalCC += clusteringCoefficient.get(i) * degree*(degree - 1) / (originalDegree*(originalDegree-1));
567+
i++;
568+
}
569+
finalCC /= persons.size();
570+
*/
571+
double finalCC = GraphUtils.ClusteringCoefficient(graph);
572+
573+
System.out.println("Clustering coefficient of the generated graph: "+finalCC);
574+
double delta = targetCC - finalCC;
549575
if( Math.abs( delta ) > 0.001 ) {
550576
for(Person person: persons) {
551577
person.knows().clear();
552578
}
553-
if( delta < 0.0) {
554-
fakeTargetCC /= 2.0f;
555-
} else {
556-
fakeTargetCC *= 1.5f;
557-
}
558-
579+
fakeTargetCC += delta;
559580
System.out.println("New Fake targetCC: "+fakeTargetCC );
560581
iterate = true;
561582
}
583+
numIterations++;
562584
}while( iterate );
563585

564586
int countMore = 0;
@@ -583,6 +605,7 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
583605
++index;
584606
}
585607

608+
System.out.println("Number of iterations to converge: "+numIterations);
586609
System.out.println("Number of persons with more degree than expected: "+countMore);
587610
System.out.println("Sum of excess degree: "+sumMore);
588611
System.out.println("Number of persons with less degree than expected: "+countLess);
@@ -594,7 +617,7 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
594617
public void initialize( Configuration conf ) {
595618
targetCC = conf.getFloat("ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient", 0.1f);
596619
System.out.println("Initialized clustering coefficient to "+targetCC);
597-
targetCC /= 2.0f;
620+
//targetCC /= 2.0f;
598621
}
599622

600623
public void printStatistics() {

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
9898
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/universityEdges");
9999
long endUniversity = System.currentTimeMillis();
100100

101+
/*
101102
printProgress("Creating main interest correlated edges");
102103
long startInterest= System.currentTimeMillis();
103104
@@ -112,7 +113,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
112113
long endInterest = System.currentTimeMillis();
113114
114115
115-
/*printProgress("Creating random correlated edges");
116+
printProgress("Creating random correlated edges");
116117
long startRandom= System.currentTimeMillis();
117118
118119
knowsGenerator = new HadoopKnowsGenerator( conf,
@@ -132,7 +133,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
132133
printProgress("Merging the different edge files");
133134
ArrayList<String> edgeFileNames = new ArrayList<String>();
134135
edgeFileNames.add(hadoopPrefix+"/universityEdges");
135-
edgeFileNames.add(hadoopPrefix+"/interestEdges");
136+
//edgeFileNames.add(hadoopPrefix+"/interestEdges");
136137
//edgeFileNames.add(hadoopPrefix+"/randomEdges");
137138
long startMerge = System.currentTimeMillis();
138139
HadoopMergeFriendshipFiles merger = new HadoopMergeFriendshipFiles(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter");

src/main/java/ldbc/snb/datagen/generator/tools/GraphUtils.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,27 @@ public static double ClusteringCoefficient( PersonGraph graph ) {
2929
}
3030
return CC / graph.persons().size();
3131
}
32+
33+
/**
 * Computes a local clustering coefficient for every id returned by
 * {@code graph.persons()} and returns them as a list, one entry per id, in the
 * iteration order of {@code graph.persons()}.
 *
 * For each id, the numerator is the sum, over its neighbors n, of the number of ids
 * present in both its own neighbor set and n's neighbor set; the denominator is
 * {@code degree * (degree - 1)}. Ids with degree 0 or 1 get a coefficient of 0.
 *
 * NOTE(review): this presumably relies on neighbor sets being symmetric, so that each
 * triangle is counted twice and matches the ordered-pair denominator; verify against
 * PersonGraph.
 *
 * @param graph graph to query through {@code persons()} and {@code neighbors(id)}
 * @return list of local clustering coefficients, one per entry of {@code graph.persons()}
 */
public static ArrayList<Double> ClusteringCoefficientList( PersonGraph graph ) {
34+
ArrayList<Double> CC = new ArrayList<Double>();
35+
// NOTE(review): numEdges is incremented below but never read in this method.
int numEdges = 0;
36+
for( Long l : graph.persons()) {
37+
int triangles = 0;
38+
Set<Long> neighbors = graph.neighbors(l);
39+
for( Long n : neighbors){
40+
Set<Long> neighbors2 = graph.neighbors(n);
41+
// Copy before intersecting so the set returned by graph.neighbors(l) is not modified.
Set<Long> aux = new HashSet<Long>(neighbors);
42+
aux.retainAll(neighbors2);
43+
// aux now holds the ids adjacent to both l and n.
triangles+=aux.size();
44+
numEdges++;
45+
}
46+
int degree = neighbors.size();
47+
double localCC = 0;
48+
// Guard against a zero denominator: degree*(degree-1) is 0 for degree 0 or 1.
if(degree > 1)
49+
localCC=triangles / (double)(degree*(degree-1));
50+
CC.add(localCC);
51+
52+
}
53+
return CC;
54+
}
3255
}

0 commit comments

Comments
 (0)