1
1
package ldbc .snb .datagen .generator ;
2
2
3
- import ldbc .snb .datagen .dictionary .Dictionaries ;
4
- import ldbc .snb .datagen .generator .tools .GraphUtils ;
5
- import ldbc .snb .datagen .generator .tools .MinHash ;
6
- import ldbc .snb .datagen .generator .tools .PersonGraph ;
7
3
import ldbc .snb .datagen .objects .Knows ;
8
4
import ldbc .snb .datagen .objects .Person ;
9
- import ldbc .snb .datagen .util .RandomGeneratorFarm ;
10
5
import org .apache .hadoop .conf .Configuration ;
11
6
12
7
import java .util .*;
@@ -20,6 +15,10 @@ public class ClusteringKnowsGenerator implements KnowsGenerator {
20
15
private ArrayList <Float > percentages = null ;
21
16
private int stepIndex = 0 ;
22
17
private float targetCC = 0.0f ;
18
+ private int numMisses = 0 ;
19
+ private int numCoreCoreEdges = 0 ;
20
+ private int numCorePeripheryEdges = 0 ;
21
+ private int numCoreExternalEdges = 0 ;
23
22
24
23
private class PersonInfo {
25
24
public int index_ ;
@@ -44,10 +43,11 @@ public int compare(PersonInfo a, PersonInfo b) {
44
43
}
45
44
46
45
private class ClusteringInfo {
47
- public ArrayList <Long > core_node_expected_core_degree_ = new ArrayList <Long >();
48
- public ArrayList <Long > core_node_excedence_degree_ = new ArrayList <Long >();
49
- public ArrayList <Long > core_node_expected_periphery_degree_ = new ArrayList <Long >();
50
- public ArrayList <Long > core_node_expected_external_degree_ = new ArrayList <Long >();
46
+ public ArrayList <Boolean > is_core_ = new ArrayList <Boolean >();
47
+ public ArrayList <Double > core_node_expected_core_degree_ = new ArrayList <Double >();
48
+ public ArrayList <Double > core_node_excedence_degree_ = new ArrayList <Double >();
49
+ public ArrayList <Double > core_node_expected_periphery_degree_ = new ArrayList <Double >();
50
+ public ArrayList <Double > core_node_expected_external_degree_ = new ArrayList <Double >();
51
51
public ArrayList <Float > clustering_coefficient_ = new ArrayList <Float >();
52
52
public ArrayList <Long > community_core_stubs_ = new ArrayList <Long >();
53
53
public ArrayList <Float > community_core_probs_ = new ArrayList <Float >();
@@ -57,10 +57,11 @@ private class ClusteringInfo {
57
57
58
58
ClusteringInfo ( int size , ArrayList <Community > communities ) {
59
59
for ( int i = 0 ; i < size ; ++i ) {
60
- core_node_expected_core_degree_ .add (0L );
61
- core_node_excedence_degree_ .add (0L );
62
- core_node_expected_periphery_degree_ .add (0L );
63
- core_node_expected_external_degree_ .add (0L );
60
+ core_node_expected_core_degree_ .add (0.0 );
61
+ core_node_excedence_degree_ .add (0.0 );
62
+ core_node_expected_periphery_degree_ .add (0.0 );
63
+ core_node_expected_external_degree_ .add (0.0 );
64
+ is_core_ .add (false );
64
65
clustering_coefficient_ .add (0.0f );
65
66
}
66
67
for ( int i = 0 ; i < communities .size (); ++i ) {
@@ -71,6 +72,7 @@ private class ClusteringInfo {
71
72
for ( Community c : communities ) {
72
73
for ( PersonInfo pI : c .core_ ) {
73
74
core_nodes_ .add (pI .index_ );
75
+ is_core_ .set (pI .index_ , true );
74
76
}
75
77
}
76
78
@@ -182,16 +184,15 @@ private void computeCommunityInfo(ClusteringInfo cInfo, Community c, float prob)
182
184
183
185
// Initializing cInfo with expected degrees
184
186
for (PersonInfo pI : c .core_ ) {
185
- cInfo .core_node_expected_core_degree_ .set (pI .index_ , ( long ) ((c .core_ .size () - 1 ) * prob ));
187
+ cInfo .core_node_expected_core_degree_ .set (pI .index_ , new Double ((c .core_ .size () - 1 ) * prob ));
186
188
cInfo .core_node_excedence_degree_ .set (pI .index_ , pI .degree_ - cInfo .core_node_expected_core_degree_ .get (pI .index_ ));
187
- cInfo .core_node_expected_periphery_degree_ .set (pI .index_ , 0L );
189
+ cInfo .core_node_expected_periphery_degree_ .set (pI .index_ , 0.0 );
188
190
}
189
191
190
192
long remainingStubs = 0 ;
191
- //System.out.println(c.core_.size()+" "+c.periphery_.size());
192
193
for (PersonInfo pI : c .core_ ) {
193
- long pDegree = 0 ;
194
- long maxDegree = pI . degree_ - cInfo .core_node_expected_core_degree_ .get (pI .index_ );
194
+ double pDegree = 0 ;
195
+ double maxDegree = ( cInfo .core_node_excedence_degree_ .get (pI .index_ ) );
195
196
for (Long l : peripheryBudget ) {
196
197
if (l != 0 && pDegree < maxDegree ) {
197
198
pDegree ++;
@@ -201,9 +202,7 @@ private void computeCommunityInfo(ClusteringInfo cInfo, Community c, float prob)
201
202
202
203
cInfo .core_node_expected_periphery_degree_ .set (pI .index_ , pDegree );
203
204
204
- // System.out.println(pI.degree_+" "+cInfo.core_node_expected_core_degree_.get(pI.index_)+" "+cInfo.core_node_expected_periphery_degree_.get(pI.index_));
205
-
206
- long deg = ((pI .degree_ - cInfo .core_node_expected_core_degree_ .get (pI .index_ ) - cInfo .core_node_expected_periphery_degree_ .get (pI .index_ )));
205
+ double deg = ((pI .degree_ - cInfo .core_node_expected_core_degree_ .get (pI .index_ ) - cInfo .core_node_expected_periphery_degree_ .get (pI .index_ )));
207
206
cInfo .core_node_expected_external_degree_ .set (pI .index_ , deg );
208
207
remainingStubs += deg ;
209
208
}
@@ -233,10 +232,10 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
233
232
234
233
float probTwoConnected = 0.0f ;
235
234
for ( Integer i : cInfo .core_nodes_ ) {
236
- long degree1 = cInfo .core_node_expected_external_degree_ .get (i );
235
+ double degree1 = cInfo .core_node_expected_external_degree_ .get (i );
237
236
if (degree1 > 0 ) {
238
237
for (Integer ii : cInfo .core_nodes_ ) {
239
- long degree2 = cInfo .core_node_expected_external_degree_ .get (ii );
238
+ double degree2 = cInfo .core_node_expected_external_degree_ .get (ii );
240
239
if (degree2 > 0 )
241
240
probTwoConnected += degree1 * degree2 / (float ) (2 * sumStubs * sumStubs );
242
241
}
@@ -284,9 +283,6 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
284
283
external_triangles += cInfo .core_node_expected_external_degree_ .get (pI .index_ ) * (cInfo .core_node_expected_external_degree_ .get (pI .index_ ) - 1 ) * probTriangleSameCommunity ;
285
284
external_triangles += cInfo .core_node_expected_external_degree_ .get (pI .index_ ) * (cInfo .core_node_expected_external_degree_ .get (pI .index_ ) - 1 ) * (1 - probSameCommunity ) * probTwoConnected ;
286
285
}
287
- /*long degree = cInfo.core_node_expected_core_degree_.get(pI.index_) +
288
- cInfo.core_node_expected_periphery_degree_.get(pI.index_) +
289
- cInfo.core_node_expected_external_degree_.get(pI.index_);*/
290
286
long degree = pI .original_degree_ ;
291
287
292
288
if ( degree > 1 ) {
@@ -360,7 +356,10 @@ void createEdgesCommunityCore(ArrayList<Person> persons, Community c) {
360
356
float prob = rand .nextFloat ();
361
357
if ( prob <= c .p_ ) {
362
358
// create edge
363
- Knows .createKnow (rand , persons .get (pI .index_ ), persons .get (other .index_ ));
359
+ if (Knows .createKnow (rand , persons .get (pI .index_ ), persons .get (other .index_ )))
360
+ numCoreCoreEdges ++;
361
+ else
362
+ numMisses ++;
364
363
}
365
364
}
366
365
}
@@ -375,14 +374,17 @@ void createEdgesCommunityPeriphery(ClusteringInfo cInfo, ArrayList<Person> perso
375
374
}
376
375
377
376
for ( PersonInfo pI : c .core_ ) {
378
- long pDegree = 0 ;
379
- long maxDegree = cInfo .core_node_expected_periphery_degree_ .get (pI .index_ );
377
+ double pDegree = 0 ;
378
+ double maxDegree = cInfo .core_node_expected_periphery_degree_ .get (pI .index_ );
380
379
int index =0 ;
381
380
for (Long l : peripheryBudget ) {
382
381
if ( l != 0 && pDegree < maxDegree ) {
383
382
pDegree ++;
384
383
l --;
385
- Knows .createKnow (rand , persons .get (pI .index_ ), persons .get (c .periphery_ .get (index ).index_ ));
384
+ if (Knows .createKnow (rand , persons .get (pI .index_ ), persons .get (c .periphery_ .get (index ).index_ )))
385
+ numCorePeripheryEdges ++;
386
+ else
387
+ numMisses ++;
386
388
}
387
389
++index ;
388
390
}
@@ -402,7 +404,7 @@ void fillGraphWithRemainingEdges(ClusteringInfo cInfo, ArrayList<Community> comm
402
404
}
403
405
}
404
406
}
405
- Collections .shuffle (stubs );
407
+ Collections .shuffle (stubs , rand );
406
408
while (stubs .size ()>0 ) {
407
409
int index = rand .nextInt (stubs .size ());
408
410
PersonInfo first = stubs .get (index );
@@ -412,7 +414,10 @@ void fillGraphWithRemainingEdges(ClusteringInfo cInfo, ArrayList<Community> comm
412
414
PersonInfo second = stubs .get (index2 );
413
415
stubs .remove (index2 );
414
416
// create edge
415
- Knows .createKnow (rand , persons .get (first .index_ ), persons .get (second .index_ ));
417
+ if (Knows .createKnow (rand , persons .get (first .index_ ), persons .get (second .index_ )))
418
+ numCoreExternalEdges ++;
419
+ else
420
+ numMisses ++;
416
421
}
417
422
}
418
423
}
@@ -446,22 +451,45 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
446
451
447
452
for (Community c : communities ) {
448
453
createEdgesCommunityCore (persons , c );
449
- createEdgesCommunityPeriphery (cInfo , persons ,c );
454
+ // createEdgesCommunityPeriphery(cInfo, persons,c);
450
455
}
451
456
fillGraphWithRemainingEdges (cInfo , communities , persons );
452
- int count = 0 ;
457
+
458
+ int countMore = 0 ;
459
+ int countLess = 0 ;
460
+ int sumMore = 0 ;
461
+ int sumLess = 0 ;
462
+ int index = 0 ;
453
463
for ( Person p : persons ) {
454
- if (p .knows ().size () < Knows .target_edges (p ,percentages ,step_index ) ) {
455
- count ++;
456
- // System.out.println(p.knows().size()+" "+p.maxNumKnows());
464
+ if (cInfo .is_core_ .get (index )) {
465
+ long target = Knows .target_edges (p , percentages , step_index );
466
+ if (p .knows ().size () > target ) {
467
+ sumMore += -target + p .knows ().size ();
468
+ countMore ++;
469
+ } else if (p .knows ().size () < target ) {
470
+ sumLess += target - p .knows ().size ();
471
+ countLess ++;
472
+ }
457
473
}
474
+ ++index ;
458
475
}
459
- System .out .println ("Number of persons with less degree than expected: " +count );
476
+ System .out .println ("Number of persons with more degree than expected: " +countMore );
477
+ System .out .println ("Sum of excess degree: " +sumMore );
478
+ System .out .println ("Number of persons with less degree than expected: " +countLess );
479
+ System .out .println ("Sum of degree missed: " +sumLess );
480
+ printStatistics ();
460
481
}
461
482
462
483
/**
 * Reads the clustering-coefficient setting from the given configuration and stores it in targetCC.
 * The value is looked up under the key
 * "ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient" and falls back to 0.1f
 * when the key is absent. The value that was read is printed to standard output.
 *
 * @param conf configuration object the clustering coefficient is read from
 */
public void initialize ( Configuration conf ) {
463
484
targetCC = conf .getFloat ("ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient" , 0.1f );
464
485
System .out .println ("Initialized clustering coefficient to " +targetCC );
465
486
// The stored target is half of the configured (and printed) value.
// NOTE(review): the reason for halving is not visible in this view - confirm against the estimation code.
targetCC /= 2.0f ;
466
487
}
488
+
489
/**
 * Prints the edge counters of this generator to standard output, one line per counter:
 * core-core edges, core-periphery edges, core-external edges and missed edges.
 * In the visible code each counter is incremented according to the boolean returned by Knows.createKnow:
 * the matching edge counter on true, numMisses on false.
 * NOTE(review): no reset of these counters is visible in this view, so the totals may accumulate
 * across several generateKnows calls - confirm.
 */
+ public void printStatistics () {
490
+ System .out .println ("Number core-core edges: " +numCoreCoreEdges );
491
// NOTE(review): the visible call to createEdgesCommunityPeriphery is commented out in generateKnows,
// so this counter presumably stays at 0 - confirm.
+ System .out .println ("Number core-periphery edges: " +numCorePeripheryEdges );
492
+ System .out .println ("Number core-external edges: " +numCoreExternalEdges );
493
+ System .out .println ("Number edges missed: " +numMisses );
494
+ }
467
495
}
0 commit comments