Skip to content

Commit 9540383

Browse files
committed
.
1 parent 7e14116 commit 9540383

File tree

5 files changed

+106
-46
lines changed

5 files changed

+106
-46
lines changed

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 66 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
package ldbc.snb.datagen.generator;
22

3-
import ldbc.snb.datagen.dictionary.Dictionaries;
4-
import ldbc.snb.datagen.generator.tools.GraphUtils;
5-
import ldbc.snb.datagen.generator.tools.MinHash;
6-
import ldbc.snb.datagen.generator.tools.PersonGraph;
73
import ldbc.snb.datagen.objects.Knows;
84
import ldbc.snb.datagen.objects.Person;
9-
import ldbc.snb.datagen.util.RandomGeneratorFarm;
105
import org.apache.hadoop.conf.Configuration;
116

127
import java.util.*;
@@ -20,6 +15,10 @@ public class ClusteringKnowsGenerator implements KnowsGenerator {
2015
private ArrayList<Float> percentages = null;
2116
private int stepIndex = 0;
2217
private float targetCC = 0.0f;
18+
private int numMisses = 0;
19+
private int numCoreCoreEdges = 0;
20+
private int numCorePeripheryEdges = 0;
21+
private int numCoreExternalEdges = 0;
2322

2423
private class PersonInfo {
2524
public int index_;
@@ -44,10 +43,11 @@ public int compare(PersonInfo a, PersonInfo b) {
4443
}
4544

4645
private class ClusteringInfo {
47-
public ArrayList<Long> core_node_expected_core_degree_ = new ArrayList<Long>();
48-
public ArrayList<Long> core_node_excedence_degree_ = new ArrayList<Long>();
49-
public ArrayList<Long> core_node_expected_periphery_degree_ = new ArrayList<Long>();
50-
public ArrayList<Long> core_node_expected_external_degree_ = new ArrayList<Long>();
46+
public ArrayList<Boolean> is_core_ = new ArrayList<Boolean>();
47+
public ArrayList<Double> core_node_expected_core_degree_ = new ArrayList<Double>();
48+
public ArrayList<Double> core_node_excedence_degree_ = new ArrayList<Double>();
49+
public ArrayList<Double> core_node_expected_periphery_degree_ = new ArrayList<Double>();
50+
public ArrayList<Double> core_node_expected_external_degree_ = new ArrayList<Double>();
5151
public ArrayList<Float> clustering_coefficient_ = new ArrayList<Float>();
5252
public ArrayList<Long> community_core_stubs_ = new ArrayList<Long>();
5353
public ArrayList<Float> community_core_probs_ = new ArrayList<Float>();
@@ -57,10 +57,11 @@ private class ClusteringInfo {
5757

5858
ClusteringInfo( int size, ArrayList<Community> communities ) {
5959
for( int i = 0; i < size; ++i) {
60-
core_node_expected_core_degree_.add(0L);
61-
core_node_excedence_degree_.add(0L);
62-
core_node_expected_periphery_degree_.add(0L);
63-
core_node_expected_external_degree_.add(0L);
60+
core_node_expected_core_degree_.add(0.0);
61+
core_node_excedence_degree_.add(0.0);
62+
core_node_expected_periphery_degree_.add(0.0);
63+
core_node_expected_external_degree_.add(0.0);
64+
is_core_.add(false);
6465
clustering_coefficient_.add(0.0f);
6566
}
6667
for( int i = 0; i < communities.size(); ++i) {
@@ -71,6 +72,7 @@ private class ClusteringInfo {
7172
for( Community c: communities) {
7273
for( PersonInfo pI : c.core_) {
7374
core_nodes_.add(pI.index_);
75+
is_core_.set(pI.index_, true);
7476
}
7577
}
7678

@@ -182,16 +184,15 @@ private void computeCommunityInfo(ClusteringInfo cInfo, Community c, float prob)
182184

183185
// Initializing cInfo with expected degrees
184186
for (PersonInfo pI : c.core_) {
185-
cInfo.core_node_expected_core_degree_.set(pI.index_, (long) ((c.core_.size() - 1) * prob));
187+
cInfo.core_node_expected_core_degree_.set(pI.index_, new Double((c.core_.size() - 1) * prob));
186188
cInfo.core_node_excedence_degree_.set(pI.index_, pI.degree_ - cInfo.core_node_expected_core_degree_.get(pI.index_));
187-
cInfo.core_node_expected_periphery_degree_.set(pI.index_, 0L);
189+
cInfo.core_node_expected_periphery_degree_.set(pI.index_, 0.0);
188190
}
189191

190192
long remainingStubs = 0;
191-
//System.out.println(c.core_.size()+" "+c.periphery_.size());
192193
for (PersonInfo pI : c.core_) {
193-
long pDegree = 0;
194-
long maxDegree = pI.degree_ - cInfo.core_node_expected_core_degree_.get(pI.index_);
194+
double pDegree = 0;
195+
double maxDegree = (cInfo.core_node_excedence_degree_.get(pI.index_));
195196
for (Long l : peripheryBudget) {
196197
if (l != 0 && pDegree < maxDegree) {
197198
pDegree++;
@@ -201,9 +202,7 @@ private void computeCommunityInfo(ClusteringInfo cInfo, Community c, float prob)
201202

202203
cInfo.core_node_expected_periphery_degree_.set(pI.index_, pDegree);
203204

204-
// System.out.println(pI.degree_+" "+cInfo.core_node_expected_core_degree_.get(pI.index_)+" "+cInfo.core_node_expected_periphery_degree_.get(pI.index_));
205-
206-
long deg = ((pI.degree_ - cInfo.core_node_expected_core_degree_.get(pI.index_) - cInfo.core_node_expected_periphery_degree_.get(pI.index_)));
205+
double deg = ((pI.degree_ - cInfo.core_node_expected_core_degree_.get(pI.index_) - cInfo.core_node_expected_periphery_degree_.get(pI.index_)));
207206
cInfo.core_node_expected_external_degree_.set(pI.index_, deg);
208207
remainingStubs += deg;
209208
}
@@ -233,10 +232,10 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
233232

234233
float probTwoConnected = 0.0f;
235234
for( Integer i : cInfo.core_nodes_ ) {
236-
long degree1 = cInfo.core_node_expected_external_degree_.get(i);
235+
double degree1 = cInfo.core_node_expected_external_degree_.get(i);
237236
if(degree1 > 0) {
238237
for (Integer ii : cInfo.core_nodes_) {
239-
long degree2 = cInfo.core_node_expected_external_degree_.get(ii);
238+
double degree2 = cInfo.core_node_expected_external_degree_.get(ii);
240239
if(degree2 > 0)
241240
probTwoConnected += degree1 * degree2 / (float) (2 * sumStubs * sumStubs);
242241
}
@@ -284,9 +283,6 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
284283
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * probTriangleSameCommunity;
285284
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * (1 - probSameCommunity) * probTwoConnected;
286285
}
287-
/*long degree = cInfo.core_node_expected_core_degree_.get(pI.index_) +
288-
cInfo.core_node_expected_periphery_degree_.get(pI.index_) +
289-
cInfo.core_node_expected_external_degree_.get(pI.index_);*/
290286
long degree = pI.original_degree_;
291287

292288
if( degree > 1 ) {
@@ -360,7 +356,10 @@ void createEdgesCommunityCore(ArrayList<Person> persons, Community c) {
360356
float prob = rand.nextFloat();
361357
if( prob <= c.p_ ) {
362358
// crear aresta
363-
Knows.createKnow(rand, persons.get(pI.index_), persons.get(other.index_));
359+
if(Knows.createKnow(rand, persons.get(pI.index_), persons.get(other.index_)))
360+
numCoreCoreEdges++;
361+
else
362+
numMisses++;
364363
}
365364
}
366365
}
@@ -375,14 +374,17 @@ void createEdgesCommunityPeriphery(ClusteringInfo cInfo, ArrayList<Person> perso
375374
}
376375

377376
for ( PersonInfo pI : c.core_ ) {
378-
long pDegree = 0;
379-
long maxDegree = cInfo.core_node_expected_periphery_degree_.get(pI.index_);
377+
double pDegree = 0;
378+
double maxDegree = cInfo.core_node_expected_periphery_degree_.get(pI.index_);
380379
int index =0;
381380
for (Long l : peripheryBudget) {
382381
if( l != 0 && pDegree < maxDegree) {
383382
pDegree++;
384383
l--;
385-
Knows.createKnow(rand, persons.get(pI.index_), persons.get(c.periphery_.get(index).index_));
384+
if(Knows.createKnow(rand, persons.get(pI.index_), persons.get(c.periphery_.get(index).index_)))
385+
numCorePeripheryEdges++;
386+
else
387+
numMisses++;
386388
}
387389
++index;
388390
}
@@ -402,7 +404,7 @@ void fillGraphWithRemainingEdges(ClusteringInfo cInfo, ArrayList<Community> comm
402404
}
403405
}
404406
}
405-
Collections.shuffle(stubs);
407+
Collections.shuffle(stubs,rand);
406408
while(stubs.size()>0) {
407409
int index = rand.nextInt(stubs.size());
408410
PersonInfo first = stubs.get(index);
@@ -412,7 +414,10 @@ void fillGraphWithRemainingEdges(ClusteringInfo cInfo, ArrayList<Community> comm
412414
PersonInfo second = stubs.get(index2);
413415
stubs.remove(index2);
414416
// create edge
415-
Knows.createKnow(rand, persons.get(first.index_), persons.get(second.index_));
417+
if(Knows.createKnow(rand, persons.get(first.index_), persons.get(second.index_)))
418+
numCoreExternalEdges++;
419+
else
420+
numMisses++;
416421
}
417422
}
418423
}
@@ -446,22 +451,45 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
446451

447452
for(Community c : communities ) {
448453
createEdgesCommunityCore(persons, c);
449-
createEdgesCommunityPeriphery(cInfo, persons,c);
454+
// createEdgesCommunityPeriphery(cInfo, persons,c);
450455
}
451456
fillGraphWithRemainingEdges(cInfo, communities, persons);
452-
int count = 0;
457+
458+
int countMore = 0;
459+
int countLess = 0;
460+
int sumMore = 0;
461+
int sumLess = 0;
462+
int index = 0;
453463
for( Person p : persons ) {
454-
if(p.knows().size() < Knows.target_edges(p,percentages,step_index) ) {
455-
count++;
456-
// System.out.println(p.knows().size()+" "+p.maxNumKnows());
464+
if(cInfo.is_core_.get(index)) {
465+
long target = Knows.target_edges(p, percentages, step_index);
466+
if (p.knows().size() > target) {
467+
sumMore += -target + p.knows().size();
468+
countMore++;
469+
} else if (p.knows().size() < target) {
470+
sumLess += target - p.knows().size();
471+
countLess++;
472+
}
457473
}
474+
++index;
458475
}
459-
System.out.println("Number of persons with less degree than expected: "+count);
476+
System.out.println("Number of persons with more degree than expected: "+countMore);
477+
System.out.println("Sum of excess degree: "+sumMore);
478+
System.out.println("Number of persons with less degree than expected: "+countLess);
479+
System.out.println("Sum of degree missed: "+sumLess);
480+
printStatistics();
460481
}
461482

462483
public void initialize( Configuration conf ) {
463484
targetCC = conf.getFloat("ldbc.snb.datagen.generator.ClusteringKnowsGenerator.clusteringCoefficient", 0.1f);
464485
System.out.println("Initialized clustering coefficient to "+targetCC);
465486
targetCC /= 2.0f;
466487
}
488+
489+
public void printStatistics() {
490+
System.out.println("Number core-core edges: "+numCoreCoreEdges);
491+
System.out.println("Number core-periphery edges: "+numCorePeripheryEdges);
492+
System.out.println("Number core-external edges: "+numCoreExternalEdges);
493+
System.out.println("Number edges missed: "+numMisses);
494+
}
467495
}

src/main/java/ldbc/snb/datagen/generator/RandomKnowsGenerator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.util.Comparator;
1010
import java.util.Random;
1111

12+
1213
/**
1314
* Created by aprat on 11/15/14.
1415
*/
@@ -35,7 +36,7 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
3536
}
3637
++index;
3738
}
38-
Collections.shuffle(stubs);
39+
Collections.shuffle(stubs,rand);
3940
while(!stubs.isEmpty()) {
4041
int first = rand.nextInt(stubs.size());
4142
int first_index = stubs.get(first);

src/main/java/ldbc/snb/datagen/hadoop/HadoopKnowsGenerator.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public static class HadoopKnowsGeneratorReducer extends Reducer<BlockKey, Perso
3131
private HadoopFileKeyChanger.KeySetter<TupleKey> keySetter = null;
3232
private ArrayList<Float> percentages;
3333
private int step_index;
34+
private int numGeneratedEdges = 0;
3435

3536
protected void setup(Context context) {
3637
//this.knowsGenerator = new DistanceKnowsGenerator();
@@ -68,8 +69,14 @@ public void reduce(BlockKey key, Iterable<Person> valueSet,Context context)
6869
this.knowsGenerator.generateKnows(persons, (int)key.block, percentages, step_index);
6970
for( Person p : persons ) {
7071
context.write(keySetter.getKey(p), p);
72+
numGeneratedEdges+=p.knows().size();
7173
}
7274
}
75+
76+
@Override
77+
public void cleanup(Context context) {
78+
System.out.println("Number of generated edges: "+numGeneratedEdges);
79+
}
7380
}
7481

7582
private Configuration conf;

src/main/java/ldbc/snb/datagen/hadoop/HadoopMergeFriendshipFiles.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package ldbc.snb.datagen.hadoop;
22

3-
import ldbc.snb.datagen.generator.KnowsGenerator;
43
import ldbc.snb.datagen.generator.LDBCDatagen;
54
import ldbc.snb.datagen.objects.Knows;
5+
import ldbc.snb.datagen.objects.Knows.FullComparator;
66
import ldbc.snb.datagen.objects.Person;
77
import org.apache.hadoop.conf.Configuration;
88
import org.apache.hadoop.fs.Path;
@@ -16,7 +16,6 @@
1616
import java.io.IOException;
1717
import java.util.ArrayList;
1818
import java.util.Collections;
19-
import java.util.Comparator;
2019

2120
/**
2221
* Created by aprat on 29/07/15.
@@ -28,6 +27,7 @@ public static class HadoopMergeFriendshipFilesReducer extends Reducer<TupleKey,
2827

2928
private Configuration conf;
3029
private HadoopFileKeyChanger.KeySetter<TupleKey> keySetter = null;
30+
private int numRepeated = 0;
3131

3232
protected void setup(Context context) {
3333
this.conf = context.getConfiguration();
@@ -56,7 +56,8 @@ public void reduce(TupleKey key, Iterable<Person> valueSet,Context context)
5656
index++;
5757
}
5858
person.knows().clear();
59-
Collections.sort(knows);
59+
Knows.FullComparator comparator = new Knows.FullComparator();
60+
Collections.sort(knows, comparator);
6061
if(knows.size() > 0 ) {
6162
long currentTo = knows.get(0).to().accountId();
6263
person.knows().add(knows.get(0));
@@ -65,13 +66,18 @@ public void reduce(TupleKey key, Iterable<Person> valueSet,Context context)
6566
if(currentTo != knows.get(index).to().accountId()) {
6667
person.knows().add(nextKnows);
6768
currentTo = nextKnows.to().accountId();
69+
} else {
70+
numRepeated++;
6871
}
6972
}
7073
}
7174

7275
//System.out.println("Num persons "+index);
7376
context.write(keySetter.getKey(person),person);
7477
}
78+
protected void cleanup(Context context){
79+
System.out.println("Number of repeated edges: "+numRepeated);
80+
}
7581
}
7682

7783
private Configuration conf;

src/main/java/ldbc/snb/datagen/objects/Knows.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import java.io.DataOutput;
4646
import java.io.IOException;
4747
import java.util.ArrayList;
48+
import java.util.Comparator;
4849
import java.util.Random;
4950

5051

@@ -110,15 +111,30 @@ public int compareTo(Knows k) {
110111
long res = (to_.accountId() - k.to().accountId());
111112
if( res > 0 ) return 1;
112113
if( res < 0 ) return -1;
113-
long res2 = creationDate_ - k.creationDate();
114+
/*long res2 = creationDate_ - k.creationDate();
114115
if( res2 > 0 ) return 1;
115116
if( res2 < 0 ) return -1;
117+
*/
116118
return 0;
117119
}
118120

121+
static public class FullComparator implements Comparator<Knows> {
122+
123+
public int compare(Knows a, Knows b) {
124+
long res = (a.to_.accountId() - b.to().accountId());
125+
if( res > 0 ) return 1;
126+
if( res < 0 ) return -1;
127+
long res2 = a.creationDate_ - b.creationDate();
128+
if( res2 > 0 ) return 1;
129+
if( res2 < 0 ) return -1;
130+
return 0;
131+
}
132+
133+
}
134+
119135
public static int num = 0;
120136

121-
public static void createKnow( Random random, Person personA, Person personB ) {
137+
public static boolean createKnow( Random random, Person personA, Person personB ) {
122138
long creationDate = Dictionaries.dates.randomKnowsCreationDate(
123139
random,
124140
personA,
@@ -127,9 +143,11 @@ public static void createKnow( Random random, Person personA, Person personB ) {
127143
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
128144
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
129145
float similarity = Person.Similarity(personA,personB);
130-
personB.knows().add(new Knows(personA, creationDate, similarity));
131-
personA.knows().add(new Knows(personB, creationDate, similarity));
146+
if(!personB.knows().add(new Knows(personA, creationDate, similarity))) return false;
147+
if(!personA.knows().add(new Knows(personB, creationDate, similarity))) return false;
148+
return true;
132149
}
150+
return false;
133151
}
134152

135153
public static long target_edges(Person person, ArrayList<Float> percentages, int step_index ) {

0 commit comments

Comments
 (0)