Skip to content

Commit ace3855

Browse files
committed
Implemented Random knows generator
1 parent f83ea9d commit ace3855

File tree

4 files changed

+96
-10
lines changed

4 files changed

+96
-10
lines changed

src/main/java/ldbc/snb/datagen/generator/ClusteringKnowsGenerator.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ public class ClusteringKnowsGenerator implements KnowsGenerator {
2222
private class PersonInfo {
2323
public int index_;
2424
public long degree_;
25+
public long original_degree_;
2526
}
2627

2728
private class Community {
@@ -88,6 +89,7 @@ private Community findSolution( ArrayList<Person> persons, int begin, int last)
8889
PersonInfo pInfo = new PersonInfo();
8990
pInfo.index_ = i;
9091
pInfo.degree_ = Knows.target_edges(p,percentages,stepIndex);
92+
pInfo.original_degree_ = p.maxNumKnows();
9193
nodes.add(pInfo);
9294
}
9395

@@ -242,7 +244,8 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
242244
// Computing clustering coefficient of periphery nodes
243245
for (PersonInfo pI: c.periphery_) {
244246
if(pI.degree_ > 1) {
245-
cInfo.clustering_coefficient_.set(pI.index_, prob);
247+
cInfo.clustering_coefficient_.set(pI.index_, pI.degree_*(pI.degree_-1)*prob/(pI.original_degree_*(pI.original_degree_-1)));
248+
//cInfo.clustering_coefficient_.set(pI.index_, prob);
246249
}
247250
}
248251

@@ -279,9 +282,10 @@ private void estimateCCCommunity( ClusteringInfo cInfo, Community c, float prob
279282
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * probTriangleSameCommunity;
280283
external_triangles += cInfo.core_node_expected_external_degree_.get(pI.index_) * (cInfo.core_node_expected_external_degree_.get(pI.index_) - 1) * (1 - probSameCommunity) * probTwoConnected;
281284
}
282-
long degree = cInfo.core_node_expected_core_degree_.get(pI.index_) +
285+
/*long degree = cInfo.core_node_expected_core_degree_.get(pI.index_) +
283286
cInfo.core_node_expected_periphery_degree_.get(pI.index_) +
284-
cInfo.core_node_expected_external_degree_.get(pI.index_);
287+
cInfo.core_node_expected_external_degree_.get(pI.index_);*/
288+
long degree = pI.original_degree_;
285289

286290
if( degree > 1 ) {
287291
cInfo.clustering_coefficient_.set(pI.index_, (internalTriangles+peripheryTriangles+external_triangles)/(float)(degree*(degree-1)));
@@ -417,7 +421,7 @@ public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float>
417421
rand.setSeed(seed);
418422
this.percentages = percentages;
419423
this.stepIndex = step_index;
420-
float targetCC = 0.23f;
424+
float targetCC = 0.15f;
421425

422426

423427
ArrayList<Community> communities = generateCommunities(persons);

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,24 +88,45 @@ public int runGenerateJob(Configuration conf) throws Exception {
8888

8989
printProgress("Creating university location correlated edges");
9090
long startUniversity = System.currentTimeMillis();
91-
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.UniversityKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 0);
91+
HadoopKnowsGenerator knowsGenerator = new HadoopKnowsGenerator(conf,
92+
"ldbc.snb.datagen.hadoop.UniversityKeySetter",
93+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
94+
percentages,
95+
0,
96+
"ldbc.snb.datagen.generator.ClusteringKnowsGenerator");
97+
9298
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/universityEdges");
9399
long endUniversity = System.currentTimeMillis();
94100

95101
printProgress("Creating main interest correlated edges");
96102
long startInterest= System.currentTimeMillis();
97-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.InterestKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 1);
103+
104+
knowsGenerator = new HadoopKnowsGenerator( conf,
105+
"ldbc.snb.datagen.hadoop.InterestKeySetter",
106+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
107+
percentages,
108+
1,
109+
"ldbc.snb.datagen.generator.ClusteringKnowsGenerator");
110+
98111
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/interestEdges");
99112
long endInterest = System.currentTimeMillis();
100113

101114

102115
printProgress("Creating random correlated edges");
103116
long startRandom= System.currentTimeMillis();
104-
knowsGenerator = new HadoopKnowsGenerator(conf,"ldbc.snb.datagen.hadoop.RandomKeySetter", "ldbc.snb.datagen.hadoop.RandomKeySetter", percentages, 2);
117+
118+
knowsGenerator = new HadoopKnowsGenerator( conf,
119+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
120+
"ldbc.snb.datagen.hadoop.RandomKeySetter",
121+
percentages,
122+
2,
123+
"ldbc.snb.datagen.generator.RandomKnowsGenerator");
124+
105125
knowsGenerator.run(hadoopPrefix+"/persons",hadoopPrefix+"/randomEdges");
106126
long endRandom= System.currentTimeMillis();
107127

108128

129+
109130
fs.delete(new Path(DatagenParams.hadoopDir + "/persons"), true);
110131
printProgress("Merging the different edge files");
111132
ArrayList<String> edgeFileNames = new ArrayList<String>();
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package ldbc.snb.datagen.generator;
2+
3+
import ldbc.snb.datagen.objects.Knows;
4+
import ldbc.snb.datagen.objects.Person;
5+
6+
import java.util.ArrayList;
7+
import java.util.Collections;
8+
import java.util.Comparator;
9+
import java.util.Random;
10+
11+
/**
12+
* Created by aprat on 11/15/14.
13+
*/
14+
public class RandomKnowsGenerator implements KnowsGenerator {
15+
16+
Random rand;
17+
18+
19+
public RandomKnowsGenerator() {
20+
rand = new Random();
21+
}
22+
23+
24+
25+
public void generateKnows( ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index ) {
26+
27+
rand.setSeed(seed);
28+
ArrayList<Integer> stubs = new ArrayList<Integer>();
29+
int index = 0;
30+
for(Person p : persons ) {
31+
long degree = Knows.target_edges(p, percentages, step_index);
32+
for( int i =0; i < degree; ++i ) {
33+
stubs.add(index);
34+
}
35+
++index;
36+
}
37+
Collections.shuffle(stubs);
38+
while(!stubs.isEmpty()) {
39+
int first = rand.nextInt(stubs.size());
40+
int first_index = stubs.get(first);
41+
stubs.remove(first);
42+
if(!stubs.isEmpty()) {
43+
int second = rand.nextInt(stubs.size());
44+
int second_index = stubs.get(second);
45+
stubs.remove(second);
46+
if(first_index != second_index ) {
47+
Person p1 = persons.get(first_index);
48+
Person p2 = persons.get(second_index);
49+
Knows.createKnow(rand, p1, p2);
50+
}
51+
}
52+
}
53+
}
54+
}

src/main/java/ldbc/snb/datagen/hadoop/HadoopKnowsGenerator.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,13 @@ public static class HadoopKnowsGeneratorReducer extends Reducer<BlockKey, Perso
3434

3535
protected void setup(Context context) {
3636
//this.knowsGenerator = new DistanceKnowsGenerator();
37-
this.knowsGenerator = new ClusteringKnowsGenerator();
38-
this.percentages = new ArrayList<Float>();
3937
this.conf = context.getConfiguration();
38+
try {
39+
this.knowsGenerator = (KnowsGenerator) Class.forName(conf.get("knowsGeneratorName")).newInstance();
40+
}catch(Exception e) {
41+
System.out.println(e.getMessage());
42+
}
43+
this.percentages = new ArrayList<Float>();
4044
this.step_index = conf.getInt("stepIndex",0);
4145
float p = conf.getFloat("percentage0",0.0f);
4246
int index = 1;
@@ -70,16 +74,18 @@ public void reduce(BlockKey key, Iterable<Person> valueSet,Context context)
7074
private Configuration conf;
7175
private String preKeySetterName;
7276
private String postKeySetterName;
77+
private String knowsGeneratorName;
7378
private ArrayList<Float> percentages;
7479
private int step_index;
7580

7681

77-
public HadoopKnowsGenerator( Configuration conf, String preKeySetterName, String postKeySetterName, ArrayList<Float> percentages, int step_index ) {
82+
public HadoopKnowsGenerator( Configuration conf, String preKeySetterName, String postKeySetterName, ArrayList<Float> percentages, int step_index, String knowsGeneratorName ) {
7883
this.conf = conf;
7984
this.preKeySetterName = preKeySetterName;
8085
this.postKeySetterName = postKeySetterName;
8186
this.percentages = percentages;
8287
this.step_index = step_index;
88+
this.knowsGeneratorName = knowsGeneratorName;
8389
}
8490

8591
public void run( String inputFileName, String outputFileName ) throws Exception {
@@ -116,6 +122,7 @@ public void run( String inputFileName, String outputFileName ) throws Exception
116122
++index;
117123
}
118124
conf.set("postKeySetterName",postKeySetterName);
125+
conf.set("knowsGeneratorName", knowsGeneratorName);
119126
int numThreads = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numThreads"));
120127
Job job = Job.getInstance(conf, "Knows generator");
121128
job.setMapOutputKeyClass(BlockKey.class);

0 commit comments

Comments
 (0)