Skip to content

Commit d6071c7

Browse files
committed
Added support for scale factors of different typologies/benchmarks
Added weights to edges based on country: the closer the countries, the larger the weight
1 parent a700684 commit d6071c7

File tree

9 files changed

+220
-86
lines changed

9 files changed

+220
-86
lines changed

params.ini

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
ldbc.snb.datagen.generator.scaleFactor:1
2+
ldbc.snb.datagen.generator.scaleFactor:snb.interactive.1
33

44
ldbc.snb.datagen.serializer.compressed:false
55

@@ -12,3 +12,4 @@ ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer
1212
ldbc.snb.datagen.generator.numThreads:1
1313

1414
ldbc.snb.datagen.serializer.updateStreams:true
15+

src/main/java/ldbc/snb/datagen/generator/DatagenParams.java

Lines changed: 7 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,6 @@ public String toString() {
230230
public static int numPartitions = 1;
231231
public static int numUpdatePartitions = 1;
232232

233-
private static TreeMap<Integer, ScaleFactor> scaleFactors;
234-
private static final String SCALE_FACTORS_FILE = "scale_factors.xml";
235233

236234
public static void readConf( Configuration conf ) {
237235
try {
@@ -300,55 +298,17 @@ public static void readConf( Configuration conf ) {
300298
flashmobTagDistExp = Double.parseDouble(conf.get(ParameterNames.FLASHMOB_TAG_DIST_EXP.toString()));
301299
updatePortion = Double.parseDouble(conf.get(ParameterNames.UPDATE_PORTION.toString()));
302300
blockSize = Integer.parseInt(conf.get(ParameterNames.BLOCK_SIZE.toString()));
303-
scaleFactors = new TreeMap<Integer, ScaleFactor>();
304-
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
305-
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
306-
Document doc = dBuilder.parse(LDBCDatagen.class.getResourceAsStream("/" + SCALE_FACTORS_FILE));
307-
doc.getDocumentElement().normalize();
308-
309-
System.out.println("Reading scale factors..");
310-
NodeList nodes = doc.getElementsByTagName("scale_factor");
311-
for (int i = 0; i < nodes.getLength(); i++) {
312-
Node node = nodes.item(i);
313-
if (node.getNodeType() == Node.ELEMENT_NODE) {
314-
Element element = (Element) node;
315-
Integer num = Integer.parseInt(element.getAttribute("number"));
316-
ScaleFactor scaleFactor = new ScaleFactor();
317-
NodeList files = element.getElementsByTagName("num_persons");
318-
scaleFactor.numPersons = Integer.parseInt(files.item(0).getTextContent());
319-
files = element.getElementsByTagName("start_year");
320-
scaleFactor.startYear = Integer.parseInt(files.item(0).getTextContent());
321-
files = element.getElementsByTagName("num_years");
322-
scaleFactor.numYears = Integer.parseInt(files.item(0).getTextContent());
323-
scaleFactors.put(num, scaleFactor);
324-
}
325-
}
326-
System.out.println("Number of scale factors read "+scaleFactors.size());
327301
} catch (Exception e) {
328302
System.out.println("Error reading scale factors");
329303
System.err.println(e.getMessage());
330304
System.exit(-1);
331305
}
332306

333307
try {
334-
if (conf.get("ldbc.snb.datagen.generator.numPersons") != null && conf.get("ldbc.snb.datagen.generator.numYears") != null && conf.get("ldbc.snb.datagen.generator.startYear") != null) {
335-
numPersons = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numPersons"));
336-
startYear = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.startYear"));
337-
numYears = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numYears"));
338-
endYear = startYear + numYears;
339-
} else {
340-
int scaleFactorId = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.scaleFactor"));
341-
ScaleFactor scaleFactor = scaleFactors.get(scaleFactorId);
342-
System.out.println("Executing with scale factor " + scaleFactorId);
343-
System.out.println(" ... Num Persons " + scaleFactor.numPersons);
344-
System.out.println(" ... Start Year " + scaleFactor.startYear);
345-
System.out.println(" ... Num Years " + scaleFactor.numYears);
346-
numPersons = scaleFactor.numPersons;
347-
startYear = scaleFactor.startYear;
348-
numYears = scaleFactor.numYears;
349-
endYear = startYear + numYears;
350-
}
351-
308+
numPersons = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numPersons"));
309+
startYear = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.startYear"));
310+
numYears = Integer.parseInt(conf.get("ldbc.snb.datagen.generator.numYears"));
311+
endYear = startYear + numYears;
352312
compressed = conf.getBoolean("ldbc.snb.datagen.serializer.compressed",false);
353313
numThreads = conf.getInt("ldbc.snb.datagen.generator.numThreads",1);
354314
updateStreams = conf.getBoolean("ldbc.snb.datagen.serializer.updateStreams",false);
@@ -358,6 +318,9 @@ public static void readConf( Configuration conf ) {
358318
outputDir = conf.get("ldbc.snb.datagen.serializer.outputDir");
359319
hadoopDir = outputDir+"/hadoop";
360320
socialNetworkDir = outputDir+"social_network";
321+
System.out.println(" ... Num Persons " + numPersons);
322+
System.out.println(" ... Start Year " + startYear);
323+
System.out.println(" ... Num Years " + numYears);
361324
} catch (Exception e) {
362325
System.err.println(e.getMessage());
363326
System.exit(-1);

src/main/java/ldbc/snb/datagen/generator/KnowsGenerator.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,9 @@ void createKnow( Person personA, Person personB ) {
4949
creationDate = creationDate - personA.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personA.creationDate()));
5050
creationDate = creationDate - personB.creationDate() >= DatagenParams.deltaTime ? creationDate : creationDate + (DatagenParams.deltaTime - (creationDate - personB.creationDate()));
5151
if( creationDate <= Dictionaries.dates.getEndDateTime() ) {
52-
personB.knows().add(new Knows(personA, creationDate));
53-
personA.knows().add(new Knows(personB, creationDate));
52+
float similarity = Person.Similarity(personA,personB);
53+
personB.knows().add(new Knows(personA, creationDate, similarity));
54+
personA.knows().add(new Knows(personB, creationDate, similarity));
5455
}
5556
}
5657
}

src/main/java/ldbc/snb/datagen/objects/Knows.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ public class Knows implements Writable, Comparable<Knows> {
4848

4949
long creationDate_;
5050
Person.PersonSummary to_= null;
51+
float weight_ = 0.0f;
5152

5253
public Knows() {
5354
to_ = new Person.PersonSummary();
@@ -56,11 +57,13 @@ public Knows() {
5657
public Knows(Knows k) {
5758
to_ = new Person.PersonSummary(k.to());
5859
creationDate_ = k.creationDate();
60+
weight_ = k.weight();
5961
}
6062

61-
public Knows( Person to, long creationDate ){
63+
public Knows( Person to, long creationDate, float weight ){
6264
to_ = new Person.PersonSummary(to);
6365
creationDate_ = creationDate;
66+
weight_ = weight;
6467
}
6568

6669
public Person.PersonSummary to ( ) {
@@ -79,15 +82,24 @@ public void creationDate ( long creationDate ) {
7982
creationDate_ = creationDate;
8083
}
8184

85+
public void weight ( float weight ) {
86+
weight_ = weight;
87+
}
88+
89+
public float weight() {
90+
return weight_;
91+
}
8292

8393
public void readFields(DataInput arg0) throws IOException {
8494
to_.readFields(arg0);
8595
creationDate_ = arg0.readLong();
96+
weight_ = arg0.readFloat();
8697
}
8798

8899
public void write(DataOutput arg0) throws IOException {
89100
to_.write(arg0);
90101
arg0.writeLong(creationDate_);
102+
arg0.writeFloat(weight_);
91103
}
92104

93105
public int compareTo(Knows k) {

src/main/java/ldbc/snb/datagen/objects/Person.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package ldbc.snb.datagen.objects;
22

3+
import ldbc.snb.datagen.dictionary.Dictionaries;
34
import org.apache.hadoop.io.Writable;
45

56
import java.io.DataInput;
@@ -461,4 +462,10 @@ public void write(DataOutput arg0) throws IOException {
461462
arg0.writeLong(classYear_);
462463
}
463464

465+
public static float Similarity(Person personA, Person personB) {
466+
int zorderA = Dictionaries.places.getZorderID(personA.countryId());
467+
int zorderB = Dictionaries.places.getZorderID(personB.countryId());
468+
return 1.0f - (Math.abs(zorderA - zorderB) / 256.0f);
469+
}
470+
464471
}

src/main/java/ldbc/snb/datagen/serializer/graphalytics/CSVPersonSerializer.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,9 @@ protected void serialize(WorkAt workAt) {
109109

110110
protected void serialize(Person p, Knows knows) {
111111
ArrayList<String> arguments = new ArrayList<String>();
112-
long denominator = knows.creationDate() - Math.min(p.creationDate(),knows.to().creationDate());
113-
long numerator = knows.creationDate() - Math.max(p.creationDate(),knows.to().creationDate());
114-
double weight = 1.0 - numerator/(double)denominator;
115112
arguments.add(Long.toString(p.accountId()));
116113
arguments.add(Long.toString(knows.to().accountId()));
117-
arguments.add(Double.toString(weight));
114+
arguments.add(Float.toString(knows.weight()));
118115
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
119116
}
120117
}

src/main/java/ldbc/snb/datagen/util/ConfigParser.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,84 @@
11
package ldbc.snb.datagen.util;
22

3+
import ldbc.snb.datagen.generator.LDBCDatagen;
34
import org.apache.hadoop.conf.Configuration;
5+
import org.w3c.dom.Document;
6+
import org.w3c.dom.Element;
7+
import org.w3c.dom.Node;
8+
import org.w3c.dom.NodeList;
49

10+
import javax.xml.parsers.DocumentBuilder;
11+
import javax.xml.parsers.DocumentBuilderFactory;
512
import java.io.FileInputStream;
613
import java.io.InputStreamReader;
714
import java.util.Map;
815
import java.util.Properties;
16+
import java.util.TreeMap;
917

1018
/**
1119
* Created by aprat on 6/1/14.
1220
*/
1321
public class ConfigParser {
1422

23+
private static TreeMap<String, ScaleFactor> scaleFactors;
24+
private static final String SCALE_FACTORS_FILE = "scale_factors.xml";
25+
1526
public static Configuration initialize() {
27+
28+
/** Default Parameters **/
1629
Configuration conf = new Configuration();
17-
conf.set("ldbc.snb.datagen.generator.scaleFactor", Integer.toString(1));
30+
conf.set("", Integer.toString(1));
31+
conf.set("ldbc.snb.datagen.generator.numPersons","10000");
32+
conf.set("ldbc.snb.datagen.generator.startYear","2010");
33+
conf.set("ldbc.snb.datagen.generator.numYears","3");
1834
conf.set("ldbc.snb.datagen.generator.numThreads", Integer.toString(1));
1935
conf.set("ldbc.snb.datagen.serializer.personSerializer", "ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer");
2036
conf.set("ldbc.snb.datagen.serializer.invariantSerializer", "ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer");
2137
conf.set("ldbc.snb.datagen.serializer.personActivitySerializer", "ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer");
2238
conf.set("ldbc.snb.datagen.generator.distribution.degreeDistribution", "ldbc.snb.datagen.generator.distribution.FacebookDegreeDistribution");
2339
conf.set("ldbc.snb.datagen.serializer.compressed", Boolean.toString(false));
24-
conf.set("ldbc.snb.datagen.serializer.updateStreams", Boolean.toString(false));
40+
conf.set("ldbc.snb.datagen.serializer.updateStreams", Boolean.toString(true));
2541
conf.set("ldbc.snb.datagen.serializer.numPartitions", "1");
2642
conf.set("ldbc.snb.datagen.serializer.outputDir", "./");
2743
conf.set("ldbc.snb.datagen.serializer.socialNetworkDir", "./social_network");
2844
conf.set("ldbc.snb.datagen.serializer.hadoopDir", "./hadoop");
2945
conf.set("ldbc.snb.datagen.generator.deltaTime", "10000");
3046
conf.set("ldbc.snb.datagen.generator.activity", "true");
47+
48+
/** Loading predefined Scale Factors **/
49+
50+
try {
51+
scaleFactors = new TreeMap<String, ScaleFactor>();
52+
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
53+
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
54+
Document doc = dBuilder.parse(LDBCDatagen.class.getResourceAsStream("/" + SCALE_FACTORS_FILE));
55+
doc.getDocumentElement().normalize();
56+
57+
System.out.println("Reading scale factors..");
58+
NodeList nodes = doc.getElementsByTagName("scale_factor");
59+
for (int i = 0; i < nodes.getLength(); i++) {
60+
Node node = nodes.item(i);
61+
if (node.getNodeType() == Node.ELEMENT_NODE) {
62+
Element element = (Element) node;
63+
String scaleFactorName = element.getAttribute("name");
64+
ScaleFactor scaleFactor = new ScaleFactor();
65+
NodeList properties = ((Element) node).getElementsByTagName("property");
66+
for( int j = 0; j < properties.getLength(); ++j ) {
67+
Element property = (Element) properties.item(j);
68+
String name = property.getElementsByTagName("name").item(0).getTextContent();
69+
String value = property.getElementsByTagName("value").item(0).getTextContent();
70+
scaleFactor.properties.put(name,value);
71+
}
72+
System.out.println("Loaded scale factor configuration set "+scaleFactorName);
73+
scaleFactors.put(scaleFactorName, scaleFactor);
74+
}
75+
}
76+
System.out.println("Number of scale factors read "+scaleFactors.size());
77+
} catch (Exception e) {
78+
System.out.println("Error reading scale factors");
79+
System.err.println(e.getMessage());
80+
System.exit(-1);
81+
}
3182
return conf;
3283
}
3384

@@ -36,7 +87,15 @@ public static Configuration readConfig(Configuration conf, String paramsFile) {
3687
Properties properties = new Properties();
3788
properties.load(new InputStreamReader(new FileInputStream(paramsFile), "UTF-8"));
3889
for( String s : properties.stringPropertyNames()) {
39-
conf.set(s,properties.getProperty(s));
90+
if(s.compareTo("ldbc.snb.datagen.generator.scaleFactor") == 0) {
91+
ScaleFactor scaleFactor = scaleFactors.get(properties.get(s));
92+
System.out.println("Applied configuration of scale factor " + properties.get(s));
93+
for( Map.Entry<String,String> e : scaleFactor.properties.entrySet()) {
94+
conf.set(e.getKey(), e.getValue());
95+
}
96+
} else {
97+
conf.set(s, properties.getProperty(s));
98+
}
4099
}
41100
if (conf.get("fs.defaultFS").compareTo("file:///") == 0) {
42101
System.out.println("Running in standalone mode. Setting numThreads to 1");
src/main/java/ldbc/snb/datagen/util/ScaleFactor.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package ldbc.snb.datagen.util;
22

3+
import java.util.TreeMap;
4+
35
/**
46
* Created by aprat on 5/17/14.
57
*/
68
public class ScaleFactor {
7-
public int numPersons = 10000;
8-
public int startYear = 2010;
9-
public int numYears = 3;
9+
public TreeMap<String,String> properties;
10+
ScaleFactor() {
11+
properties = new TreeMap<String,String>();
12+
}
1013
}

0 commit comments

Comments
 (0)