Skip to content

Commit e54c8b6

Browse files
authored
Fixed several bugs, causing the population distribution to be incorrect (#40)
* Fixed several bugs that caused the population distribution to be incorrect
* Made datagen remove the existing substitution parameters folder
* Added upward propagation of some exceptions
* Fixed several bugs related to dates
* Updated tests for query parameters
* Added missing tests
* Added CHANGELOG
1 parent 02c322d commit e54c8b6

File tree

131 files changed

+4826
-4371
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

131 files changed

+4826
-4371
lines changed

.travis.yml

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,14 @@ dist: trusty
22
sudo: required
33
language: java
44
jdk:
5-
- openjdk7
5+
- openjdk8
66
python:
77
- "2.7"
8-
install:
9-
- curl -s http://www.eu.apache.org/dist/hadoop/common/hadoop-2.6.5/hadoop-2.6.5.tar.gz | tar -xz
10-
before_script:
11-
- export HADOOP_HOME=`readlink -f hadoop-2.6.5`
12-
- export LDBC_SNB_DATAGEN_HOME=`pwd`
13-
- export HADOOP_CLIENT_OPTS="-Xmx2G"
148
script:
15-
- cp test_params.ini params.ini
16-
- ./run.sh
17-
- ls -al
18-
- echo $LDBC_SNB_DATAGEN_HOME
199
- mvn test
10+
- wc -l ./test_data/social_network/*
11+
- head -n 15 ./test_data/social_network/person_0_0.csv
12+
- head -n 15 ./test_data/social_network/person_knows_person_0_0.csv
2013
notifications:
2114
slack: ldbcouncil:OrBanrJ7l0EHQbj8T5YdJYhd
2215
email: false

CHANGELOG.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,18 @@ based on the graphalytics scaling metric.
77
- Fixed a bug which caused wrong serialization of data once the numPartitions
88
parameter was larger than 1.
99

10+
- Integrated Codacy and fixed numerous coding style issues
11+
12+
- Updated license header and added it to those files where it was missing
13+
14+
- Fixed a bug causing the population distribution to be wrong. Added a
15+
for this
16+
17+
- Fixed a bug causing the distribution of posts per country to be wrong. Added a
18+
test for this
19+
20+
- Fixed several bugs related to date generation of dependent events
21+
1022
v0.2.6 CHANGELOG
1123
Bi parameter generation
1224
Added testing

paramgenerator/generateparamsbi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def format_date(date):
2929

3030
class ParamsWriter:
3131
def __init__(self, name, param_names):
32-
self.file = codecs.open("substitution_parameters/"+name+"_param.txt", "w",encoding="utf-8")
32+
self.file = codecs.open(sys.argv[2]+"/"+name+"_param.txt", "w",encoding="utf-8")
3333
for i in range(0,len(param_names)):
3434
if i>0:
3535
self.file.write("|")

src/main/java/ldbc/snb/datagen/dictionary/BrowserDictionary.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,13 @@ public class BrowserDictionary {
4848
private static final String SEPARATOR_ = " ";
4949
private ArrayList<String> browsers_;
5050
private ArrayList<Double> cumulativeDistribution_;
51-
private double probAnotherBrowser_ = 0.0f;
51+
private double probAnotherBrowser_ = 0.0f;
5252

5353
public BrowserDictionary(double probAnotherBrowser) {
5454
probAnotherBrowser_ = probAnotherBrowser;
5555
browsers_ = new ArrayList<String>();
5656
cumulativeDistribution_ = new ArrayList<Double>();
57-
load(DatagenParams.browserDictonryFile);
57+
load(DatagenParams.browserDictonryFile);
5858
}
5959

6060
private void load(String fileName) {
@@ -83,7 +83,8 @@ public String getName(int id) {
8383
public int getRandomBrowserId(Random random) {
8484
double prob = random.nextDouble();
8585
int minIdx = 0;
86-
int maxIdx = (byte) ((prob < cumulativeDistribution_.get(minIdx)) ? minIdx : cumulativeDistribution_.size() - 1);
86+
int maxIdx = (byte) ((prob < cumulativeDistribution_.get(minIdx)) ? minIdx : cumulativeDistribution_
87+
.size() - 1);
8788
// Binary search
8889
while ((maxIdx - minIdx) > 1) {
8990
int middlePoint = minIdx + (maxIdx - minIdx) / 2;

src/main/java/ldbc/snb/datagen/dictionary/CompanyDictionary.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ public CompanyDictionary(PlaceDictionary placeDictionary,
8585
for (Integer id : placeDictionary.getCountries()) {
8686
this.companiesByCountry.put(id, new ArrayList<Long>());
8787
}
88-
load(DatagenParams.companiesDictionaryFile);
88+
load(DatagenParams.companiesDictionaryFile);
8989
}
9090

9191
/**
@@ -137,13 +137,16 @@ public long getRandomCompany(RandomGeneratorFarm randomFarm, int countryId) {
137137
int locId = countryId;
138138
ArrayList<Integer> countries = placeDictionary.getCountries();
139139
if (randomFarm.get(RandomGeneratorFarm.Aspect.UNCORRELATED_COMPANY).nextDouble() <= probUnCorrelatedCompany) {
140-
locId = countries.get(randomFarm.get(RandomGeneratorFarm.Aspect.UNCORRELATED_COMPANY_LOCATION).nextInt(countries.size()));
140+
locId = countries.get(randomFarm.get(RandomGeneratorFarm.Aspect.UNCORRELATED_COMPANY_LOCATION)
141+
.nextInt(countries.size()));
141142
}
142143
// In case the country doesn't have any company select another country.
143144
while (companiesByCountry.get(locId).size() == 0) {
144-
locId = countries.get(randomFarm.get(RandomGeneratorFarm.Aspect.UNCORRELATED_COMPANY_LOCATION).nextInt(countries.size()));
145+
locId = countries.get(randomFarm.get(RandomGeneratorFarm.Aspect.UNCORRELATED_COMPANY_LOCATION)
146+
.nextInt(countries.size()));
145147
}
146-
int randomCompanyIdx = randomFarm.get(RandomGeneratorFarm.Aspect.COMPANY).nextInt(companiesByCountry.get(locId).size());
148+
int randomCompanyIdx = randomFarm.get(RandomGeneratorFarm.Aspect.COMPANY).nextInt(companiesByCountry.get(locId)
149+
.size());
147150
return companiesByCountry.get(locId).get(randomCompanyIdx);
148151
}
149152

src/main/java/ldbc/snb/datagen/dictionary/Dictionaries.java

Lines changed: 68 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -42,81 +42,77 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4242
import java.util.GregorianCalendar;
4343

4444
/**
45-
*
4645
* @author aprat
4746
*/
4847
public class Dictionaries {
4948

50-
public static BrowserDictionary browsers = null;
51-
public static CompanyDictionary companies = null;
52-
public static DateGenerator dates = null;
53-
public static EmailDictionary emails = null;
54-
public static IPAddressDictionary ips = null;
55-
public static LanguageDictionary languages = null;
56-
public static NamesDictionary names = null;
57-
public static PlaceDictionary places = null;
58-
public static PopularPlacesDictionary popularPlaces = null;
59-
public static TagDictionary tags = null;
60-
public static TagMatrix tagMatrix = null;
61-
public static TagTextDictionary tagText = null;
62-
public static UniversityDictionary universities = null;
63-
public static FlashmobTagDictionary flashmobs = null;
64-
65-
66-
public static void loadDictionaries(Configuration conf) {
67-
68-
browsers = new BrowserDictionary(DatagenParams.probAnotherBrowser);
69-
70-
dates = new DateGenerator( conf, new GregorianCalendar(DatagenParams.startYear,
71-
DatagenParams.startMonth,
72-
DatagenParams.startDate),
73-
new GregorianCalendar(DatagenParams.endYear,
74-
DatagenParams.endMonth,
75-
DatagenParams.endDate),
76-
DatagenParams.alpha
49+
public static BrowserDictionary browsers = null;
50+
public static CompanyDictionary companies = null;
51+
public static DateGenerator dates = null;
52+
public static EmailDictionary emails = null;
53+
public static IPAddressDictionary ips = null;
54+
public static LanguageDictionary languages = null;
55+
public static NamesDictionary names = null;
56+
public static PlaceDictionary places = null;
57+
public static PopularPlacesDictionary popularPlaces = null;
58+
public static TagDictionary tags = null;
59+
public static TagMatrix tagMatrix = null;
60+
public static TagTextDictionary tagText = null;
61+
public static UniversityDictionary universities = null;
62+
public static FlashmobTagDictionary flashmobs = null;
63+
64+
65+
public static void loadDictionaries(Configuration conf) {
66+
67+
browsers = new BrowserDictionary(DatagenParams.probAnotherBrowser);
68+
69+
dates = new DateGenerator(conf, new GregorianCalendar(DatagenParams.startYear,
70+
DatagenParams.startMonth,
71+
DatagenParams.startDate),
72+
new GregorianCalendar(DatagenParams.endYear,
73+
DatagenParams.endMonth,
74+
DatagenParams.endDate),
75+
DatagenParams.alpha
7776
);
78-
79-
80-
emails = new EmailDictionary();
81-
82-
places = new PlaceDictionary();
83-
84-
ips = new IPAddressDictionary( places,
85-
DatagenParams.probDiffIPinTravelSeason,
86-
DatagenParams.probDiffIPnotTravelSeason
87-
);
88-
89-
90-
languages = new LanguageDictionary(places,
91-
DatagenParams.probEnglish,
92-
DatagenParams.probSecondLang);
93-
94-
names = new NamesDictionary(places);
95-
96-
popularPlaces = new PopularPlacesDictionary(places);
97-
98-
tags = new TagDictionary( places.getCountries().size(),
99-
DatagenParams.tagCountryCorrProb);
100-
101-
tagMatrix = new TagMatrix();
102-
103-
companies = new CompanyDictionary(places, DatagenParams.probUnCorrelatedCompany);
104-
105-
universities = new UniversityDictionary(places,
106-
DatagenParams.probUnCorrelatedOrganization,
107-
DatagenParams.probTopUniv,
108-
companies.getNumCompanies());
109-
110-
flashmobs = new FlashmobTagDictionary(tags,
111-
dates,
112-
DatagenParams.flashmobTagsPerMonth,
113-
DatagenParams.probInterestFlashmobTag,
114-
DatagenParams.probRandomPerLevel,
115-
DatagenParams.flashmobTagMinLevel,
116-
DatagenParams.flashmobTagMaxLevel,
117-
DatagenParams.flashmobTagDistExp);
118-
119-
tagText = new TagTextDictionary(tags, DatagenParams.ratioReduceText);
120-
}
121-
77+
78+
79+
emails = new EmailDictionary();
80+
81+
places = new PlaceDictionary();
82+
83+
ips = new IPAddressDictionary(places);
84+
85+
86+
languages = new LanguageDictionary(places,
87+
DatagenParams.probEnglish,
88+
DatagenParams.probSecondLang);
89+
90+
names = new NamesDictionary(places);
91+
92+
popularPlaces = new PopularPlacesDictionary(places);
93+
94+
tags = new TagDictionary(places.getCountries().size(),
95+
DatagenParams.tagCountryCorrProb);
96+
97+
tagMatrix = new TagMatrix();
98+
99+
companies = new CompanyDictionary(places, DatagenParams.probUnCorrelatedCompany);
100+
101+
universities = new UniversityDictionary(places,
102+
DatagenParams.probUnCorrelatedOrganization,
103+
DatagenParams.probTopUniv,
104+
companies.getNumCompanies());
105+
106+
flashmobs = new FlashmobTagDictionary(tags,
107+
dates,
108+
DatagenParams.flashmobTagsPerMonth,
109+
DatagenParams.probInterestFlashmobTag,
110+
DatagenParams.probRandomPerLevel,
111+
DatagenParams.flashmobTagMinLevel,
112+
DatagenParams.flashmobTagMaxLevel,
113+
DatagenParams.flashmobTagDistExp);
114+
115+
tagText = new TagTextDictionary(tags, DatagenParams.ratioReduceText);
116+
}
117+
122118
}

src/main/java/ldbc/snb/datagen/dictionary/EmailDictionary.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public class EmailDictionary {
5858
* @brief Constructor.
5959
*/
6060
public EmailDictionary() {
61-
load(DatagenParams.emailDictionaryFile);
61+
load(DatagenParams.emailDictionaryFile);
6262
}
6363

6464
/**
@@ -97,7 +97,8 @@ public String getRandomEmail(Random randomTop, Random randomEmail) {
9797
int maxIdx = cumulativeDistribution.size() - 1;
9898
double prob = randomTop.nextDouble();
9999
if (prob > cumulativeDistribution.get(maxIdx)) {
100-
int Idx = randomEmail.nextInt(emails.size() - cumulativeDistribution.size()) + cumulativeDistribution.size();
100+
int Idx = randomEmail.nextInt(emails.size() - cumulativeDistribution.size()) + cumulativeDistribution
101+
.size();
101102
return emails.get(Idx);
102103
} else if (prob < cumulativeDistribution.get(minIdx)) {
103104
return emails.get(minIdx);

src/main/java/ldbc/snb/datagen/dictionary/FlashmobTagDictionary.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public FlashmobTagDictionary(TagDictionary tagDictionary,
9898
this.flashmobTagsPerMonth = flashmobTagsPerMonth;
9999
this.probInterestFlashmobTag = probInterestFlashmobTag;
100100
this.probRandomPerLevel = probRandomPerLevel;
101-
initialize();
101+
initialize();
102102
}
103103

104104
/**
@@ -186,7 +186,7 @@ public ArrayList<FlashmobTag> generateFlashmobTags(Random rand, TreeSet<Integer>
186186
}
187187
int earliestIndex = searchEarliestIndex(fromDate);
188188
for (int i = earliestIndex; i < flashmobTagCumDist.length; ++i) {
189-
if (selectFlashmobTag(rand,i)) {
189+
if (selectFlashmobTag(rand, i)) {
190190
result.add(flashmobTagCumDist[i]);
191191
}
192192
}

0 commit comments

Comments
 (0)