Skip to content

Commit 8bb7054

Browse files
committed
Fixed turtle serializer
1 parent b202535 commit 8bb7054

File tree

9 files changed

+759
-20
lines changed

9 files changed

+759
-20
lines changed
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
package ldbc.snb.datagen.generator;
2+
3+
import javafx.util.Pair;
4+
import ldbc.snb.datagen.objects.Knows;
5+
import ldbc.snb.datagen.objects.Person;
6+
import org.apache.hadoop.conf.Configuration;
7+
import org.roaringbitmap.RoaringBitmap;
8+
9+
import java.io.BufferedReader;
10+
import java.io.IOException;
11+
import java.io.InputStreamReader;
12+
import java.util.*;
13+
14+
/**
15+
* Created by aprat on 12/07/16.
16+
*/
17+
public class BTERKnowsGenerator implements KnowsGenerator{
18+
19+
private int graphSize = 0;
20+
private Random random;
21+
private Configuration conf;
22+
private long [] expectedDegree;
23+
private double [] p ;
24+
private HashMap<Long,RoaringBitmap> openCommunities = new HashMap<Long,RoaringBitmap>();
25+
private ArrayList<RoaringBitmap> closedCommunities = new ArrayList<RoaringBitmap>();
26+
private RoaringBitmap smallDegreeNodes = new RoaringBitmap();
27+
private RoaringBitmap [] adjacencyMatrix;
28+
private int count = 0;
29+
30+
public int BinarySearch(ArrayList<Pair<Long,Double>> array, Long degree) {
31+
int min = 0;
32+
int max = array.size();
33+
while(min <= max) {
34+
int midPoint = (max - min) / 2 + min;
35+
if(midPoint >= array.size()) return array.size()-1;
36+
if(midPoint < 0) return 0;
37+
if(array.get(midPoint).getKey() > degree ) {
38+
max = midPoint - 1;
39+
} else if(array.get(midPoint).getKey() < degree) {
40+
min = midPoint + 1;
41+
} else {
42+
return midPoint;
43+
}
44+
}
45+
return max;
46+
}
47+
48+
void generateCommunities(RoaringBitmap block) {
49+
Iterator<Integer> iter = block.iterator();
50+
while(iter.hasNext()) {
51+
int node = iter.next();
52+
RoaringBitmap community = openCommunities.get(expectedDegree[node]+1);
53+
if(community != null) {
54+
community.add(node);
55+
if(community.getCardinality() >= (expectedDegree[node]+1)) {
56+
openCommunities.remove(expectedDegree[node]+1);
57+
closedCommunities.add(community);
58+
}
59+
} else {
60+
community = new RoaringBitmap();
61+
community.add(node);
62+
openCommunities.put(expectedDegree[node]+1,community);
63+
}
64+
}
65+
}
66+
67+
void generateEdgesInCommunity(RoaringBitmap community) {
68+
Iterator<Integer> iter = community.iterator();
69+
while(iter.hasNext()) {
70+
int nodeA = iter.next();
71+
Iterator<Integer> iter2 = community.iterator();
72+
while(iter2.hasNext()) {
73+
int nodeB = iter2.next();
74+
if(nodeA < nodeB) {
75+
double prob = random.nextDouble();
76+
if(prob < p[community.getCardinality()-1]) {
77+
adjacencyMatrix[nodeA].add(nodeB);
78+
adjacencyMatrix[nodeB].add(nodeA);
79+
}
80+
}
81+
}
82+
}
83+
}
84+
85+
void generateRemainingEdges() {
86+
LinkedList<Integer> stubs = new LinkedList<Integer>();
87+
for(int i = 0; i < graphSize; ++i) {
88+
long difference = expectedDegree[i]-adjacencyMatrix[i].getCardinality();
89+
if( difference > 0) {
90+
for(int j = 0; j < difference; ++j) {
91+
stubs.add(i);
92+
}
93+
}
94+
}
95+
Collections.shuffle(stubs,random);
96+
while(!stubs.isEmpty()) {
97+
int node1 = stubs.get(0);
98+
stubs.remove(0);
99+
if(!stubs.isEmpty()) {
100+
int node2 = stubs.get(0);
101+
stubs.remove(0);
102+
if(node1 != node2) {
103+
adjacencyMatrix[node1].add(node2);
104+
adjacencyMatrix[node2].add(node1);
105+
}
106+
}
107+
}
108+
}
109+
110+
@Override
111+
public void generateKnows(ArrayList<Person> persons, int seed, ArrayList<Float> percentages, int step_index) {
112+
113+
graphSize = persons.size();
114+
expectedDegree = new long[graphSize];
115+
adjacencyMatrix = new RoaringBitmap[graphSize];
116+
p = new double[graphSize];
117+
for(int i = 0; i < graphSize; ++i) {
118+
adjacencyMatrix[i] = new RoaringBitmap();
119+
}
120+
random = new Random();
121+
random.setSeed(seed);
122+
openCommunities.clear();
123+
closedCommunities.clear();
124+
smallDegreeNodes.clear();
125+
int maxExpectedDegree = 0;
126+
for(int i = 0; i < graphSize; ++i) {
127+
adjacencyMatrix[i].clear();
128+
expectedDegree[i] = Knows.target_edges(persons.get(i),percentages,step_index);
129+
maxExpectedDegree = maxExpectedDegree < expectedDegree[i] ? (int)expectedDegree[i] : maxExpectedDegree;
130+
}
131+
p = new double[maxExpectedDegree+1];
132+
133+
/** Initializing the array of triangles **/
134+
ArrayList<Pair<Long,Double>> ccDistribution = new ArrayList<Pair<Long,Double>>();
135+
try {
136+
BufferedReader reader = new BufferedReader(
137+
new InputStreamReader(getClass().getResourceAsStream(conf.get("ldbc.snb.datagen.generator.BTERKnowsGenerator.ccDistribution")), "UTF-8"));
138+
String line;
139+
while ((line = reader.readLine()) != null) {
140+
String data[] = line.split(" ");
141+
ccDistribution.add(new Pair<Long, Double>(Long.parseLong(data[0]), Double.parseDouble(data[1])));
142+
}
143+
reader.close();
144+
} catch( IOException e) {
145+
e.printStackTrace();
146+
}
147+
148+
p[0] = 0.0;
149+
p[1] = 0.0;
150+
for(int i = 2; i < maxExpectedDegree+1; ++i) {
151+
int degree = i;
152+
int pos = BinarySearch(ccDistribution,(long)degree);
153+
if(ccDistribution.get(pos).getKey() == degree || pos == (ccDistribution.size() - 1)) {
154+
p[degree] = ccDistribution.get(pos).getValue();
155+
} else if( pos < ccDistribution.size() - 1 ){
156+
long minDegree = ccDistribution.get(pos).getKey();
157+
long maxDegree = ccDistribution.get(pos+1).getKey();
158+
double ratio = (degree - minDegree) / (maxDegree - minDegree);
159+
double minCC = ccDistribution.get(pos).getValue();
160+
double maxCC = ccDistribution.get(pos+1).getValue();
161+
double cc_current = ratio * (maxCC - minCC ) + minCC;
162+
p[degree] = Math.pow(cc_current,1/3.0);
163+
}
164+
}
165+
166+
RoaringBitmap block = new RoaringBitmap();
167+
for(int i = 0; i < graphSize; ++i) {
168+
if(expectedDegree[i] > 1 ) {
169+
block.add(i);
170+
} else {
171+
smallDegreeNodes.add(i);
172+
}
173+
}
174+
generateCommunities(block);
175+
176+
TreeMap<Long,RoaringBitmap> sortedMap = new TreeMap<Long,RoaringBitmap>(openCommunities);
177+
RoaringBitmap currentCommunity = null;
178+
long currentCommunitySize = 0;
179+
for(HashMap.Entry<Long,RoaringBitmap> community : sortedMap.entrySet()) {
180+
RoaringBitmap nextCommunity = community.getValue();
181+
if(currentCommunity == null) {
182+
currentCommunity = nextCommunity;
183+
currentCommunitySize = community.getKey();
184+
} else {
185+
while(currentCommunity.getCardinality()<=currentCommunitySize && nextCommunity.getCardinality() > 0) {
186+
int nextNode = nextCommunity.select(0);
187+
currentCommunity.add(nextNode);
188+
nextCommunity.remove(nextNode);
189+
}
190+
if(currentCommunity.getCardinality()>=currentCommunitySize) {
191+
closedCommunities.add(currentCommunity);
192+
currentCommunity=null;
193+
currentCommunitySize = 0;
194+
if(nextCommunity.getCardinality() > 0) {
195+
currentCommunity=nextCommunity;
196+
currentCommunitySize = community.getKey();
197+
}
198+
}
199+
}
200+
}
201+
openCommunities.clear();
202+
203+
for(RoaringBitmap community : closedCommunities) {
204+
generateEdgesInCommunity(community);
205+
}
206+
207+
generateRemainingEdges();
208+
209+
for (int i = 0; i < graphSize; ++i) {
210+
Iterator<Integer> it = adjacencyMatrix[i].iterator();
211+
while (it.hasNext()) {
212+
int next = it.next();
213+
Knows.createKnow(random, persons.get(i), persons.get(next));
214+
}
215+
}
216+
count++;
217+
218+
}
219+
220+
@Override
221+
public void initialize(Configuration conf) {
222+
this.conf = conf;
223+
}
224+
}

src/main/java/ldbc/snb/datagen/generator/LDBCDatagen.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ public int runGenerateJob(Configuration conf) throws Exception {
8989
percentages.add(0.45f);
9090
percentages.add(0.1f);
9191

92+
//percentages.add(1.0f);
93+
//percentages.add(0.1f);
94+
9295

9396
long start = System.currentTimeMillis();
9497
printProgress("Starting: Person generation");

src/main/java/ldbc/snb/datagen/generator/distribution/FacebookDegreeDistribution.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public void loadFBBuckets() {
8686
String line;
8787
while ((line = fbDataReader.readLine()) != null) {
8888
String data[] = line.split(" ");
89-
buckets_.add(new Bucket((int)Float.parseFloat(data[0]), (int)Float.parseFloat(data[1])));
89+
buckets_.add(new Bucket(Float.parseFloat(data[0]), Float.parseFloat(data[1])));
9090
}
9191
fbDataReader.close();
9292
} catch (IOException e) {
@@ -97,7 +97,7 @@ public void loadFBBuckets() {
9797
}
9898

9999
public void rebuildBucketRange() {
100-
int newMin, newMax;
100+
double newMin, newMax;
101101
for (int i = 0; i < buckets_.size(); i++) {
102102
newMin = buckets_.get(i).min() * mean_ / FB_MEAN_;
103103
newMax = buckets_.get(i).max() * mean_ / FB_MEAN_;

src/main/java/ldbc/snb/datagen/generator/distribution/utils/Bucket.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,27 +59,27 @@ public static ArrayList<Bucket> bucketizeHistogram(ArrayList<Pair<Integer,Intege
5959
return buckets;
6060
}
6161

62-
int min_;
63-
int max_;
62+
double min_;
63+
double max_;
6464

65-
public Bucket(int min, int max) {
65+
public Bucket(double min, double max) {
6666
this.min_ = min;
6767
this.max_ = max;
6868
}
6969

70-
public int min() {
70+
public double min() {
7171
return min_;
7272
}
7373

74-
public void min(int min) {
74+
public void min(double min) {
7575
min_ = min;
7676
}
7777

78-
public int max() {
78+
public double max() {
7979
return max_;
8080
}
8181

82-
public void max(int max) {
82+
public void max(double max) {
8383
max_ = max;
8484
}
8585
}

src/main/java/ldbc/snb/datagen/generator/distribution/utils/BucketedDistribution.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,12 @@ public void reset(long seed) {
3636

3737
public long nextDegree() {
3838
int idx = randomPercentile_.nextInt(buckets_.size());
39-
int minRange = (buckets_.get(idx).min());
40-
int maxRange = (buckets_.get(idx).max());
39+
double minRange = (buckets_.get(idx).min());
40+
double maxRange = (buckets_.get(idx).max());
4141
if( maxRange < minRange ) {
4242
maxRange = minRange;
4343
}
44-
long ret= randomDegree_.get(idx).nextInt( maxRange - minRange + 1) + minRange;
44+
long ret= randomDegree_.get(idx).nextInt( (int)maxRange - (int)minRange + 1) + (int)minRange;
4545
return ret;
4646
}
4747
}

src/main/java/ldbc/snb/datagen/serializer/snb/interactive/TurtlePersonActivitySerializer.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
import ldbc.snb.datagen.vocabulary.*;
1414
import org.apache.hadoop.conf.Configuration;
1515

16+
import java.text.SimpleDateFormat;
17+
18+
1619
/**
1720
*
1821
* @author aprat
@@ -22,6 +25,7 @@ public class TurtlePersonActivitySerializer extends PersonActivitySerializer {
2225
private String empty="";
2326
private long membershipId = 0;
2427
private long likeId = 0;
28+
private SimpleDateFormat dateTimeFormat = null;
2529

2630
private enum FileNames {
2731
SOCIAL_NETWORK ("social_network_activity");
@@ -42,6 +46,7 @@ public TurtlePersonActivitySerializer() {
4246
@Override
4347
public void initialize(Configuration conf, int reducerId) {
4448

49+
dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
4550
int numFiles = FileNames.values().length;
4651
writers = new HDFSWriter[numFiles];
4752
for( int i = 0; i < numFiles; ++i) {
@@ -72,7 +77,7 @@ protected void serialize(final Forum forum ) {
7277
Turtle.AddTriple(result, false, false, forumPrefix, SNVOC.title,
7378
Turtle.createLiteral(forum.title()));
7479
Turtle.AddTriple(result, false, true, forumPrefix, SNVOC.creationDate,
75-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(forum.creationDate()), XSD.DateTime));
80+
Turtle.createDataTypeLiteral(dateTimeFormat.format(forum.creationDate()), XSD.DateTime));
7681

7782
Turtle.createTripleSPO(result, forumPrefix,
7883
SNVOC.hasModerator, SN.getPersonURI(forum.moderator().accountId()));
@@ -96,7 +101,7 @@ protected void serialize( final Post post ) {
96101
Turtle.createDataTypeLiteral(Long.toString(post.messageId()), XSD.Long));
97102

98103
Turtle.AddTriple(result, false, false, prefix, SNVOC.creationDate,
99-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(post.creationDate()), XSD.DateTime));
104+
Turtle.createDataTypeLiteral(dateTimeFormat.format(post.creationDate()), XSD.DateTime));
100105

101106
Turtle.AddTriple(result, false, false, prefix, SNVOC.ipaddress,
102107
Turtle.createLiteral(post.ipAddress().toString()));
@@ -135,7 +140,7 @@ protected void serialize(final Comment comment ) {
135140
Turtle.createDataTypeLiteral(Long.toString(comment.messageId()), XSD.Long));
136141

137142
Turtle.AddTriple(result, false, false, prefix, SNVOC.creationDate,
138-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(comment.creationDate()), XSD.DateTime));
143+
Turtle.createDataTypeLiteral(dateTimeFormat.format(comment.creationDate()), XSD.DateTime));
139144
Turtle.AddTriple(result, false, false, prefix, SNVOC.ipaddress,
140145
Turtle.createLiteral(comment.ipAddress().toString()));
141146
Turtle.AddTriple(result, false, false, prefix, SNVOC.browser,
@@ -176,7 +181,7 @@ protected void serialize(final Photo photo ) {
176181
Turtle.AddTriple(result, false, false, prefix, SNVOC.browser,
177182
Turtle.createLiteral(Dictionaries.browsers.getName(photo.browserId())));
178183
Turtle.AddTriple(result, false, true, prefix, SNVOC.creationDate,
179-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(photo.creationDate()), XSD.DateTime));
184+
Turtle.createDataTypeLiteral(dateTimeFormat.format(photo.creationDate()), XSD.DateTime));
180185

181186
Turtle.createTripleSPO(result, prefix, SNVOC.hasCreator, SN.getPersonURI(photo.author().accountId()));
182187
Turtle.createTripleSPO(result, SN.getForumURI(photo.forumId()), SNVOC.containerOf, prefix);
@@ -198,7 +203,7 @@ protected void serialize( final ForumMembership membership ) {
198203

199204
Turtle.AddTriple(result, true, false, memberhipPrefix, SNVOC.hasPerson, SN.getPersonURI(membership.person().accountId()));
200205
Turtle.AddTriple(result, false, true, memberhipPrefix, SNVOC.joinDate,
201-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(membership.creationDate()), XSD.DateTime));
206+
Turtle.createDataTypeLiteral(dateTimeFormat.format(membership.creationDate()), XSD.DateTime));
202207
membershipId++;
203208
writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString());
204209
}
@@ -218,7 +223,7 @@ protected void serialize( final Like like ) {
218223
Turtle.AddTriple(result, true, false, likePrefix, SNVOC.hasComment, prefix);
219224
}
220225
Turtle.AddTriple(result, false, true, likePrefix, SNVOC.creationDate,
221-
Turtle.createDataTypeLiteral(Dictionaries.dates.formatDateTime(like.date), XSD.DateTime));
226+
Turtle.createDataTypeLiteral(dateTimeFormat.format(like.date), XSD.DateTime));
222227
likeId++;
223228
writers[FileNames.SOCIAL_NETWORK.ordinal()].write(result.toString());
224229
}

0 commit comments

Comments
 (0)