Skip to content

Commit d854e5f

Browse files
committed
Further refactoring, fix Empty and Turtle serializers
1 parent 4f08af3 commit d854e5f

28 files changed

+266
-263
lines changed

check-md5sums-ttl.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
2+
3+
# Regression check: compares the MD5 digest of generated output files against
# fixed expected digests. File paths are relative, so the script must be run
# from the directory that contains the social_network/ folder.
#
# With `set -e`, the first comparison that fails terminates the script with a
# non-zero status; no message is printed to say which file mismatched.
set -e
4+
5+
# Turtle (.ttl) outputs. `cut -d' ' -f1` keeps only the digest, i.e. the first
# space-separated field of md5sum's output.
[[ `md5sum social_network/social_network_activity_0_0.ttl | cut -d' ' -f1` == '52a5356ecc757a8e3e5bce2c8ea79557' ]]
6+
[[ `md5sum social_network/social_network_person_0_0.ttl | cut -d' ' -f1` == '30973a3dc339617773651b425f7441e4' ]]
7+
[[ `md5sum social_network/social_network_static_0_0.ttl | cut -d' ' -f1` == '3c4f4120a2ea1e101cf7d72fbfe30c48' ]]
8+
9+
# Update-stream CSV outputs, checked the same way.
[[ `md5sum social_network/updateStream_0_0_forum.csv | cut -d' ' -f1` == '7e00243f68a8171974eabe4ac37df86b' ]]
10+
[[ `md5sum social_network/updateStream_0_0_person.csv | cut -d' ' -f1` == '2e1f44e6d48112a9fd87092206153b57' ]]

src/main/java/ldbc/snb/datagen/hadoop/writer/HDFSCSVWriter.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ public class HDFSCSVWriter extends HDFSWriter {
4545
private StringBuffer buffer;
4646
private boolean endLineSeparator = true;
4747

48-
4948
public HDFSCSVWriter(String outputDir, String prefix, int numPartitions, boolean compressed, String separator, boolean endLineSeparator) throws IOException {
5049
super(outputDir, prefix, numPartitions, compressed, "csv");
5150
this.separator = separator;

src/main/java/ldbc/snb/datagen/serializer/DynamicActivitySerializer.java

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,24 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4141
import ldbc.snb.datagen.entities.dynamic.messages.Post;
4242
import ldbc.snb.datagen.entities.dynamic.relations.ForumMembership;
4343
import ldbc.snb.datagen.entities.dynamic.relations.Like;
44+
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
4445

4546
/**
4647
* @author aprat
4748
*/
48-
abstract public class DynamicActivitySerializer extends LDBCSerializer {
49+
abstract public class DynamicActivitySerializer<TWriter extends HDFSWriter> extends LDBCSerializer<TWriter> {
50+
51+
abstract protected void serialize(final Forum forum);
52+
53+
abstract protected void serialize(final Post post);
54+
55+
abstract protected void serialize(final Comment comment);
56+
57+
abstract protected void serialize(final Photo photo);
58+
59+
abstract protected void serialize(final ForumMembership membership);
60+
61+
abstract protected void serialize(final Like like);
4962

5063
public void export(final Forum forum) {
5164
serialize(forum);
@@ -59,32 +72,11 @@ public void export(final Post post) {
5972
serialize(post);
6073
}
6174

62-
public void export(Comment comment) {
63-
serialize(comment);
64-
65-
}
66-
67-
public void export(Photo photo) {
68-
serialize(photo);
75+
public void export(Comment comment) { serialize(comment); }
6976

70-
}
71-
72-
public void export(Like like) {
73-
serialize(like);
74-
75-
}
76-
77-
abstract protected void serialize(final Forum forum);
78-
79-
abstract protected void serialize(final Post post);
77+
public void export(Photo photo) { serialize(photo); }
8078

81-
abstract protected void serialize(final Comment comment);
82-
83-
abstract protected void serialize(final Photo photo);
84-
85-
abstract protected void serialize(final ForumMembership membership);
86-
87-
abstract protected void serialize(final Like like);
79+
public void export(Like like) { serialize(like); }
8880

8981
@Override
9082
protected boolean isDynamic() {

src/main/java/ldbc/snb/datagen/serializer/DynamicPersonSerializer.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4141
import ldbc.snb.datagen.entities.dynamic.relations.Knows;
4242
import ldbc.snb.datagen.entities.dynamic.relations.StudyAt;
4343
import ldbc.snb.datagen.entities.dynamic.relations.WorkAt;
44+
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
4445

4546
import java.util.ArrayList;
4647
import java.util.Iterator;
@@ -50,10 +51,17 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
5051
/**
5152
* Created by aprat on 10/15/14.
5253
*/
53-
abstract public class DynamicPersonSerializer extends LDBCSerializer {
54+
abstract public class DynamicPersonSerializer<TWriter extends HDFSWriter> extends LDBCSerializer<TWriter> {
5455

55-
public void export(final Person person) {
56+
abstract protected void serialize(final Person p);
5657

58+
abstract protected void serialize(final StudyAt studyAt);
59+
60+
abstract protected void serialize(final WorkAt workAt);
61+
62+
abstract protected void serialize(final Person p, final Knows knows);
63+
64+
public void export(final Person person) {
5765
serialize(person);
5866

5967
long universityId = Dictionaries.universities.getUniversityFromLocation(person.universityLocationId());
@@ -99,14 +107,6 @@ public String buildEmail(TreeSet<String> emails) {
99107
return Joiner.on(";").join(emails);
100108
}
101109

102-
abstract protected void serialize(final Person p);
103-
104-
abstract protected void serialize(final StudyAt studyAt);
105-
106-
abstract protected void serialize(final WorkAt workAt);
107-
108-
abstract protected void serialize(final Person p, final Knows knows);
109-
110110
@Override
111111
protected boolean isDynamic() {
112112
return true;

src/main/java/ldbc/snb/datagen/serializer/LDBCSerializer.java

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,23 @@
11
package ldbc.snb.datagen.serializer;
22

3-
import ldbc.snb.datagen.hadoop.writer.HDFSCSVWriter;
3+
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
44
import ldbc.snb.datagen.serializer.snb.csv.FileName;
55
import org.apache.hadoop.conf.Configuration;
66

77
import java.io.IOException;
8-
import java.util.HashMap;
98
import java.util.List;
109
import java.util.Map;
1110

12-
abstract public class LDBCSerializer {
11+
abstract public class LDBCSerializer<TWriter extends HDFSWriter> implements Serializer<TWriter> {
1312

14-
protected Map<FileName, HDFSCSVWriter> writers = new HashMap<>();
13+
protected Map<FileName, TWriter> writers;
1514

1615
abstract public List<FileName> getFileNames();
1716

1817
abstract public void writeFileHeaders();
1918

2019
public void initialize(Configuration conf, int reducerId) throws IOException {
21-
for (FileName f : getFileNames()) {
22-
writers.put(f, new HDFSCSVWriter(
23-
conf.get("ldbc.snb.datagen.serializer.socialNetworkDir") + (isDynamic() ? "/dynamic/" : "/static/"),
24-
f.toString() + "_" + reducerId,
25-
conf.getInt("ldbc.snb.datagen.numPartitions", 1),
26-
conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|",
27-
conf.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false))
28-
);
29-
}
20+
writers = initialize(conf, reducerId, isDynamic(), getFileNames());
3021
writeFileHeaders();
3122
}
3223

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package ldbc.snb.datagen.serializer;
2+
3+
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
4+
import ldbc.snb.datagen.serializer.snb.csv.FileName;
5+
import org.apache.hadoop.conf.Configuration;
6+
7+
import java.io.IOException;
8+
import java.util.List;
9+
import java.util.Map;
10+
11+
/**
 * Contract for creating the output writers used by a serializer.
 *
 * @param <THDFSWriter> concrete writer type produced; must extend {@link HDFSWriter}
 */
public interface Serializer<THDFSWriter extends HDFSWriter> {
12+
13+
/**
 * Creates the writers for the requested output files.
 *
 * @param conf      Hadoop configuration made available to the implementation
 * @param reducerId id of the calling reducer; presumably used to keep output
 *                  names distinct per reducer -- TODO confirm against implementations
 * @param dynamic   NOTE(review): looks like this selects dynamic vs. static
 *                  output -- confirm against implementations
 * @param fileNames output files for which writers are requested
 * @return map from file name to its writer
 * @throws IOException declared so implementations can report failures while
 *                     creating writers
 */
Map<FileName, THDFSWriter> initialize(Configuration conf, int reducerId, boolean dynamic, List<FileName> fileNames) throws IOException;
14+
15+
}

src/main/java/ldbc/snb/datagen/serializer/StaticSerializer.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,20 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3939
import ldbc.snb.datagen.entities.statictype.TagClass;
4040
import ldbc.snb.datagen.entities.statictype.place.Place;
4141
import ldbc.snb.datagen.entities.statictype.tag.Tag;
42-
42+
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
4343

4444
/**
4545
* Created by aprat on 12/17/14.
4646
*/
47-
abstract public class StaticSerializer extends LDBCSerializer {
47+
abstract public class StaticSerializer<TWriter extends HDFSWriter> extends LDBCSerializer<TWriter> {
48+
49+
abstract protected void serialize(final Place place);
50+
51+
abstract protected void serialize(final Organisation organisation);
52+
53+
abstract protected void serialize(final TagClass tagClass);
54+
55+
abstract protected void serialize(final Tag tag);
4856

4957
public void export(final TagClass tagclass) {
5058
serialize(tagclass);
@@ -62,14 +70,6 @@ public void export(final Tag tag) {
6270
serialize(tag);
6371
}
6472

65-
abstract protected void serialize(final Place place);
66-
67-
abstract protected void serialize(final Organisation organisation);
68-
69-
abstract protected void serialize(final TagClass tagClass);
70-
71-
abstract protected void serialize(final Tag tag);
72-
7373
@Override
7474
protected boolean isDynamic() {
7575
return false;

src/main/java/ldbc/snb/datagen/serializer/empty/EmptyDynamicActivitySerializer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4646
import org.apache.hadoop.conf.Configuration;
4747

4848
import java.io.IOException;
49+
import java.util.Collections;
4950
import java.util.List;
51+
import java.util.Map;
5052

5153
/**
5254
* Created by aprat on 30/01/15.
@@ -103,4 +105,9 @@ protected void serialize(final Like like) {
103105
//This is left intentionally blank
104106
}
105107

108+
/**
 * Creates no writers: always returns the shared immutable empty map and
 * ignores every argument.
 *
 * NOTE(review): the return type and the {@code list} parameter are raw types.
 * Whether they can be parameterized depends on how the enclosing class binds
 * the serializer's type parameter, which is not visible here -- confirm before changing.
 *
 * @param conf      unused
 * @param reducerId unused
 * @param dynamic   unused
 * @param list      unused
 * @return an empty, unmodifiable map
 * @throws IOException never thrown by this implementation; declared to match the overridden method
 */
@Override
109+
public Map initialize(Configuration conf, int reducerId, boolean dynamic, List list) throws IOException {
110+
return Collections.emptyMap();
111+
}
112+
106113
}

src/main/java/ldbc/snb/datagen/serializer/empty/EmptyDynamicPersonSerializer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4444
import org.apache.hadoop.conf.Configuration;
4545

4646
import java.io.IOException;
47+
import java.util.Collections;
4748
import java.util.List;
49+
import java.util.Map;
4850

4951
/**
5052
* Created by aprat on 30/01/15.
@@ -91,4 +93,9 @@ protected void serialize(final Person p, final Knows knows) {
9193
//Intentionally left empty
9294
}
9395

96+
/**
 * Creates no writers: always returns the shared immutable empty map and
 * ignores every argument.
 *
 * NOTE(review): the return type and the {@code list} parameter are raw types.
 * Whether they can be parameterized depends on how the enclosing class binds
 * the serializer's type parameter, which is not visible here -- confirm before changing.
 *
 * @param conf      unused
 * @param reducerId unused
 * @param dynamic   unused
 * @param list      unused
 * @return an empty, unmodifiable map
 * @throws IOException never thrown by this implementation; declared to match the overridden method
 */
@Override
97+
public Map initialize(Configuration conf, int reducerId, boolean dynamic, List list) throws IOException {
98+
return Collections.emptyMap();
99+
}
100+
94101
}

src/main/java/ldbc/snb/datagen/serializer/empty/EmptyStaticSerializer.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,20 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
3636
package ldbc.snb.datagen.serializer.empty;
3737

38+
import com.google.common.collect.ImmutableMap;
39+
import com.google.common.collect.Maps;
3840
import ldbc.snb.datagen.entities.statictype.Organisation;
3941
import ldbc.snb.datagen.entities.statictype.TagClass;
4042
import ldbc.snb.datagen.entities.statictype.place.Place;
4143
import ldbc.snb.datagen.entities.statictype.tag.Tag;
4244
import ldbc.snb.datagen.serializer.StaticSerializer;
4345
import ldbc.snb.datagen.serializer.snb.csv.FileName;
46+
import org.apache.hadoop.conf.Configuration;
4447

48+
import java.io.IOException;
49+
import java.util.Collections;
4550
import java.util.List;
51+
import java.util.Map;
4652

4753
/**
4854
* Created by aprat on 30/01/15.
@@ -83,4 +89,9 @@ protected void serialize(final Tag tag) {
8389

8490
}
8591

92+
/**
 * Creates no writers: always returns the shared immutable empty map and
 * ignores every argument.
 *
 * NOTE(review): the return type and the {@code list} parameter are raw types.
 * Whether they can be parameterized depends on how the enclosing class binds
 * the serializer's type parameter, which is not visible here -- confirm before changing.
 *
 * @param conf      unused
 * @param reducerId unused
 * @param dynamic   unused
 * @param list      unused
 * @return an empty, unmodifiable map
 * @throws IOException never thrown by this implementation; declared to match the overridden method
 */
@Override
93+
public Map initialize(Configuration conf, int reducerId, boolean dynamic, List list) throws IOException {
94+
return Collections.emptyMap();
95+
}
96+
8697
}

0 commit comments

Comments
 (0)