Commit a1dc1d6

add long date serializers
1 parent e726fe3 commit a1dc1d6

8 files changed: +1359 -8 lines changed

.gitignore

Lines changed: 2 additions & 1 deletion

@@ -16,4 +16,5 @@ target/
 .m0friendList0.csv.crc
 substitution_parameters/
 target/
-scripts/
+scripts/
+*.iml
params.ini

Lines changed: 4 additions & 7 deletions

@@ -7,7 +7,7 @@ ldbc.snb.datagen.generator.startYear:2010
 ldbc.snb.datagen.serializer.compressed:false
 
 ### Serialization
-# -- Regular --
+### -- Regular --
 ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer
 ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer
 ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer
@@ -16,12 +16,9 @@ ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer
 # ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVMergeForeignInvariantSerializer
 # ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVMergeForeignPersonActivitySerializer
 
-ldbc.snb.datagen.serializer.updateStreams:false
-ldbc.snb.datagen.serializer.numUpdatePartitions:4
+ldbc.snb.datagen.serializer.updateStreams:true
+ldbc.snb.datagen.serializer.numUpdatePartitions:6
 ldbc.snb.datagen.serializer.outputDir:/Users/alexaverbuch/hadoopTempDir/output/
 
 ldbc.snb.datagen.generator.numThreads:8
-ldbc.snb.datagen.generator.numPartitions:1
-
-# https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/wiki/Compilation_Execution
-# https://github.com/ldbc-dev/ldbc_snb_datagen_0.2/blob/master/src/main/resources/params.ini
+ldbc.snb.datagen.generator.numPartitions:1
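
Note: these properties are consumed through Hadoop's Configuration API; the serializers added in this commit read ldbc.snb.datagen.serializer.socialNetworkDir, ldbc.snb.datagen.numPartitions and ldbc.snb.datagen.serializer.compressed in their initialize() methods. A minimal sketch of that access pattern, with values set by hand for illustration (in a real run the driver populates the Configuration from params.ini, which is outside this commit):

import org.apache.hadoop.conf.Configuration;

public class ConfigSketch {
    public static void main(String[] args) {
        // Illustrative values only; the actual datagen driver loads them from
        // params.ini before the serializers are initialized.
        Configuration conf = new Configuration();
        conf.set("ldbc.snb.datagen.serializer.socialNetworkDir", "/tmp/social_network");
        conf.setInt("ldbc.snb.datagen.numPartitions", 1);
        conf.setBoolean("ldbc.snb.datagen.serializer.compressed", false);

        // Same accessors (and defaults) used by the serializers in this commit.
        String dir = conf.get("ldbc.snb.datagen.serializer.socialNetworkDir");
        int numPartitions = conf.getInt("ldbc.snb.datagen.numPartitions", 1);
        boolean compressed = conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false);
        System.out.println(dir + " " + numPartitions + " " + compressed);
    }
}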
CSVInvariantSerializer.java

Lines changed: 170 additions & 0 deletions

@@ -0,0 +1,170 @@
package ldbc.snb.datagen.serializer.snb.interactive.num_date;

import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.objects.Organization;
import ldbc.snb.datagen.objects.Place;
import ldbc.snb.datagen.objects.Tag;
import ldbc.snb.datagen.objects.TagClass;
import ldbc.snb.datagen.serializer.HDFSCSVWriter;
import ldbc.snb.datagen.serializer.InvariantSerializer;
import ldbc.snb.datagen.vocabulary.DBP;
import ldbc.snb.datagen.vocabulary.DBPOWL;
import org.apache.hadoop.conf.Configuration;

import java.util.ArrayList;

/**
 * Created by aprat on 12/17/14.
 */
public class CSVInvariantSerializer extends InvariantSerializer {

    private HDFSCSVWriter[] writers;

    private enum FileNames {
        TAG("tag"),
        TAG_HAS_TYPE_TAGCLASS("tag_hasType_tagclass"),
        TAGCLASS("tagclass"),
        TAGCLASS_IS_SUBCLASS_OF_TAGCLASS("tagclass_isSubclassOf_tagclass"),
        PLACE("place"),
        PLACE_IS_PART_OF_PLACE("place_isPartOf_place"),
        ORGANIZATION("organisation"),
        ORGANIZATION_IS_LOCATED_IN_PLACE("organisation_isLocatedIn_place");

        private final String name;

        private FileNames(String name) {
            this.name = name;
        }

        public String toString() {
            return name;
        }
    }

    public void initialize(Configuration conf, int reducerId) {
        int numFiles = FileNames.values().length;
        writers = new HDFSCSVWriter[numFiles];
        for (int i = 0; i < numFiles; ++i) {
            writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                    FileNames.values()[i].toString() + "_" + reducerId, conf.getInt("ldbc.snb.datagen.numPartitions", 1), conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|", false);
        }

        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        writers[FileNames.TAG.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("Tag.id");
        arguments.add("TagClass.id");
        writers[FileNames.TAG_HAS_TYPE_TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        writers[FileNames.TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("TagClass.id");
        arguments.add("TagClass.id");
        writers[FileNames.TAGCLASS_IS_SUBCLASS_OF_TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        arguments.add("type");
        writers[FileNames.PLACE.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("type");
        arguments.add("name");
        arguments.add("url");
        writers[FileNames.ORGANIZATION.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("Organisation.id");
        arguments.add("Place.id");
        writers[FileNames.ORGANIZATION_IS_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("Place.id");
        arguments.add("Place.id");
        writers[FileNames.PLACE_IS_PART_OF_PLACE.ordinal()].writeEntry(arguments);
    }

    public void close() {
        int numFiles = FileNames.values().length;
        for (int i = 0; i < numFiles; ++i) {
            writers[i].close();
        }
    }

    protected void serialize(Place place) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(place.getId()));
        arguments.add(place.getName());
        arguments.add(DBP.getUrl(place.getName()));
        arguments.add(place.getType());
        writers[FileNames.PLACE.ordinal()].writeEntry(arguments);

        if (place.getType() == Place.CITY ||
                place.getType() == Place.COUNTRY) {
            arguments.clear();
            arguments.add(Integer.toString(place.getId()));
            arguments.add(Integer.toString(Dictionaries.places.belongsTo(place.getId())));
            writers[FileNames.PLACE_IS_PART_OF_PLACE.ordinal()].writeEntry(arguments);
        }
    }

    protected void serialize(Organization organization) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Long.toString(organization.id));
        arguments.add(organization.type.toString());
        arguments.add(organization.name);
        arguments.add(DBP.getUrl(organization.name));
        writers[FileNames.ORGANIZATION.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add(Long.toString(organization.id));
        arguments.add(Integer.toString(organization.location));
        writers[FileNames.ORGANIZATION_IS_LOCATED_IN_PLACE.ordinal()].writeEntry(arguments);
    }

    protected void serialize(TagClass tagClass) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(tagClass.id));
        arguments.add(tagClass.name);
        if (tagClass.name.equals("Thing")) {
            arguments.add("http://www.w3.org/2002/07/owl#Thing");
        } else {
            arguments.add(DBPOWL.getUrl(tagClass.name));
        }
        writers[FileNames.TAGCLASS.ordinal()].writeEntry(arguments);

        if (tagClass.parent != -1) {
            arguments.clear();
            arguments.add(Integer.toString(tagClass.id));
            arguments.add(Integer.toString(tagClass.parent));
            writers[FileNames.TAGCLASS_IS_SUBCLASS_OF_TAGCLASS.ordinal()].writeEntry(arguments);
        }
    }

    protected void serialize(Tag tag) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(tag.id));
        arguments.add(tag.name);
        arguments.add(DBP.getUrl(tag.name));
        writers[FileNames.TAG.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add(Integer.toString(tag.id));
        arguments.add(Integer.toString(tag.tagClass));
        writers[FileNames.TAG_HAS_TYPE_TAGCLASS.ordinal()].writeEntry(arguments);
    }

    public void reset() {

    }
}
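
Note: the class above (and the merge-foreign variant that follows) drives all of its output files through one pattern: an enum of file names with a parallel writer array indexed by ordinal(). A minimal, self-contained sketch of that pattern, with java.io.PrintWriter standing in for HDFSCSVWriter and an illustrative /tmp output path that is not part of this commit:

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;

public class WriterPatternSketch {
    // One enum constant per output file, same idea as FileNames above.
    private enum FileNames {
        TAG("tag"),
        TAGCLASS("tagclass");

        private final String name;
        FileNames(String name) { this.name = name; }
        public String toString() { return name; }
    }

    public static void main(String[] args) throws IOException {
        PrintWriter[] writers = new PrintWriter[FileNames.values().length];
        for (int i = 0; i < writers.length; ++i) {
            // HDFSCSVWriter writes to HDFS; plain local files stand in for it here.
            writers[i] = new PrintWriter("/tmp/" + FileNames.values()[i] + "_0.csv");
        }

        // Header rows, reusing one list via clear() and joining with the same
        // "|" separator that is passed to HDFSCSVWriter in this commit.
        ArrayList<String> header = new ArrayList<String>();
        header.add("id");
        header.add("name");
        header.add("url");
        writers[FileNames.TAG.ordinal()].println(String.join("|", header));

        header.clear();
        header.add("id");
        header.add("name");
        header.add("url");
        writers[FileNames.TAGCLASS.ordinal()].println(String.join("|", header));

        for (PrintWriter w : writers) {
            w.close();
        }
    }
}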
CSVMergeForeignInvariantSerializer.java

Lines changed: 159 additions & 0 deletions

@@ -0,0 +1,159 @@
package ldbc.snb.datagen.serializer.snb.interactive.num_date;

import ldbc.snb.datagen.dictionary.Dictionaries;
import ldbc.snb.datagen.objects.Organization;
import ldbc.snb.datagen.objects.Place;
import ldbc.snb.datagen.objects.Tag;
import ldbc.snb.datagen.objects.TagClass;
import ldbc.snb.datagen.serializer.HDFSCSVWriter;
import ldbc.snb.datagen.serializer.InvariantSerializer;
import ldbc.snb.datagen.vocabulary.DBP;
import ldbc.snb.datagen.vocabulary.DBPOWL;
import org.apache.hadoop.conf.Configuration;

import java.util.ArrayList;

/**
 * Created by aprat on 17/02/15.
 */
public class CSVMergeForeignInvariantSerializer extends InvariantSerializer {

    private HDFSCSVWriter[] writers;

    private enum FileNames {
        TAG("tag"),
        TAG_HAS_TYPE_TAGCLASS("tag_hasType_tagclass"),
        TAGCLASS("tagclass"),
        TAGCLASS_IS_SUBCLASS_OF_TAGCLASS("tagclass_isSubclassOf_tagclass"),
        PLACE("place"),
        ORGANIZATION("organisation");

        private final String name;

        private FileNames(String name) {
            this.name = name;
        }

        public String toString() {
            return name;
        }
    }

    public void initialize(Configuration conf, int reducerId) {
        int numFiles = FileNames.values().length;
        writers = new HDFSCSVWriter[numFiles];
        for (int i = 0; i < numFiles; ++i) {
            writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir"),
                    FileNames.values()[i].toString() + "_" + reducerId, conf.getInt("ldbc.snb.datagen.numPartitions", 1), conf.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|", false);
        }

        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        writers[FileNames.TAG.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("Tag.id");
        arguments.add("TagClass.id");
        writers[FileNames.TAG_HAS_TYPE_TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        writers[FileNames.TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("TagClass.id");
        arguments.add("TagClass.id");
        writers[FileNames.TAGCLASS_IS_SUBCLASS_OF_TAGCLASS.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("name");
        arguments.add("url");
        arguments.add("type");
        arguments.add("isPartOf");
        writers[FileNames.PLACE.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add("id");
        arguments.add("type");
        arguments.add("name");
        arguments.add("url");
        arguments.add("place");
        writers[FileNames.ORGANIZATION.ordinal()].writeEntry(arguments);
    }

    public void close() {
        int numFiles = FileNames.values().length;
        for (int i = 0; i < numFiles; ++i) {
            writers[i].close();
        }
    }

    protected void serialize(Place place) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(place.getId()));
        arguments.add(place.getName());
        arguments.add(DBP.getUrl(place.getName()));
        arguments.add(place.getType());

        if (place.getType() == Place.CITY ||
                place.getType() == Place.COUNTRY) {
            arguments.add(Integer.toString(Dictionaries.places.belongsTo(place.getId())));
        } else {
            arguments.add("");
        }
        writers[FileNames.PLACE.ordinal()].writeEntry(arguments);
    }

    protected void serialize(Organization organization) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Long.toString(organization.id));
        arguments.add(organization.type.toString());
        arguments.add(organization.name);
        arguments.add(DBP.getUrl(organization.name));
        arguments.add(Integer.toString(organization.location));
        writers[FileNames.ORGANIZATION.ordinal()].writeEntry(arguments);
    }

    protected void serialize(TagClass tagClass) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(tagClass.id));
        arguments.add(tagClass.name);
        if (tagClass.name.equals("Thing")) {
            arguments.add("http://www.w3.org/2002/07/owl#Thing");
        } else {
            arguments.add(DBPOWL.getUrl(tagClass.name));
        }
        writers[FileNames.TAGCLASS.ordinal()].writeEntry(arguments);

        if (tagClass.parent != -1) {
            arguments.clear();
            arguments.add(Integer.toString(tagClass.id));
            arguments.add(Integer.toString(tagClass.parent));
            writers[FileNames.TAGCLASS_IS_SUBCLASS_OF_TAGCLASS.ordinal()].writeEntry(arguments);
        }
    }

    protected void serialize(Tag tag) {
        ArrayList<String> arguments = new ArrayList<String>();
        arguments.add(Integer.toString(tag.id));
        arguments.add(tag.name);
        arguments.add(DBP.getUrl(tag.name));
        writers[FileNames.TAG.ordinal()].writeEntry(arguments);

        arguments.clear();
        arguments.add(Integer.toString(tag.id));
        arguments.add(Integer.toString(tag.tagClass));
        writers[FileNames.TAG_HAS_TYPE_TAGCLASS.ordinal()].writeEntry(arguments);
    }

    public void reset() {

    }
}
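
Note: the two serializers differ only in how foreign keys are emitted. CSVInvariantSerializer writes place_isPartOf_place and organisation_isLocatedIn_place as separate relation files, while CSVMergeForeignInvariantSerializer folds the same references into extra isPartOf and place columns on the place and organisation files themselves. With made-up ids, a placeholder <url>, and assuming the "|" passed to HDFSCSVWriter is used as the field separator, a city row would look roughly like:

place (regular):        42|Barcelona|<url>|city
place_isPartOf_place:   42|7
place (merge-foreign):  42|Barcelona|<url>|city|7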
