Skip to content

Commit f70408d

Browse files
committed
Refactoring WIP
1 parent 6ecb17e commit f70408d

File tree

5 files changed

+80
-158
lines changed

5 files changed

+80
-158
lines changed

src/main/java/ldbc/snb/datagen/serializer/DynamicPersonSerializer.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4848
/**
4949
* Created by aprat on 10/15/14.
5050
*/
51-
abstract public class DynamicPersonSerializer extends LDBCSerializer{
51+
abstract public class DynamicPersonSerializer extends LDBCSerializer {
5252

5353
public void export(final Person person) {
5454

@@ -79,12 +79,6 @@ public void export(final Person p, final Knows k) {
7979
serialize(p, k);
8080
}
8181

82-
abstract public void reset();
83-
84-
abstract public void initialize(Configuration conf, int reducerId) throws IOException;
85-
86-
abstract public void close();
87-
8882
abstract protected void serialize(final Person p);
8983

9084
abstract protected void serialize(final StudyAt studyAt);

src/main/java/ldbc/snb/datagen/serializer/graphalytics/CSVDynamicPersonSerializerExtended.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4444
import ldbc.snb.datagen.entities.dynamic.relations.WorkAt;
4545
import ldbc.snb.datagen.hadoop.writer.HDFSCSVWriter;
4646
import ldbc.snb.datagen.serializer.DynamicPersonSerializer;
47+
import ldbc.snb.datagen.serializer.snb.csv.FileName;
4748
import org.apache.hadoop.conf.Configuration;
4849

4950
import java.io.IOException;
5051
import java.util.ArrayList;
52+
import java.util.List;
5153

5254
public class CSVDynamicPersonSerializerExtended extends DynamicPersonSerializer {
5355

@@ -69,6 +71,16 @@ public String toString() {
6971
}
7072

7173

74+
/**
 * Placeholder added by this commit: reports no file names and returns {@code null}
 * instead of a list.
 *
 * NOTE(review): any caller that iterates or dereferences the result will throw a
 * NullPointerException; the declaring supertype is not visible in this diff, so
 * confirm whether an empty list is expected here.
 *
 * @return always {@code null}
 */
@Override
75+
public List<FileName> getFileNames() {
76+
return null;
77+
}
78+
79+
/**
 * Placeholder added by this commit: the body is empty, so this method writes nothing.
 *
 * NOTE(review): the class's own {@code initialize(Configuration, int)} follows directly
 * below and is unchanged by this commit; presumably header writing still happens there,
 * but confirm against the full file.
 */
@Override
80+
public void writeFileHeaders() {
81+
82+
}
83+
7284
@Override
7385
public void initialize(Configuration conf, int reducerId) throws IOException {
7486
int numFiles = FileNames.values().length;

src/main/java/ldbc/snb/datagen/serializer/snb/csv/dynamicserializer/activity/CSVDynamicActivitySerializer.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3737

3838
import com.google.common.collect.ImmutableList;
3939
import ldbc.snb.datagen.dictionary.Dictionaries;
40-
import ldbc.snb.datagen.hadoop.writer.HDFSCSVWriter;
4140
import ldbc.snb.datagen.entities.dynamic.messages.Comment;
4241
import ldbc.snb.datagen.entities.dynamic.messages.Photo;
4342
import ldbc.snb.datagen.entities.dynamic.messages.Post;
@@ -46,12 +45,8 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4645
import ldbc.snb.datagen.entities.dynamic.Forum;
4746
import ldbc.snb.datagen.serializer.DynamicActivitySerializer;
4847
import ldbc.snb.datagen.serializer.snb.csv.FileName;
49-
import org.apache.hadoop.conf.Configuration;
5048
import static ldbc.snb.datagen.serializer.snb.csv.FileName.*;
51-
import ldbc.snb.datagen.serializer.snb.csv.FileName;
5249

53-
import java.io.IOException;
54-
import java.util.ArrayList;
5550
import java.util.Arrays;
5651
import java.util.List;
5752

src/main/java/ldbc/snb/datagen/serializer/snb/csv/dynamicserializer/person/CSVMergeForeignDynamicPersonSerializer.java

Lines changed: 55 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -35,194 +35,103 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
3636
package ldbc.snb.datagen.serializer.snb.csv.dynamicserializer.person;
3737

38+
import com.google.common.collect.ImmutableList;
3839
import ldbc.snb.datagen.dictionary.Dictionaries;
39-
import ldbc.snb.datagen.entities.dynamic.relations.Knows;
4040
import ldbc.snb.datagen.entities.dynamic.person.Person;
41+
import ldbc.snb.datagen.entities.dynamic.relations.Knows;
4142
import ldbc.snb.datagen.entities.dynamic.relations.StudyAt;
4243
import ldbc.snb.datagen.entities.dynamic.relations.WorkAt;
4344
import ldbc.snb.datagen.hadoop.writer.HDFSCSVWriter;
4445
import ldbc.snb.datagen.serializer.DynamicPersonSerializer;
46+
import ldbc.snb.datagen.serializer.snb.csv.FileName;
4547
import org.apache.hadoop.conf.Configuration;
4648

49+
import static ldbc.snb.datagen.serializer.snb.csv.FileName.*;
50+
4751
import java.io.IOException;
4852
import java.util.ArrayList;
4953
import java.util.Iterator;
54+
import java.util.List;
5055

5156
/**
5257
* Created by aprat on 17/02/15.
5358
*/
5459
public class CSVMergeForeignDynamicPersonSerializer extends DynamicPersonSerializer {
5560

56-
private HDFSCSVWriter[] writers;
57-
58-
private enum FileNames {
59-
PERSON("person"),
60-
PERSON_SPEAKS_LANGUAGE("person_speaks_language"),
61-
PERSON_HAS_EMAIL("person_email_emailaddress"),
62-
PERSON_HAS_INTEREST_TAG("person_hasInterest_tag"),
63-
PERSON_WORK_AT("person_workAt_organisation"),
64-
PERSON_STUDY_AT("person_studyAt_organisation"),
65-
PERSON_KNOWS_PERSON("person_knows_person");
66-
67-
private final String name;
68-
69-
private FileNames(String name) {
70-
this.name = name;
71-
}
72-
73-
public String toString() {
74-
return name;
75-
}
76-
}
77-
78-
public void initialize(Configuration conf, int reducerId) throws IOException {
79-
int numFiles = FileNames.values().length;
80-
writers = new HDFSCSVWriter[numFiles];
81-
for (int i = 0; i < numFiles; ++i) {
82-
writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir")+"/dynamic/", FileNames
83-
.values()[i].toString() + "_" + reducerId, conf.getInt("ldbc.snb.datagen.numPartitions", 1), conf
84-
.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|", conf
85-
.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
86-
}
87-
88-
ArrayList<String> arguments = new ArrayList<String>();
89-
arguments.add("id");
90-
arguments.add("firstName");
91-
arguments.add("lastName");
92-
arguments.add("gender");
93-
arguments.add("birthday");
94-
arguments.add("creationDate");
95-
arguments.add("locationIP");
96-
arguments.add("browserUsed");
97-
arguments.add("place");
98-
writers[FileNames.PERSON.ordinal()].writeHeader(arguments);
99-
100-
arguments.clear();
101-
arguments.add("Person.id");
102-
arguments.add("language");
103-
writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeHeader(arguments);
104-
105-
arguments.clear();
106-
arguments.add("Person.id");
107-
arguments.add("email");
108-
writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeHeader(arguments);
109-
110-
arguments.clear();
111-
arguments.add("Person.id");
112-
arguments.add("Tag.id");
113-
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeHeader(arguments);
114-
115-
arguments.clear();
116-
arguments.add("Person.id");
117-
arguments.add("Organisation.id");
118-
arguments.add("workFrom");
119-
writers[FileNames.PERSON_WORK_AT.ordinal()].writeHeader(arguments);
120-
121-
arguments.clear();
122-
arguments.add("Person.id");
123-
arguments.add("Organisation.id");
124-
arguments.add("classYear");
125-
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeHeader(arguments);
126-
127-
arguments.clear();
128-
arguments.add("Person.id");
129-
arguments.add("Person.id");
130-
arguments.add("creationDate");
131-
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeHeader(arguments);
132-
61+
/**
 * Lists the output files this serializer handles, replacing the private
 * {@code FileNames} enum that this commit removes from the class.
 *
 * The constants are statically imported from {@code FileName}; the same seven
 * constants are used as keys into {@code writers} by the other methods of this class.
 *
 * @return an immutable list of the seven person-related {@code FileName} constants,
 *         in the order given below
 */
@Override
62+
public List<FileName> getFileNames() {
63+
return ImmutableList.of(PERSON, PERSON_SPEAKS_LANGUAGE, PERSON_HAS_EMAIL, PERSON_HAS_INTEREST_TAG,
64+
PERSON_WORK_AT, PERSON_STUDY_AT, PERSON_KNOWS_PERSON);
13365
}
13466

13567
@Override
136-
public void close() {
137-
int numFiles = FileNames.values().length;
138-
for (int i = 0; i < numFiles; ++i) {
139-
writers[i].close();
140-
}
68+
public void writeFileHeaders() {
69+
writers.get(PERSON).writeHeader(ImmutableList.of("id", "firstName", "lastName", "gender", "birthday", "creationDate", "locationIP", "browserUsed", "place"));
70+
writers.get(PERSON_SPEAKS_LANGUAGE).writeHeader(ImmutableList.of("Person.id", "language"));
71+
writers.get(PERSON_HAS_EMAIL).writeHeader(ImmutableList.of("Person.id", "email"));
72+
writers.get(PERSON_HAS_INTEREST_TAG).writeHeader(ImmutableList.of("Person.id", "Tag.id"));
73+
writers.get(PERSON_WORK_AT).writeHeader(ImmutableList.of("Person.id", "Organisation.id", "workFrom"));
74+
writers.get(PERSON_STUDY_AT).writeHeader(ImmutableList.of("Person.id", "Organisation.id", "classYear"));
75+
writers.get(PERSON_KNOWS_PERSON).writeHeader(ImmutableList.of("Person.id", "Person.id", "creationDate"));
14176
}
14277

14378
@Override
14479
protected void serialize(final Person p) {
145-
146-
ArrayList<String> arguments = new ArrayList<String>();
147-
148-
arguments.add(Long.toString(p.accountId()));
149-
arguments.add(p.firstName());
150-
arguments.add(p.lastName());
151-
if (p.gender() == 1) {
152-
arguments.add("male");
153-
} else {
154-
arguments.add("female");
155-
}
156-
157-
String dateString = Dictionaries.dates.formatDate(p.birthday());
158-
arguments.add(dateString);
159-
160-
dateString = Dictionaries.dates.formatDateTime(p.creationDate());
161-
arguments.add(dateString);
162-
arguments.add(p.ipAddress().toString());
163-
arguments.add(Dictionaries.browsers.getName(p.browserId()));
164-
arguments.add(Integer.toString(p.cityId()));
165-
writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
166-
167-
ArrayList<Integer> languages = p.languages();
80+
writers.get(PERSON).writeEntry(ImmutableList.of(
81+
Long.toString(p.accountId()),
82+
p.firstName(),
83+
p.lastName(),
84+
p.gender() == 1 ? "male" : "female",
85+
Dictionaries.dates.formatDate(p.birthday()),
86+
Dictionaries.dates.formatDateTime(p.creationDate()),
87+
p.ipAddress().toString(),
88+
Dictionaries.browsers.getName(p.browserId()),
89+
Integer.toString(p.cityId())
90+
));
91+
92+
List<Integer> languages = p.languages();
16893
for (int i = 0; i < languages.size(); i++) {
169-
arguments.clear();
170-
arguments.add(Long.toString(p.accountId()));
171-
arguments.add(Dictionaries.languages.getLanguageName(languages.get(i)));
172-
writers[FileNames.PERSON_SPEAKS_LANGUAGE.ordinal()].writeEntry(arguments);
94+
writers.get(PERSON_SPEAKS_LANGUAGE).writeEntry(ImmutableList.of(
95+
Long.toString(p.accountId()), Dictionaries.languages.getLanguageName(languages.get(i))
96+
));
17397
}
17498

175-
Iterator<String> itString = p.emails().iterator();
176-
while (itString.hasNext()) {
177-
arguments.clear();
178-
String email = itString.next();
179-
arguments.add(Long.toString(p.accountId()));
180-
arguments.add(email);
181-
writers[FileNames.PERSON_HAS_EMAIL.ordinal()].writeEntry(arguments);
99+
Iterator<String> emails = p.emails().iterator();
100+
while (emails.hasNext()) {
101+
writers.get(PERSON_HAS_EMAIL).writeEntry(ImmutableList.of(
102+
Long.toString(p.accountId()), emails.next()
103+
));
182104
}
183105

184-
Iterator<Integer> itInteger = p.interests().iterator();
185-
while (itInteger.hasNext()) {
186-
arguments.clear();
187-
Integer interestIdx = itInteger.next();
188-
arguments.add(Long.toString(p.accountId()));
189-
arguments.add(Integer.toString(interestIdx));
190-
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);
106+
Iterator<Integer> interests = p.interests().iterator();
107+
while (interests.hasNext()) {
108+
writers.get(PERSON_HAS_INTEREST_TAG).writeEntry(ImmutableList.of(
109+
Long.toString(p.accountId()), Integer.toString(interests.next())
110+
));
191111
}
192112
}
193113

194114
@Override
195115
protected void serialize(final StudyAt studyAt) {
196-
ArrayList<String> arguments = new ArrayList<String>();
197-
String dateString = Dictionaries.dates.formatYear(studyAt.year);
198-
arguments.add(Long.toString(studyAt.user));
199-
arguments.add(Long.toString(studyAt.university));
200-
arguments.add(dateString);
201-
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);
116+
writers.get(PERSON_STUDY_AT).writeEntry(ImmutableList.of(
117+
Long.toString(studyAt.user), Long.toString(studyAt.university), Dictionaries.dates.formatYear(studyAt.year)
118+
));
202119
}
203120

204121
@Override
205122
protected void serialize(final WorkAt workAt) {
206-
ArrayList<String> arguments = new ArrayList<String>();
207-
String dateString = Dictionaries.dates.formatYear(workAt.year);
208-
arguments.add(Long.toString(workAt.user));
209-
arguments.add(Long.toString(workAt.company));
210-
arguments.add(dateString);
211-
writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);
123+
writers.get(PERSON_WORK_AT).writeEntry(ImmutableList.of(
124+
Long.toString(workAt.user), Long.toString(workAt.company), Dictionaries.dates.formatYear(workAt.year)
125+
));
212126
}
213127

214128
@Override
215129
protected void serialize(final Person p, Knows knows) {
216-
ArrayList<String> arguments = new ArrayList<String>();
217-
String dateString = Dictionaries.dates.formatDateTime(knows.creationDate());
218-
arguments.add(Long.toString(p.accountId()));
219-
arguments.add(Long.toString(knows.to().accountId()));
220-
arguments.add(dateString);
221-
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
130+
writers.get(PERSON_KNOWS_PERSON).writeEntry(ImmutableList.of(
131+
Long.toString(p.accountId()),
132+
Long.toString(knows.to().accountId()),
133+
Dictionaries.dates.formatDateTime(knows.creationDate())
134+
));
222135
}
223136

224-
@Override
225-
public void reset() {
226-
// Intentionally left empty
227-
}
228137
}

src/main/java/ldbc/snb/datagen/serializer/snb/turtle/TurtleDynamicPersonSerializer.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4444
import ldbc.snb.datagen.entities.dynamic.relations.WorkAt;
4545
import ldbc.snb.datagen.hadoop.writer.HDFSWriter;
4646
import ldbc.snb.datagen.serializer.DynamicPersonSerializer;
47+
import ldbc.snb.datagen.serializer.snb.csv.FileName;
4748
import ldbc.snb.datagen.vocabulary.*;
4849
import org.apache.hadoop.conf.Configuration;
4950

5051
import java.io.IOException;
5152
import java.text.SimpleDateFormat;
53+
import java.util.List;
5254
import java.util.TimeZone;
5355

5456

@@ -75,6 +77,16 @@ public String toString() {
7577
}
7678
}
7779

80+
/**
 * Placeholder added by this commit: reports no file names and returns {@code null}
 * instead of a list.
 *
 * NOTE(review): any caller that iterates or dereferences the result will throw a
 * NullPointerException; the declaring supertype is not visible in this diff, so
 * confirm whether an empty list is expected here.
 *
 * @return always {@code null}
 */
@Override
81+
public List<FileName> getFileNames() {
82+
return null;
83+
}
84+
85+
/**
 * Placeholder added by this commit: the body is empty, so this method writes nothing.
 *
 * NOTE(review): the class's own {@code initialize(Configuration, int)} follows directly
 * below; whether it still needs to emit any header or prefix output is not visible in
 * this hunk, so confirm against the full file.
 */
@Override
86+
public void writeFileHeaders() {
87+
88+
}
89+
7890
public void initialize(Configuration conf, int reducerId) throws IOException {
7991
dateTimeFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
8092
dateTimeFormat .setTimeZone(TimeZone.getTimeZone("GMT"));

0 commit comments

Comments
 (0)