Skip to content

Commit 33da13f

Browse files
committed
Refactored CSVCompositeMergeForeignDynamicPersonSerializer and fixed typo in LDBCDatagen
1 parent fe7e96a commit 33da13f

File tree

8 files changed

+39
-153
lines changed

8 files changed

+39
-153
lines changed

params-csv.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ ldbc.snb.datagen.serializer.staticSerializer:ldbc.snb.datagen.serializer.snb.csv
55
ldbc.snb.datagen.serializer.dynamicActivitySerializer:ldbc.snb.datagen.serializer.snb.csv.dynamicserializer.activity.CSVDynamicActivitySerializer
66

77
# To generate RFC 3339-compliant timestamps (https://tools.ietf.org/html/rfc3339), uncomment the following line:
8-
#ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateTimeFormat:yyyy-MM-dd'T'HH:mm:ss.SSS+00:00
8+
#ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat:yyyy-MM-dd'T'HH:mm:ss.SSS+00:00

src/main/java/ldbc/snb/datagen/LDBCDatagen.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4040
import ldbc.snb.datagen.hadoop.generator.HadoopPersonActivityGenerator;
4141
import ldbc.snb.datagen.hadoop.generator.HadoopPersonGenerator;
4242
import ldbc.snb.datagen.hadoop.serializer.HadoopPersonSerializer;
43+
import ldbc.snb.datagen.hadoop.serializer.HadoopPersonSortAndSerializer;
4344
import ldbc.snb.datagen.hadoop.serializer.HadoopStaticSerializer;
4445
import ldbc.snb.datagen.hadoop.serializer.HadoopUpdateStreamSorterAndSerializer;
4546
import ldbc.snb.datagen.hadoop.miscjob.HadoopMergeFriendshipFiles;
@@ -157,7 +158,7 @@ public int runGenerateJob(Configuration conf) throws Exception {
157158
HadoopPersonSortAndSerializer serializer = new HadoopPersonSortAndSerializer(conf);
158159
serializer.run(hadoopPrefix + "/mergedPersons");
159160
} else {
160-
HadoopDynamicPersonSerializer serializer = new HadoopDynamicPersonSerializer(conf);
161+
HadoopPersonSerializer serializer = new HadoopPersonSerializer(conf);
161162
serializer.run(hadoopPrefix + "/mergedPersons");
162163
}
163164
long endPersonSerializing = System.currentTimeMillis();

src/main/java/ldbc/snb/datagen/serializer/snb/csv/dynamicserializer/person/CSVCompositeMergeForeignDynamicPersonSerializer.java

Lines changed: 27 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
3636
package ldbc.snb.datagen.serializer.snb.csv.dynamicserializer.person;
3737

38+
import com.google.common.collect.ImmutableList;
3839
import ldbc.snb.datagen.dictionary.Dictionaries;
3940
import ldbc.snb.datagen.entities.dynamic.relations.Knows;
4041
import ldbc.snb.datagen.entities.dynamic.person.Person;
@@ -47,183 +48,67 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
4748

4849
import java.io.IOException;
4950
import java.util.ArrayList;
51+
import java.util.Arrays;
5052
import java.util.Iterator;
5153
import java.util.List;
54+
import ldbc.snb.datagen.serializer.snb.csv.FileName;
5255

56+
import static ldbc.snb.datagen.serializer.snb.csv.FileName.*;
5357
/**
5458
* Created by aprat on 17/02/15.
5559
*/
5660
public class CSVCompositeMergeForeignDynamicPersonSerializer extends DynamicPersonSerializer {
57-
58-
private HDFSCSVWriter[] writers;
59-
60-
private enum FileNames {
61-
PERSON("person"),
62-
PERSON_HAS_INTEREST_TAG("person_hasInterest_tag"),
63-
PERSON_WORK_AT("person_workAt_organisation"),
64-
PERSON_STUDY_AT("person_studyAt_organisation"),
65-
PERSON_KNOWS_PERSON("person_knows_person");
66-
67-
private final String name;
68-
69-
private FileNames(String name) {
70-
this.name = name;
71-
}
72-
73-
public String toString() {
74-
return name;
75-
}
76-
}
77-
7861
@Override
7962
public List<FileName> getFileNames() {
80-
return null;
63+
return Arrays.asList(PERSON,PERSON_HAS_INTEREST_TAG,PERSON_WORK_AT,PERSON_STUDY_AT,PERSON_KNOWS_PERSON);
8164
}
8265

8366
@Override
8467
public void writeFileHeaders() {
85-
86-
}
87-
88-
public void initialize(Configuration conf, int reducerId) throws IOException {
89-
int numFiles = FileNames.values().length;
90-
writers = new HDFSCSVWriter[numFiles];
91-
for (int i = 0; i < numFiles; ++i) {
92-
writers[i] = new HDFSCSVWriter(conf.get("ldbc.snb.datagen.serializer.socialNetworkDir")+"/dynamic/", FileNames
93-
.values()[i].toString() + "_" + reducerId, conf.getInt("ldbc.snb.datagen.numPartitions", 1), conf
94-
.getBoolean("ldbc.snb.datagen.serializer.compressed", false), "|", conf
95-
.getBoolean("ldbc.snb.datagen.serializer.endlineSeparator", false));
96-
}
97-
98-
ArrayList<String> arguments = new ArrayList<String>();
99-
arguments.add("id");
100-
arguments.add("firstName");
101-
arguments.add("lastName");
102-
arguments.add("gender");
103-
arguments.add("birthday");
104-
arguments.add("creationDate");
105-
arguments.add("locationIP");
106-
arguments.add("browserUsed");
107-
arguments.add("place");
108-
arguments.add("language");
109-
arguments.add("email");
110-
writers[FileNames.PERSON.ordinal()].writeHeader(arguments);
111-
112-
arguments.clear();
113-
arguments.add("Person.id");
114-
arguments.add("Tag.id");
115-
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeHeader(arguments);
116-
117-
arguments.clear();
118-
arguments.add("Person.id");
119-
arguments.add("Organisation.id");
120-
arguments.add("workFrom");
121-
writers[FileNames.PERSON_WORK_AT.ordinal()].writeHeader(arguments);
122-
123-
arguments.clear();
124-
arguments.add("Person.id");
125-
arguments.add("Organisation.id");
126-
arguments.add("classYear");
127-
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeHeader(arguments);
128-
129-
arguments.clear();
130-
arguments.add("Person.id");
131-
arguments.add("Person.id");
132-
arguments.add("creationDate");
133-
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeHeader(arguments);
134-
135-
}
136-
137-
@Override
138-
public void close() {
139-
int numFiles = FileNames.values().length;
140-
for (int i = 0; i < numFiles; ++i) {
141-
writers[i].close();
142-
}
68+
writers.get(PERSON).writeHeader(ImmutableList.of("id","firstName","lastName","gender","birthday","creationDate","locationIP","browserUsed","place","language","email"));
69+
writers.get(PERSON_HAS_INTEREST_TAG).writeHeader(ImmutableList.of("Person.id","Tag.id"));
70+
writers.get(PERSON_WORK_AT).writeHeader(ImmutableList.of("Person.id","Organisation.id","workFrom"));
71+
writers.get(PERSON_STUDY_AT).writeHeader(ImmutableList.of("Person.id","Organisation.id","classYear"));
72+
writers.get(PERSON_KNOWS_PERSON).writeHeader(ImmutableList.of("Person.id","Person.id","creationDate"));
14373
}
14474

14575
@Override
14676
protected void serialize(final Person p) {
14777

148-
ArrayList<String> arguments = new ArrayList<String>();
149-
150-
arguments.add(Long.toString(p.accountId()));
151-
arguments.add(p.firstName());
152-
arguments.add(p.lastName());
153-
if (p.gender() == 1) {
154-
arguments.add("male");
155-
} else {
156-
arguments.add("female");
157-
}
158-
159-
String dateString = Dictionaries.dates.formatDate(p.birthday());
160-
arguments.add(dateString);
161-
162-
dateString = Dictionaries.dates.formatDateTime(p.creationDate());
163-
arguments.add(dateString);
164-
arguments.add(p.ipAddress().toString());
165-
arguments.add(Dictionaries.browsers.getName(p.browserId()));
166-
arguments.add(Integer.toString(p.cityId()));
167-
168-
ArrayList<Integer> languages = p.languages();
169-
StringBuilder languagesBuilder = new StringBuilder();
170-
for (int i = 0; i < languages.size()-1; i++) {
171-
languagesBuilder.append(Dictionaries.languages.getLanguageName(languages.get(i))+";");
172-
}
173-
if(languages.size() > 0) {
174-
languagesBuilder.append(Dictionaries.languages.getLanguageName(languages.get(languages.size()-1)));
175-
}
176-
arguments.add(languagesBuilder.toString());
177-
178-
StringBuilder emailsBuilder = new StringBuilder();
179-
Iterator<String> itString = p.emails().iterator();
180-
for (int i = 0; i < p.emails().size()-1; i++) {
181-
emailsBuilder.append(itString.next()+";");
182-
}
183-
if(itString.hasNext()) {
184-
emailsBuilder.append(itString.next());
185-
}
186-
arguments.add(emailsBuilder.toString());
187-
writers[FileNames.PERSON.ordinal()].writeEntry(arguments);
78+
writers.get(PERSON).writeEntry(ImmutableList.of(Long.toString(
79+
p.accountId()),
80+
p.firstName(),
81+
p.lastName(),
82+
getGender(p.gender()),
83+
Dictionaries.dates.formatDate(p.birthday()),
84+
Dictionaries.dates.formatDateTime(p.creationDate()),
85+
p.ipAddress().toString(),
86+
Dictionaries.browsers.getName(p.browserId()),
87+
Integer.toString(p.cityId()),
88+
buildLanguages(p.languages()),
89+
buildEmail(p.emails())
90+
));
18891

18992
Iterator<Integer> itInteger = p.interests().iterator();
19093
while (itInteger.hasNext()) {
191-
arguments.clear();
19294
Integer interestIdx = itInteger.next();
193-
arguments.add(Long.toString(p.accountId()));
194-
arguments.add(Integer.toString(interestIdx));
195-
writers[FileNames.PERSON_HAS_INTEREST_TAG.ordinal()].writeEntry(arguments);
95+
writers.get(PERSON_HAS_INTEREST_TAG).writeEntry(ImmutableList.of(Long.toString(p.accountId()),Integer.toString(interestIdx)));
19696
}
19797
}
19898

19999
@Override
200100
protected void serialize(final StudyAt studyAt) {
201-
ArrayList<String> arguments = new ArrayList<String>();
202-
String dateString = Dictionaries.dates.formatYear(studyAt.year);
203-
arguments.add(Long.toString(studyAt.user));
204-
arguments.add(Long.toString(studyAt.university));
205-
arguments.add(dateString);
206-
writers[FileNames.PERSON_STUDY_AT.ordinal()].writeEntry(arguments);
101+
writers.get(PERSON_STUDY_AT).writeEntry(ImmutableList.of(Long.toString(studyAt.user),Long.toString(studyAt.university),Dictionaries.dates.formatYear(studyAt.year)));
207102
}
208103

209104
@Override
210105
protected void serialize(final WorkAt workAt) {
211-
ArrayList<String> arguments = new ArrayList<String>();
212-
String dateString = Dictionaries.dates.formatYear(workAt.year);
213-
arguments.add(Long.toString(workAt.user));
214-
arguments.add(Long.toString(workAt.company));
215-
arguments.add(dateString);
216-
writers[FileNames.PERSON_WORK_AT.ordinal()].writeEntry(arguments);
106+
writers.get(PERSON_WORK_AT).writeEntry(ImmutableList.of(Long.toString(workAt.user), Long.toString(workAt.company), Dictionaries.dates.formatYear(workAt.year)));
217107
}
218108

219109
@Override
220110
protected void serialize(final Person p, Knows knows) {
221-
ArrayList<String> arguments = new ArrayList<String>();
222-
String dateString = Dictionaries.dates.formatDateTime(knows.creationDate());
223-
arguments.add(Long.toString(p.accountId()));
224-
arguments.add(Long.toString(knows.to().accountId()));
225-
arguments.add(dateString);
226-
writers[FileNames.PERSON_KNOWS_PERSON.ordinal()].writeEntry(arguments);
111+
writers.get(PERSON_KNOWS_PERSON).writeEntry(ImmutableList.of(Long.toString(p.accountId()),Long.toString(knows.to().accountId()),Dictionaries.dates.formatDateTime(knows.creationDate())));
227112
}
228113

229114
}

src/main/java/ldbc/snb/datagen/util/ConfigParser.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,9 @@ public static Configuration initialize() throws Exception {
8282
conf.set("ldbc.snb.datagen.serializer.endlineSeparator", Boolean.toString(false));
8383
conf.set("ldbc.snb.datagen.generator.deltaTime", "10000");
8484
conf.set("ldbc.snb.datagen.generator.activity", "true");
85-
conf.set("ldbc.snb.datagen.serializer.dateFormatter", "ldbc.snb.datagen.serializer.formatter.StringDateFormatter");
86-
conf.set("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateTimeFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZ");
87-
conf.set("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateFormat", "yyyy-MM-dd");
85+
conf.set("ldbc.snb.datagen.serializer.dateFormatter", "ldbc.snb.datagen.util.formatter.StringDateFormatter");
86+
conf.set("ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat", "yyyy-MM-dd'T'HH:mm:ss.SSSZ");
87+
conf.set("ldbc.snb.datagen.util.formatter.StringDateFormatter.dateFormat", "yyyy-MM-dd");
8888
conf.set("ldbc.snb.datagen.generator.person.similarity", "ldbc.snb.datagen.entities.dynamic.person.similarity.GeoDistanceSimilarity");
8989
conf.set("ldbc.snb.datagen.parametergenerator.python", "python");
9090
conf.set("ldbc.snb.datagen.parametergenerator.parameters", "true");

src/main/java/ldbc/snb/datagen/util/DateUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3838
import ldbc.snb.datagen.DatagenParams;
3939
import ldbc.snb.datagen.generator.tools.PowerDistribution;
4040
import ldbc.snb.datagen.entities.dynamic.person.Person;
41-
import ldbc.snb.datagen.serializer.formatter.DateFormatter;
41+
import ldbc.snb.datagen.util.formatter.DateFormatter;
4242
import org.apache.hadoop.conf.Configuration;
4343

4444
import java.util.*;

src/main/java/ldbc/snb/datagen/util/formatter/DateFormatter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3333
You should have received a copy of the GNU General Public License
3434
along with this program; if not, write to the Free Software
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
36-
package ldbc.snb.datagen.serializer.formatter;
36+
package ldbc.snb.datagen.util.formatter;
3737

3838
import org.apache.hadoop.conf.Configuration;
3939

src/main/java/ldbc/snb/datagen/util/formatter/LongDateFormatter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3333
You should have received a copy of the GNU General Public License
3434
along with this program; if not, write to the Free Software
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
36-
package ldbc.snb.datagen.serializer.formatter;
36+
package ldbc.snb.datagen.util.formatter;
3737

3838
import org.apache.hadoop.conf.Configuration;
3939

src/main/java/ldbc/snb/datagen/util/formatter/StringDateFormatter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Linked Data Benchmark Council (http://www.ldbcouncil.org)
3333
You should have received a copy of the GNU General Public License
3434
along with this program; if not, write to the Free Software
3535
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.*/
36-
package ldbc.snb.datagen.serializer.formatter;
36+
package ldbc.snb.datagen.util.formatter;
3737

3838
import org.apache.hadoop.conf.Configuration;
3939

@@ -56,11 +56,11 @@ public class StringDateFormatter implements DateFormatter {
5656
public void initialize(Configuration conf) {
5757

5858
formatDateTimeString_ = conf
59-
.get("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateTimeFormat", formatDateTimeString_);
59+
.get("ldbc.snb.datagen.util.formatter.StringDateFormatter.dateTimeFormat", formatDateTimeString_);
6060
gmtDateTimeFormatter_ = new SimpleDateFormat(formatDateTimeString_);
6161
gmtDateTimeFormatter_.setTimeZone(TimeZone.getTimeZone("GMT"));
6262
formatDateString_ = conf
63-
.get("ldbc.snb.datagen.serializer.formatter.StringDateFormatter.dateFormat", formatDateString_);
63+
.get("ldbc.snb.datagen.util.formatter.StringDateFormatter.dateFormat", formatDateString_);
6464
gmtDateFormatter_ = new SimpleDateFormat(formatDateString_);
6565
gmtDateFormatter_.setTimeZone(TimeZone.getTimeZone("GMT"));
6666
date_ = new Date();

0 commit comments

Comments
 (0)