Skip to content

Commit 180cfee

Browse files
committed
Updated version and added Javadoc
1 parent 09b042e commit 180cfee

File tree

4 files changed

+137
-35
lines changed

4 files changed

+137
-35
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ plugins {
77
}
88

99
group 'org.radarcns'
10-
version '0.5.3-SNAPSHOT'
10+
version '0.5.3'
1111
mainClassName = 'org.radarcns.hdfs.Application'
1212

1313
sourceCompatibility = '1.8'

src/main/java/org/radarcns/hdfs/ObservationKeyPathFactory.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
import java.nio.file.Path;
2020
import java.nio.file.Paths;
2121
import java.time.Instant;
22+
import javax.annotation.Nonnull;
2223
import org.apache.avro.generic.GenericRecord;
2324

2425
public class ObservationKeyPathFactory extends RecordPathFactory {
26+
@Nonnull
2527
@Override
26-
public Path getRelativePath(String topic, GenericRecord key, GenericRecord value, Instant time, int attempt) {
28+
public Path getRelativePath(@Nonnull String topic, GenericRecord key, GenericRecord value, Instant time, int attempt) {
2729
String projectId = sanitizeId(key.get("projectId"), "unknown-project");
2830
String userId = sanitizeId(key.get("userId"), "unknown-user");
2931

@@ -33,8 +35,9 @@ public Path getRelativePath(String topic, GenericRecord key, GenericRecord value
3335
return Paths.get(projectId, userId, topic, outputFileName);
3436
}
3537

38+
@Nonnull
3639
@Override
37-
public String getCategory(GenericRecord key, GenericRecord value) {
40+
public String getCategory(@Nonnull GenericRecord key, @Nonnull GenericRecord value) {
3841
return sanitizeId(key.get("sourceId"), "unknown-source");
3942
}
4043
}

src/main/java/org/radarcns/hdfs/RecordPathFactory.java

Lines changed: 129 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525
import java.time.format.DateTimeFormatter;
2626
import java.time.format.DateTimeParseException;
2727
import java.util.regex.Pattern;
28+
import javax.annotation.Nonnull;
29+
import javax.annotation.Nullable;
2830
import org.apache.avro.Schema;
31+
import org.apache.avro.Schema.Field;
2932
import org.apache.avro.Schema.Type;
3033
import org.apache.avro.generic.GenericRecord;
3134
import org.slf4j.Logger;
@@ -41,7 +44,17 @@ public abstract class RecordPathFactory implements Plugin {
4144
private Path root;
4245
private String extension;
4346

44-
public RecordOrganization getRecordOrganization(String topic, GenericRecord record, int attempt) {
47+
/**
48+
* Get the organization of given record in given topic.
49+
* @param topic Kafka topic name
50+
* @param record record with possible key and value fields containing records
51+
* @param attempt number of previous attempts to write given record. This increases if previous
52+
* paths already existed and are incompatible.
53+
* @return organization of given record
54+
*/
55+
@Nonnull
56+
public RecordOrganization getRecordOrganization(@Nonnull String topic,
57+
@Nonnull GenericRecord record, int attempt) {
4558
GenericRecord keyField = (GenericRecord) record.get("key");
4659
GenericRecord valueField = (GenericRecord) record.get("value");
4760

@@ -58,8 +71,27 @@ public RecordOrganization getRecordOrganization(String topic, GenericRecord reco
5871
return new RecordOrganization(outputPath, category, time);
5972
}
6073

61-
public abstract Path getRelativePath(String topic, GenericRecord key, GenericRecord value, Instant time, int attempt);
74+
/**
75+
* Get the relative path corresponding to given record on given topic.
76+
* @param topic Kafka topic name
77+
* @param key record key
78+
* @param value record value
79+
* @param time time contained in the record
80+
* @param attempt number of previous attempts to write given record. This increases if previous
81+
* paths already existed and are incompatible.
82+
* @return relative path corresponding to given parameters.
83+
*/
84+
@Nonnull
85+
public abstract Path getRelativePath(@Nonnull String topic, @Nullable GenericRecord key,
86+
@Nullable GenericRecord value, @Nullable Instant time, int attempt);
6287

88+
/**
89+
* Get the category of a record, representing a partitioning for a given topic and user.
90+
* @param key record key
91+
* @param value record value
92+
* @return category name.
93+
*/
94+
@Nonnull
6395
public abstract String getCategory(GenericRecord key, GenericRecord value);
6496

6597
public Path getRoot() {
@@ -86,80 +118,147 @@ public DateTimeFormatter getTimeBinFormat() {
86118
return HOURLY_TIME_BIN_FORMAT;
87119
}
88120

89-
public String getTimeBin(Instant time) {
121+
@Nonnull
122+
public String getTimeBin(@Nullable Instant time) {
90123
return time == null ? "unknown_date" : getTimeBinFormat().format(time);
91124
}
92125

126+
/**
127+
* Organization of a record.
128+
*/
93129
public static class RecordOrganization {
94130
private final Path path;
95131
private final Instant time;
96132
private final String category;
97133

98-
public RecordOrganization(Path path, String category, Instant time) {
134+
/**
135+
* Organization of a record.
136+
*
137+
* @param path path that the record should be stored in.
138+
* @param category category or partition that the record belongs to
139+
* @param time time contained in the record, if any
140+
*/
141+
public RecordOrganization(@Nonnull Path path, @Nonnull String category,
142+
@Nullable Instant time) {
99143
this.path = path;
100144
this.time = time;
101145
this.category = category;
102146
}
103147

148+
@Nonnull
104149
public Path getPath() {
105150
return path;
106151
}
107152

153+
@Nullable
108154
public Instant getTime() {
109155
return time;
110156
}
111157

158+
@Nonnull
112159
public String getCategory() {
113160
return category;
114161
}
115162
}
116163

117-
public static Instant getDate(GenericRecord keyField, GenericRecord valueField) {
118-
Schema.Field timeField = valueField.getSchema().getField("time");
119-
if (timeField != null && timeField.schema().getType() == Type.DOUBLE) {
120-
double time = (Double) valueField.get(timeField.pos());
121-
// Convert from millis to date and apply dateFormat
122-
return Instant.ofEpochMilli((long) (time * 1000d));
164+
/**
165+
* Get the date contained in the given records.
166+
* @param keyField key field of the record
167+
* @param valueField value field of the record
168+
* @return date contained in the values of either record, or {@code null} if not found or
169+
* it cannot be parsed.
170+
*/
171+
@Nullable
172+
public static Instant getDate(@Nullable GenericRecord keyField,
173+
@Nullable GenericRecord valueField) {
174+
Schema.Field timeField;
175+
176+
if (valueField != null) {
177+
timeField = valueField.getSchema().getField("time");
178+
if (timeField != null && timeField.schema().getType() == Type.DOUBLE) {
179+
double time = (Double) valueField.get(timeField.pos());
180+
// Convert from seconds to epoch milliseconds and build an Instant
181+
return Instant.ofEpochMilli((long) (time * 1000d));
182+
}
123183
}
124-
timeField = keyField.getSchema().getField("timeStart");
125184

126-
if (timeField != null && timeField.schema().getType() == Type.DOUBLE) {
127-
double time = (Double) keyField.get(timeField.pos());
128-
// Convert from millis to date and apply dateFormat
129-
return Instant.ofEpochMilli((long) (time * 1000d));
185+
if (keyField != null) {
186+
timeField = keyField.getSchema().getField("timeStart");
187+
188+
if (timeField != null && timeField.schema().getType() == Type.DOUBLE) {
189+
double time = (Double) keyField.get(timeField.pos());
190+
// Convert from seconds to epoch milliseconds and build an Instant
191+
return Instant.ofEpochMilli((long) (time * 1000d));
192+
}
193+
194+
// WindowedKey
195+
timeField = keyField.getSchema().getField("start");
196+
if (timeField != null && timeField.schema().getType() == Type.LONG) {
197+
return Instant.ofEpochMilli((Long) keyField.get("start"));
198+
}
130199
}
131200

132-
// WindowedKey
133-
timeField = keyField.getSchema().getField("start");
134-
if (timeField != null && timeField.schema().getType() == Type.LONG) {
135-
return Instant.ofEpochMilli((Long) keyField.get("start"));
201+
if (valueField != null) {
202+
Instant result = parseDateTime(valueField);
203+
if (result != null) {
204+
return result;
205+
}
206+
result = parseDate(valueField);
207+
if (result != null) {
208+
return result;
209+
}
136210
}
137211

212+
return null;
213+
}
214+
215+
/**
216+
* Parse the dateTime field of a record, if present.
217+
*
218+
* @param record record that may contain a dateTime field
219+
* @return {@code Instant} representing the dateTime or {@code null} if the field cannot be
220+
* found or parsed.
221+
*/
222+
@Nullable
223+
public static Instant parseDateTime(@Nonnull GenericRecord record) {
138224
// dateTime
139-
timeField = valueField.getSchema().getField("dateTime");
225+
Field timeField = record.getSchema().getField("dateTime");
140226
if (timeField != null && timeField.schema().getType() == Type.STRING) {
141-
String dateTime = valueField.get(timeField.pos()).toString();
227+
String dateTime = record.get(timeField.pos()).toString();
142228
try {
143-
return Instant.parse(dateTime);
229+
if (dateTime.charAt(dateTime.length() - 1) == 'Z') {
230+
return Instant.parse(dateTime);
231+
} else {
232+
return LocalDateTime.parse(dateTime).toInstant(UTC);
233+
}
144234
} catch (DateTimeParseException ex) {
145-
// try local date
146-
}
147-
try {
148-
return LocalDateTime.parse(dateTime).toInstant(UTC);
149-
} catch (DateTimeParseException ex) {
150-
// no other options
235+
// try next data type
151236
}
152237
}
153238

154-
timeField = valueField.getSchema().getField("date");
239+
return null;
240+
}
241+
242+
/**
243+
* Parse the date field of a record, if present.
244+
*
245+
* @param record record that may contain a date field
246+
* @return {@code Instant} representing the start of given date or {@code null} if the field
247+
* cannot be found or parsed.
248+
*/
249+
@Nullable
250+
public static Instant parseDate(@Nonnull GenericRecord record) {
251+
// date
252+
Field timeField = record.getSchema().getField("date");
155253
if (timeField != null && timeField.schema().getType() == Type.STRING) {
156-
String date = valueField.get(timeField.pos()).toString();
254+
String date = record.get(timeField.pos()).toString();
157255
try {
158256
return LocalDate.parse(date).atStartOfDay(UTC).toInstant();
159257
} catch (DateTimeParseException ex) {
160258
// no other options
161259
}
162260
}
261+
163262
return null;
164263
}
165264

src/main/java/org/radarcns/hdfs/accounting/TopicPartition.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package org.radarcns.hdfs.accounting;
1818

1919
import java.util.Objects;
20-
import org.jetbrains.annotations.NotNull;
20+
import javax.annotation.Nonnull;
2121

2222
public final class TopicPartition implements Comparable<TopicPartition> {
2323
public final String topic;
@@ -45,7 +45,7 @@ public int hashCode() {
4545
}
4646

4747
@Override
48-
public int compareTo(@NotNull TopicPartition o) {
48+
public int compareTo(@Nonnull TopicPartition o) {
4949
int result = topic.compareTo(o.topic);
5050
if (result != 0) {
5151
return result;

0 commit comments

Comments
 (0)