Skip to content

Commit 5cbebb6

Browse files
authored
Merge pull request #32 from cedardevs/1250-date-parsing-exception
1250 date parsing exception
2 parents b086f0a + 76c3391 commit 5cbebb6

File tree

11 files changed

+820
-620
lines changed

11 files changed

+820
-620
lines changed

schemas-analyze/src/main/java/org/cedar/schemas/analyze/Analyzers.java

Lines changed: 3 additions & 247 deletions
Original file line number | Diff line number | Diff line change
@@ -5,38 +5,15 @@
55
import org.slf4j.Logger;
66
import org.slf4j.LoggerFactory;
77

8-
import java.time.*;
9-
import java.time.format.DateTimeFormatter;
10-
import java.time.format.DateTimeFormatterBuilder;
11-
import java.time.format.ResolverStyle;
12-
import java.time.temporal.ChronoField;
13-
import java.time.temporal.ChronoUnit;
14-
import java.time.temporal.TemporalAccessor;
15-
import java.time.temporal.TemporalQueries;
168
import java.util.ArrayList;
179
import java.util.List;
1810

1911
import static org.cedar.schemas.avro.psi.TimeRangeDescriptor.*;
12+
import org.cedar.schemas.analyze.DateInfo;
2013

2114
public class Analyzers {
2215
private static final Logger log = LoggerFactory.getLogger(Analyzers.class);
2316

24-
/**
 * Formatter used to parse incoming date strings.
 *
 * It is assembled from four optional alternatives, appended in this order:
 * an ISO zoned date-time, an ISO local date-time, an ISO local date, and a
 * custom "year, optionally followed by -MM, optionally followed by -dd" form
 * (the custom form is what admits short or negative years such as -200).
 * The resulting formatter resolves fields with {@link ResolverStyle#STRICT}.
 */
public static final DateTimeFormatter PARSE_DATE_FORMATTER = new DateTimeFormatterBuilder()
25-
.appendOptional(DateTimeFormatter.ISO_ZONED_DATE_TIME) // e.g. 2010-12-30T00:00:00Z
26-
.appendOptional(DateTimeFormatter.ISO_LOCAL_DATE_TIME) // e.g. 2010-12-30T00:00:00
27-
.appendOptional(DateTimeFormatter.ISO_LOCAL_DATE) // e.g. 2010-12-30
28-
.appendOptional(new DateTimeFormatterBuilder()
29-
.appendValue(ChronoField.YEAR) // e.g. -200
30-
.optionalStart()
31-
.appendPattern("-MM") // e.g. -200-10
32-
.optionalEnd()
33-
.optionalStart()
34-
.appendPattern("-dd") // e.g. -200-01-01
35-
.optionalEnd()
36-
.toFormatter())
37-
.toFormatter()
38-
.withResolverStyle(ResolverStyle.STRICT);
39-
4017
public static ParsedRecord addAnalysis(ParsedRecord record) {
4118
if (record == null) {
4219
return null; // pass through
@@ -64,7 +41,7 @@ public static Analysis analyze(Discovery discovery) {
6441
}
6542
return Analysis.newBuilder()
6643
.setIdentification(analyzeIdentifiers(discovery))
67-
.setTemporalBounding(analyzeTemporalBounding(discovery))
44+
.setTemporalBounding(Temporal.analyzeBounding(discovery))
6845
.setSpatialBounding(analyzeSpatialBounding(discovery))
6946
.setTitles(analyzeTitles(discovery))
7047
.setDescription(analyzeDescription(discovery))
@@ -90,45 +67,10 @@ public static IdentificationAnalysis analyzeIdentifiers(Discovery metadata) {
9067
.setParentIdentifierExists(parentIdInfo.exists)
9168
.setParentIdentifierString(parentIdInfo.value)
9269
.setHierarchyLevelNameExists(hierarchyInfo.exists)
93-
.setMatchesIdentifiers((hierarchyInfo.exists && hierarchyInfo.value.equals("granule") && parentIdInfo.exists) || (hierarchyInfo.exists && !hierarchyInfo.value.equals("granule")) || !hierarchyInfo.exists)
70+
.setIsGranule(hierarchyInfo.exists && hierarchyInfo.value.equals("granule") && parentIdInfo.exists)
9471
.build();
9572
}
9673

97-
/**
 * Builds the temporal-bounding analysis for a discovery record.
 *
 * When {@code metadata} or its temporal bounding is null, the builder is left
 * untouched and whatever {@code builder.build()} produces for an unpopulated
 * builder is returned. Otherwise the begin, end and instant strings are each
 * wrapped in a {@code DateInfo}, and the descriptor, precision, indexable flag,
 * zone and UTC string of each are copied onto the builder together with the
 * overall range descriptor.
 *
 * @param metadata the discovery record to analyze; may be null
 * @return the built analysis
 */
public static TemporalBoundingAnalysis analyzeTemporalBounding(Discovery metadata) {
98-
TemporalBoundingAnalysis.Builder builder = TemporalBoundingAnalysis.newBuilder();
99-
100-
if (metadata != null && metadata.getTemporalBounding() != null) {
101-
// Gather info
// The boolean flag marks whether the value is the start of a range: begin and
// instant are treated as starts (true), end as an end (false).
102-
DateInfo beginInfo = new DateInfo(metadata.getTemporalBounding().getBeginDate(), true);
103-
DateInfo endInfo = new DateInfo(metadata.getTemporalBounding().getEndDate(), false);
104-
DateInfo instantInfo = new DateInfo(metadata.getTemporalBounding().getInstant(), true);
105-
TimeRangeDescriptor rangeDescriptor = rangeDescriptor(beginInfo, endInfo, instantInfo);
106-
107-
// Build
108-
builder.setBeginDescriptor(beginInfo.descriptor);
109-
builder.setBeginPrecision(beginInfo.precision);
110-
builder.setBeginIndexable(beginInfo.indexable);
111-
builder.setBeginZoneSpecified(beginInfo.zoneSpecified);
112-
builder.setBeginUtcDateTimeString(beginInfo.utcDateTimeString);
113-
114-
builder.setEndDescriptor(endInfo.descriptor);
115-
builder.setEndPrecision(endInfo.precision);
116-
builder.setEndIndexable(endInfo.indexable);
117-
builder.setEndZoneSpecified(endInfo.zoneSpecified);
118-
builder.setEndUtcDateTimeString(endInfo.utcDateTimeString);
119-
120-
builder.setInstantDescriptor(instantInfo.descriptor);
121-
builder.setInstantPrecision(instantInfo.precision);
122-
builder.setInstantIndexable(instantInfo.indexable);
123-
builder.setInstantZoneSpecified(instantInfo.zoneSpecified);
124-
builder.setInstantUtcDateTimeString(instantInfo.utcDateTimeString);
125-
126-
builder.setRangeDescriptor(rangeDescriptor);
127-
}
128-
129-
return builder.build();
130-
}
131-
13274
public static SpatialBoundingAnalysis analyzeSpatialBounding(Discovery metadata) {
13375
SpatialBoundingAnalysis.Builder builder = SpatialBoundingAnalysis.newBuilder();
13476
if (metadata != null) {
@@ -203,190 +145,4 @@ public StringInfo(String input) {
203145
}
204146
}
205147

206-
/**
 * Immutable summary of a single date string: whether it is valid, its
 * precision, whether it is indexable, the zone offset it specified (if any)
 * and its UTC date-time string form. All fields are assigned once in the
 * constructor.
 */
static class DateInfo {
207-
public final ValidDescriptor descriptor;
208-
public final String precision;
209-
public final boolean indexable;
210-
public final String zoneSpecified;
211-
public final String utcDateTimeString;
212-
213-
/**
 * Analyzes {@code dateString}.
 *
 * A null or empty string yields UNDEFINED with {@code indexable = true} and
 * null for the remaining fields. Otherwise the string is tried both as a
 * plain long and as a formatted date; if neither interpretation applies the
 * result is INVALID with {@code indexable = false}.
 *
 * @param dateString the raw date text; may be null or empty
 * @param start true when the value is the start of a range, false when it is
 *              the end; forwarded to the UTC string helpers
 */
public DateInfo(String dateString, boolean start) {
214-
if (dateString == null || dateString.length() == 0) {
215-
descriptor = ValidDescriptor.UNDEFINED;
216-
precision = null;
217-
indexable = true;
218-
zoneSpecified = null;
219-
utcDateTimeString = null;
220-
return;
221-
}
222-
223-
Long longDate = parseLong(dateString);
224-
TemporalAccessor parsedDate = parseDate(dateString);
// The long interpretation is used only when the number is NOT indexable;
// an indexable number falls through to the formatter-based branch below.
// Inside this class, indexable(...) called with an argument is the helper
// method, while the bare name on the left of '=' is the field.
225-
if (longDate != null && !indexable(longDate)) {
226-
descriptor = ValidDescriptor.VALID;
227-
precision = precision(longDate);
228-
indexable = indexable(longDate);
// timezone(...) only reports an offset for ZonedDateTime arguments, so it
// returns null for a Long here.
229-
zoneSpecified = timezone(longDate);
230-
utcDateTimeString = utcDateTimeString(longDate, start);
231-
}
232-
else if (parsedDate != null) {
233-
descriptor = ValidDescriptor.VALID;
234-
precision = precision(parsedDate);
235-
indexable = indexable(parsedDate);
236-
zoneSpecified = timezone(parsedDate);
237-
utcDateTimeString = utcDateTimeString(parsedDate, start);
238-
}
239-
else {
240-
descriptor = ValidDescriptor.INVALID;
241-
precision = null;
242-
indexable = false;
243-
zoneSpecified = null;
244-
utcDateTimeString = null;
245-
}
246-
}
247-
248-
/**
 * Parses {@code number} as a long.
 *
 * @return the parsed value, or null if parsing throws any exception
 */
private static Long parseLong(String number) {
249-
try {
250-
return Long.parseLong(number);
251-
} catch (Exception e) {
252-
return null;
253-
}
254-
}
255-
256-
/**
 * Parses {@code date} with PARSE_DATE_FORMATTER, asking for the first type
 * that can be produced from: ZonedDateTime, LocalDateTime, LocalDate,
 * YearMonth, Year (in that order).
 *
 * @return the parsed temporal, or null if parsing throws any exception
 */
private static TemporalAccessor parseDate(String date) {
257-
try {
258-
return PARSE_DATE_FORMATTER.parseBest(
259-
date,
260-
ZonedDateTime::from,
261-
LocalDateTime::from,
262-
LocalDate::from,
263-
YearMonth::from,
264-
Year::from);
265-
} catch (Exception e) {
266-
return null;
267-
}
268-
}
269-
}
270-
271-
/**
 * Reports whether a year given as a long is considered indexable, i.e. is not
 * earlier than -100,000,000. Only the lower bound is checked.
 *
 * @param year the year; it is unboxed for the comparison, so it must not be null
 * @return true when {@code year >= -100_000_000}
 */
static boolean indexable(Long year) {
272-
// Year must be in the range [-292_275_055, 292_278_994] in order to be parsed as a date by ES (Joda time magic number). However,
273-
// this number is a bit arbitrary, and prone to change when ES switches to the Java time library (minimum supported year
274-
// being -999,999,999). We will limit the year ourselves instead to -100,000,000 -- since this is a fairly safe bet for
275-
// supportability across many date libraries if the utcDateTime ends up used as is by a downstream app.
276-
return year >= -100_000_000L;
277-
}
278-
279-
/**
 * Reports whether a parsed temporal is indexable. The argument is not
 * inspected; the method always returns true.
 *
 * @param date the parsed temporal (unused)
 * @return always true
 */
static boolean indexable(TemporalAccessor date) {
280-
return true; // if it's a parsable accessor, it's indexable
281-
}
282-
283-
/**
 * Returns the precision label for a date given as a bare year number.
 * The argument is not inspected; the result is always the string form of
 * {@code ChronoUnit.YEARS}.
 *
 * @param year the year (unused)
 * @return {@code ChronoUnit.YEARS.toString()}
 */
static String precision(Long year) {
284-
return ChronoUnit.YEARS.toString();
285-
}
286-
287-
/**
 * Returns the precision of a parsed temporal as a string, obtained by running
 * the {@code TemporalQueries.precision()} query on it.
 *
 * @param date the parsed temporal; may be null
 * @return the string form of the queried precision, or null when {@code date} is null
 */
static String precision(TemporalAccessor date) {
288-
if (date == null) {
289-
return null;
290-
}
// NOTE(review): toString() is called directly on the query result; if the
// query can yield null for some accessor this would throw -- confirm.
291-
return date.query(TemporalQueries.precision()).toString();
292-
}
293-
294-
/**
 * Returns the zone offset carried by a date value, as a string.
 *
 * @param date any object; only {@code ZonedDateTime} instances carry an offset here
 * @return the offset's string form for a {@code ZonedDateTime}, otherwise null
 */
static String timezone(Object date) {
295-
return date instanceof ZonedDateTime ? ((ZonedDateTime) date).getOffset().toString() : null;
296-
}
297-
298-
/**
 * Formats a parsed temporal as an ISO zoned date-time string in UTC, filling
 * in any missing fields according to {@code start}.
 *
 * Handling by concrete type:
 * - Year: January 1st at start of day, or the last day of December at 23:59:59.999.
 * - YearMonth: the 1st at start of day, or the last day of the month at 23:59:59.999.
 * - LocalDate: start of day, or 23:59:59.999 on that date.
 * - LocalDateTime: used as-is and placed in the UTC zone ({@code start} is ignored).
 * - ZonedDateTime: converted to the same instant in UTC ({@code start} is ignored).
 *
 * @param parsedDate the parsed temporal; may be null
 * @param start true to fill missing fields with the earliest moment, false for the latest
 * @return the formatted string, or null for a null or unrecognized input type
 */
static String utcDateTimeString(TemporalAccessor parsedDate, boolean start) {
299-
if (parsedDate == null) {
300-
return null;
301-
}
302-
303-
if (parsedDate instanceof Year) {
304-
LocalDateTime yearDate = start ?
305-
((Year) parsedDate).atMonth(1).atDay(1).atStartOfDay() :
// 999000000 nanoseconds == 999 milliseconds
306-
((Year) parsedDate).atMonth(12).atEndOfMonth().atTime(23, 59, 59, 999000000);
307-
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(yearDate.atZone(ZoneOffset.UTC));
308-
}
309-
if (parsedDate instanceof YearMonth) {
310-
LocalDateTime yearMonthDate = start ?
311-
((YearMonth) parsedDate).atDay(1).atStartOfDay() :
312-
((YearMonth) parsedDate).atEndOfMonth().atTime(23, 59, 59, 999000000);
313-
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format((yearMonthDate.atZone(ZoneOffset.UTC)));
314-
}
315-
if (parsedDate instanceof LocalDate) {
316-
LocalDateTime localDate = start ?
317-
((LocalDate) parsedDate).atStartOfDay() :
318-
((LocalDate) parsedDate).atTime(23, 59, 59, 999000000);
319-
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(localDate.atZone(ZoneOffset.UTC));
320-
}
321-
if (parsedDate instanceof LocalDateTime) {
322-
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(((LocalDateTime) parsedDate).atZone(ZoneOffset.UTC));
323-
}
324-
if (parsedDate instanceof ZonedDateTime) {
325-
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(((ZonedDateTime) parsedDate).withZoneSameInstant(ZoneOffset.UTC));
326-
}
327-
328-
return null;
329-
}
330-
331-
/**
 * Builds a UTC date-time string for a bare year by plain string concatenation:
 * the year's decimal text followed by "-01-01T00:00:00Z" for a start, or by
 * "-12-31T23:59:59.999Z" for an end. No padding or validation is applied.
 *
 * @param year the year; must not be null since {@code toString()} is called on it
 * @param start true for the first moment of the year, false for the last
 * @return the concatenated string
 */
static String utcDateTimeString(Long year, boolean start) {
332-
return start ? year.toString() + "-01-01T00:00:00Z" : year.toString() + "-12-31T23:59:59.999Z";
333-
}
334-
335-
/**
 * Classifies a begin/end/instant triple into a {@code TimeRangeDescriptor}
 * based on each value's {@code ValidDescriptor}:
 *
 * - begin VALID, end VALID, instant UNDEFINED: BOUNDED if begin is not after
 *   end, BACKWARDS if it is, INVALID if the order cannot be determined.
 * - begin VALID, end UNDEFINED, instant UNDEFINED: ONGOING.
 * - begin UNDEFINED, end UNDEFINED, instant VALID: INSTANT.
 * - all three UNDEFINED: UNDEFINED.
 * - any other combination: INVALID.
 *
 * @param beginInfo analysis of the begin date
 * @param endInfo analysis of the end date
 * @param instantInfo analysis of the instant
 * @return the descriptor for the combination
 */
static TimeRangeDescriptor rangeDescriptor(DateInfo beginInfo, DateInfo endInfo, DateInfo instantInfo) {
336-
ValidDescriptor begin = beginInfo.descriptor;
337-
ValidDescriptor end = endInfo.descriptor;
338-
ValidDescriptor instant = instantInfo.descriptor;
339-
340-
if (begin == ValidDescriptor.VALID &&
341-
end == ValidDescriptor.VALID &&
342-
instant == ValidDescriptor.UNDEFINED) {
343-
Boolean inOrder = beginLTEEnd(beginInfo, endInfo);
// Three-way result: null -> INVALID, true -> BOUNDED, false -> BACKWARDS.
344-
return inOrder == null ? INVALID : inOrder ? BOUNDED : BACKWARDS;
345-
}
346-
if (begin == ValidDescriptor.VALID &&
347-
end == ValidDescriptor.UNDEFINED &&
348-
instant == ValidDescriptor.UNDEFINED) {
349-
return ONGOING;
350-
}
351-
if (begin == ValidDescriptor.UNDEFINED &&
352-
end == ValidDescriptor.UNDEFINED &&
353-
instant == ValidDescriptor.VALID) {
354-
return INSTANT;
355-
}
356-
if (begin == ValidDescriptor.UNDEFINED &&
357-
end == ValidDescriptor.UNDEFINED &&
358-
instant == ValidDescriptor.UNDEFINED) {
359-
return UNDEFINED;
360-
}
361-
362-
return INVALID;
363-
}
364-
365-
/**
 * Determines whether the begin date is less than or equal to the end date.
 *
 * When both are indexable, their UTC strings are parsed as
 * {@code ZonedDateTime} and compared. Otherwise, when at least one side has
 * YEARS precision (and the other is either YEARS precision or indexable), only
 * the year portions of the UTC strings are compared as longs.
 *
 * Both infos must have a non-null {@code precision} and {@code utcDateTimeString},
 * since {@code equals}, {@code parse} and {@code substring} are invoked on them
 * directly.
 *
 * @param beginInfo analysis of the begin date
 * @param endInfo analysis of the end date
 * @return TRUE if begin is at or before end, FALSE if it is after, or null when
 *         neither comparison strategy applies
 */
static Boolean beginLTEEnd(DateInfo beginInfo, DateInfo endInfo) {
366-
boolean beginIndexable = beginInfo.indexable;
367-
boolean endIndexable = endInfo.indexable;
368-
boolean beginIsYears = beginInfo.precision.equals(ChronoUnit.YEARS.toString());
369-
boolean endIsYears = endInfo.precision.equals(ChronoUnit.YEARS.toString());
370-
371-
if (beginIndexable && endIndexable) {
372-
// Compare actual dates with UTC string
373-
ZonedDateTime beginDate = ZonedDateTime.parse(beginInfo.utcDateTimeString);
374-
ZonedDateTime endDate = ZonedDateTime.parse(endInfo.utcDateTimeString);
375-
return beginDate.isBefore(endDate) || beginDate.isEqual(endDate);
376-
}
377-
else if ((beginIsYears && endIsYears) || (beginIsYears && endIndexable) || (beginIndexable && endIsYears)) {
378-
// Compare years only as longs; parse both as string objects since both may not be just a long.
379-
// Watch out for negative years...
// indexOf('-', 1) starts searching at index 1 so that a leading minus sign on
// a negative year is skipped and the first date separator is found instead.
380-
String beginYearText = beginInfo.utcDateTimeString.substring(0, beginInfo.utcDateTimeString.indexOf('-', 1));
381-
String endYearText = endInfo.utcDateTimeString.substring(0, endInfo.utcDateTimeString.indexOf('-', 1));
382-
Long beginYear = Long.parseLong(beginYearText);
383-
Long endYear = Long.parseLong(endYearText);
384-
return beginYear <= endYear;
385-
}
386-
else {
387-
// One or both has an INVALID search format that is not just due to a paleo year
388-
return null;
389-
}
390-
}
391-
392148
}

0 commit comments

Comments
 (0)