Skip to content

Commit f06e97d

Browse files
committed
updated time range descriptor to be NOT_APPLICABLE when any date is INVALID and to clarify AMBIGUOUS cases; also exact same timestamps for begin and end dates now correctly identified as INSTANT
1 parent b7e05ad commit f06e97d

File tree

3 files changed

+119
-37
lines changed

3 files changed

+119
-37
lines changed

schemas-analyze/src/main/java/org/cedar/schemas/analyze/Analyzers.java

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import java.time.*;
99
import java.time.format.DateTimeFormatter;
1010
import java.time.format.DateTimeFormatterBuilder;
11+
import java.time.format.DateTimeParseException;
1112
import java.time.format.ResolverStyle;
1213
import java.time.temporal.ChronoField;
1314
import java.time.temporal.ChronoUnit;
@@ -203,7 +204,7 @@ public StringInfo(String input) {
203204
}
204205
}
205206

206-
static class DateInfo {
207+
static class DateInfo implements Comparable<DateInfo> {
207208
public final ValidDescriptor descriptor;
208209
public final String precision;
209210
public final boolean indexable;
@@ -266,6 +267,42 @@ private static TemporalAccessor parseDate(String date) {
266267
return null;
267268
}
268269
}
270+
271+
@Override
272+
public int compareTo(DateInfo o) {
273+
boolean thisIndexable = this.indexable;
274+
boolean oIndexable = o.indexable;
275+
boolean thisIsYears = this.precision.equals(ChronoUnit.YEARS.toString());
276+
boolean oIsYears = o.precision.equals(ChronoUnit.YEARS.toString());
277+
278+
if (thisIndexable && oIndexable) {
279+
// Compare actual dates with UTC string
280+
ZonedDateTime thisDate = ZonedDateTime.parse(this.utcDateTimeString);
281+
ZonedDateTime oDate = ZonedDateTime.parse(o.utcDateTimeString);
282+
if (thisDate.isEqual(oDate)) {
283+
return 0;
284+
} else {
285+
return thisDate.isBefore(oDate) ? -1 : 1;
286+
}
287+
}
288+
else if ((thisIsYears && oIsYears) || (thisIsYears && oIndexable) || (thisIndexable && oIsYears)) {
289+
// Compare years only as longs; parse both as string objects since both may not be just a long.
290+
// Watch out for negative years...
291+
String thisYearText = this.utcDateTimeString.substring(0, this.utcDateTimeString.indexOf('-', 1));
292+
String oYearText = o.utcDateTimeString.substring(0, o.utcDateTimeString.indexOf('-', 1));
293+
Long thisYear = Long.parseLong(thisYearText);
294+
Long oYear = Long.parseLong(oYearText);
295+
if (thisYear == oYear) {
296+
return 0;
297+
} else {
298+
return thisYear < oYear ? -1 : 1;
299+
}
300+
}
301+
else {
302+
// One or both has an INVALID search format that is not just due to a paleo year
303+
throw new DateTimeException("One or both dates being compared have an INVALID format.");
304+
}
305+
}
269306
}
270307

271308
static boolean indexable(Long year) {
@@ -337,28 +374,65 @@ static TimeRangeDescriptor rangeDescriptor(DateInfo beginInfo, DateInfo endInfo,
337374
ValidDescriptor end = endInfo.descriptor;
338375
ValidDescriptor instant = instantInfo.descriptor;
339376

340-
if (begin == ValidDescriptor.VALID &&
341-
end == ValidDescriptor.VALID &&
377+
// A time range cannot be described because an error exists in one or more of the dates:
378+
if(begin == ValidDescriptor.INVALID ||
379+
end == ValidDescriptor.INVALID ||
380+
instant == ValidDescriptor.INVALID) {
381+
return NOT_APPLICABLE;
382+
}
383+
384+
// Dates are all undefined so range is undefined:
385+
if (begin == ValidDescriptor.UNDEFINED &&
386+
end == ValidDescriptor.UNDEFINED &&
342387
instant == ValidDescriptor.UNDEFINED) {
343-
Boolean inOrder = beginLTEEnd(beginInfo, endInfo);
344-
return inOrder == null ? INVALID : inOrder ? BOUNDED : BACKWARDS;
388+
return UNDEFINED;
345389
}
390+
391+
// If begin is valid but end is undefined, this indicates an ongoing range:
346392
if (begin == ValidDescriptor.VALID &&
347393
end == ValidDescriptor.UNDEFINED &&
348394
instant == ValidDescriptor.UNDEFINED) {
349395
return ONGOING;
350396
}
397+
398+
// Valid instant is straightforward:
351399
if (begin == ValidDescriptor.UNDEFINED &&
352400
end == ValidDescriptor.UNDEFINED &&
353401
instant == ValidDescriptor.VALID) {
354402
return INSTANT;
355403
}
356-
if (begin == ValidDescriptor.UNDEFINED &&
357-
end == ValidDescriptor.UNDEFINED &&
404+
405+
// Dates describe more than one valid range descriptor, which is ambiguous:
406+
if ( ( begin == ValidDescriptor.VALID && end == ValidDescriptor.VALID && instant == ValidDescriptor.VALID ) ||
407+
( begin == ValidDescriptor.VALID && end == ValidDescriptor.UNDEFINED && instant == ValidDescriptor.VALID ) ) {
408+
return AMBIGUOUS;
409+
}
410+
411+
// Begin and end dates are independently valid but based on how they compare to each other can describe very
412+
// different range types:
413+
if (begin == ValidDescriptor.VALID &&
414+
end == ValidDescriptor.VALID &&
358415
instant == ValidDescriptor.UNDEFINED) {
359-
return UNDEFINED;
416+
try {
417+
int comparator = beginInfo.compareTo(endInfo);
418+
TimeRangeDescriptor descriptor;
419+
switch (comparator) {
420+
case -1: descriptor = BOUNDED;
421+
break;
422+
case 0: descriptor = INSTANT;
423+
break;
424+
case 1: descriptor = BACKWARDS;
425+
break;
426+
default: descriptor = INVALID;
427+
break;
428+
}
429+
return descriptor;
430+
} catch(DateTimeException e) {
431+
return INVALID;
432+
}
360433
}
361434

435+
// Covers undefined begin date with valid end date which is meaningless, regardless of presence of an instant date
362436
return INVALID;
363437
}
364438

schemas-analyze/src/test/groovy/org/cedar/schemas/analyze/AnalyzersSpec.groovy

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -176,18 +176,24 @@ class AnalyzersSpec extends Specification {
176176
result.rangeDescriptor == descriptor
177177

178178
where:
179-
descriptor | situation | begin | end | instant
180-
ONGOING | 'start date exists but not end date' | '2010-01-01' | '' | null
181-
BOUNDED | 'start and end date exist and are valid' | '2000-01-01T00:00:00Z' | '2001-01-01T00:00:00Z' | null
182-
UNDEFINED | 'neither start nor end date exist' | '' | '' | null
183-
INSTANT | 'neither start nor end date exist but valid instant does' | '' | '' | '2001-01-01'
184-
INVALID | 'end date exists but not start date' | '' | '2010' | null
185-
BACKWARDS | 'start and end date exist but start after end' | '2100-01-01T00:00:00Z' | '2002-01-01' | null
186-
INVALID | 'neither start nor end date exist but invalid instant does' | '' | '' | '2001-01-32'
187-
INVALID | 'has valid start, end, and instant' | '2010-01-01' | '2001-01-01T00:00:00Z' | '2001-01-32'
179+
descriptor | situation | begin | end | instant
180+
ONGOING | 'begin date exists but not end date' | '2010-01-01' | '' | null
181+
BOUNDED | 'begin and end date exist and are valid' | '2000-01-01T00:00:00Z' | '2001-01-01T00:00:00Z' | null
182+
UNDEFINED | 'begin, end, and instant date all undefined' | '' | '' | null
183+
INSTANT | 'neither begin nor end date exist but valid instant does' | '' | '' | '2001-01-01'
184+
INVALID | 'end date exists but not begin date, valid instant present' | '' | '2010' | '2001-01-01'
185+
INVALID | 'end date exists but not begin date, instant undefined' | '' | '2010' | null
186+
BACKWARDS | 'begin and end date exist but start after end' | '2100-01-01T00:00:00Z' | '2002-01-01' | null
187+
NOT_APPLICABLE | 'invalid instant present' | '' | '' | '2001-01-32'
188+
NOT_APPLICABLE | 'invalid begin date present' | '2001-01-32' | '' | ''
189+
NOT_APPLICABLE | 'invalid end date present' | '' | '2001-01-32' | ''
190+
AMBIGUOUS | 'has valid begin, end, and instant' | '2010-01-01' | '2001-01-01T00:00:00Z' | '2001-01-31'
191+
AMBIGUOUS | 'begin date and instant exist but not end date' | '2010-01-01' | '' | '2001-01-31'
192+
INSTANT | 'begin and end date equal timestamps' | '2000-01-01T00:00:00Z' | '2000-01-01T00:00:00Z' | null
193+
BOUNDED | 'begin and end date same day but times assumed BoD and EoD' | '2010-01-01' | '2010-01-01' | null
188194
}
189195

190-
def "Begin date LTE end date check is #value when #situation"() {
196+
def "Range descriptor is #value when #situation"() {
191197
given:
192198
def bounding = TemporalBounding.newBuilder()
193199
.setBeginDate(begin)
@@ -202,22 +208,22 @@ class AnalyzersSpec extends Specification {
202208
result.rangeDescriptor == value
203209

204210
where:
205-
value | situation | begin | end
206-
BOUNDED | 'start is valid format and before valid format end' | '2010-01-01' | '2011-01-01'
207-
BACKWARDS | 'start is valid format and after valid format end' | '2011-01-01T00:00:00Z' | '2001-01-01T00:00:00Z'
208-
BOUNDED | 'start is invalid format but paleo and before valid format end' | '-1000000000' | '2015'
209-
BOUNDED | 'start and end both invalid but paleo and start before end' | '-2000000000' | '-1000000000'
210-
BOUNDED | 'start valid LT end valid but years less than 4 digits' | '-900' | '100-01-01'
211-
BACKWARDS | 'start and end both invalid but paleo and start after end' | '-1000000000' | '-2000000000'
212-
BOUNDED | 'start and end both same instant' | '2000-01-01T00:00:00Z' | '2000-01-01T00:00:00Z'
213-
ONGOING | 'start exists but not end' | '2000-01-01T00:00:00Z' | ''
214-
INVALID | 'start does not exist but end does' | '' | '2000-01-01T00:00:00Z'
215-
UNDEFINED | 'neither start nor end exist' | '' | ''
216-
INVALID | 'start is invalid format but paleo and end is fully invalid' | '-1000000000' | '1999-13-12'
217-
INVALID | 'start is fully invalid and end is invalid format but paleo' | '15mya' | '-1000000000'
218-
INVALID | 'start is valid and end is fully invalid' | '2000-01-01T00:00:00Z' | '2000-12-31T25:00:00Z'
219-
INVALID | 'start and end both fully invalid' | '2000-01-01T00:61:00Z' | '2000-11-31T00:00:00Z'
220-
INVALID | 'start is fully invalid but end is valid' | '2000-01-01T00:00:61Z' | '2000-01-02T00:00:00Z'
211+
value | situation | begin | end
212+
BOUNDED | 'start is valid format and before valid format end' | '2010-01-01' | '2011-01-01'
213+
BACKWARDS | 'start is valid format and after valid format end' | '2011-01-01T00:00:00Z' | '2001-01-01T00:00:00Z'
214+
BOUNDED | 'start is paleo and before valid format end' | '-1000000000' | '2015'
215+
BOUNDED | 'start and end both paleo and start before end' | '-2000000000' | '-1000000000'
216+
BOUNDED | 'start valid LT end valid but years less than 4 digits' | '-900' | '100-01-01'
217+
BACKWARDS | 'start and end both paleo and start after end' | '-1000000000' | '-2000000000'
218+
INSTANT | 'start and end both same instant' | '2000-01-01T00:00:00Z' | '2000-01-01T00:00:00Z'
219+
ONGOING | 'start exists but not end' | '2000-01-01T00:00:00Z' | ''
220+
INVALID | 'start does not exist but end does' | '' | '2000-01-01T00:00:00Z'
221+
UNDEFINED | 'neither start nor end exist' | '' | ''
222+
NOT_APPLICABLE | 'start is paleo and end is invalid' | '-1000000000' | '1999-13-12'
223+
NOT_APPLICABLE | 'start is invalid and end is paleo' | '15mya' | '-1000000000'
224+
NOT_APPLICABLE | 'start is valid and end is invalid' | '2000-01-01T00:00:00Z' | '2000-12-31T25:00:00Z'
225+
NOT_APPLICABLE | 'start and end both invalid' | '2000-01-01T00:61:00Z' | '2000-11-31T00:00:00Z'
226+
NOT_APPLICABLE | 'start is invalid but end is valid' | '2000-01-01T00:00:61Z' | '2000-01-02T00:00:00Z'
221227
}
222228

223229
def "analyzes when links are #testCase"() {

schemas-core/src/main/resources/avro/psi/analysis.avsc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -424,12 +424,14 @@
424424
"type": "enum",
425425
"name": "TimeRangeDescriptor",
426426
"symbols": [
427+
"AMBIGUOUS",
428+
"BACKWARDS",
427429
"BOUNDED",
428-
"ONGOING",
429430
"INSTANT",
430-
"UNDEFINED",
431431
"INVALID",
432-
"BACKWARDS"
432+
"ONGOING",
433+
"NOT_APPLICABLE",
434+
"UNDEFINED"
433435
]
434436
}
435437
],

0 commit comments

Comments
 (0)