Skip to content

Commit a07ea3d

Browse files
committed
experiment with getting all the instant range info in analysis
Note: I made an experimental expansion to the temporal schema to see whether this approach works best for handling these dates. If it does, we still need to work out the best place to incorporate it into the schema.
1 parent 4937291 commit a07ea3d

File tree

9 files changed

+115
-279
lines changed

9 files changed

+115
-279
lines changed

schemas-analyze/src/main/java/org/cedar/schemas/analyze/DateInfo.java

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import java.time.temporal.ChronoField;
88
import java.time.temporal.ChronoUnit;
99
import java.time.temporal.TemporalAccessor;
10+
import java.time.Year;
11+
import java.time.YearMonth;
1012
import java.time.DateTimeException;
1113
import java.time.ZonedDateTime;
1214
import java.util.ArrayList;
@@ -25,9 +27,14 @@ public class DateInfo implements Comparable<DateInfo> {
2527
public final String zoneSpecified;
2628
public final String utcDateTimeString;
2729
public final Long year;
28-
public final Integer dayOfYear; // values 1 - 366
29-
public final Integer dayOfMonth; // values 1 - 31
30-
public final Integer month; // values 1 - 12
30+
public Integer dayOfYear; // values 1 - 366 // TODO temp remove final from these fields, just to sanity check things
31+
public Integer dayOfMonth; // values 1 - 31
32+
public Integer month; // values 1 - 12
33+
34+
// special stuff for dealing with how we interpret year or month precision instants:
35+
public Integer endDayOfYear;
36+
public Integer endDayOfMonth;
37+
public Integer endMonth;
3138

3239
public DateInfo(String dateString, boolean start) {
3340
if (dateString == null || dateString.length() == 0) {
@@ -55,6 +62,27 @@ public DateInfo(String dateString, boolean start) {
5562
dayOfYear = extractField(parsedDate, ChronoField.DAY_OF_YEAR);
5663
dayOfMonth = extractField(parsedDate, ChronoField.DAY_OF_MONTH);
5764
month = extractField(parsedDate, ChronoField.MONTH_OF_YEAR);
65+
if (parsedDate != null && dayOfMonth == null && month != null) { // TODO or parsedDate instanceof YearMonth
66+
endMonth = month;
67+
dayOfMonth = 1;
68+
endDayOfMonth = ((YearMonth)parsedDate).lengthOfMonth();
69+
dayOfYear = ((YearMonth)parsedDate).atDay(1).getDayOfYear();
70+
endDayOfYear = ((YearMonth)parsedDate).atEndOfMonth().getDayOfYear();
71+
System.out.println("year month - setting endDayOf* fields..." + endDayOfMonth);
72+
System.out.println("year month - setting endDayOf* fields..." + dayOfYear);
73+
System.out.println("year month - setting endDayOf* fields..." + endDayOfYear);
74+
} else if (parsedDate != null && month == null && year != null) { // TODO or instanceof Year?
75+
dayOfMonth = 1;
76+
dayOfYear = 1;
77+
endDayOfYear = ((Year)parsedDate).length(); // number of days in the year, including leap years
78+
endDayOfMonth = 31;
79+
month = 1;
80+
endMonth = 12;
81+
} else {
82+
endMonth = month;
83+
endDayOfMonth = dayOfMonth;
84+
endDayOfYear = dayOfYear;
85+
}
5886

5987
if (longDate != null && !indexable(longDate)) {
6088
descriptor = ValidDescriptor.VALID;

schemas-analyze/src/main/java/org/cedar/schemas/analyze/Temporal.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ public static TemporalBoundingAnalysis analyzeBounding(Discovery metadata) {
111111
builder.setInstantDayOfYear(instantInfo.dayOfYear);
112112
builder.setInstantDayOfMonth(instantInfo.dayOfMonth);
113113
builder.setInstantMonth(instantInfo.month);
114+
builder.setInstantEndDayOfYear(instantInfo.endDayOfYear);
115+
builder.setInstantEndDayOfMonth(instantInfo.endDayOfMonth);
116+
builder.setInstantEndMonth(instantInfo.endMonth);
114117

115118
builder.setRangeDescriptor(rangeDescriptor);
116119
}

schemas-analyze/src/test/groovy/org/cedar/schemas/analyze/AnalyzersSpec.groovy

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@ class AnalyzersSpec extends Specification {
9797
instantDayOfMonth : null,
9898
instantYear : null,
9999
instantMonth : null,
100+
instantEndDayOfYear : null,
101+
instantEndDayOfMonth : null,
102+
instantEndMonth : null,
100103
rangeDescriptor : BOUNDED,
101104
],
102105
spatialBounding : [

schemas-analyze/src/test/groovy/org/cedar/schemas/analyze/DateInfoSpec.groovy

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,18 @@ class DateInfoSpec extends Specification {
3131
where:
3232
input | year | dayOfYear | dayOfMonth | month
3333
'1999-12-31' | 1999 | 365 | 31 | 12
34-
'1999' | 1999 | null | null | null
34+
'1999' | 1999 | 1 | 1 | 1
35+
'1999-06' | 1999 | 152 | 1 | 6
3536
'1999-01-01T00:00:00Z' | 1999 | 1 | 1 | 1
3637
'-1000000000' | -1_000_000_000L | null | null | null // too big paleo number is parsed as a year value, although it cannot be a datetime string
37-
'-35000000' | -35_000_000L | null | null | null // this one can be a datetime string
38+
'-35000000' | -35_000_000L | 1 | 1 | 1 // this one can be a datetime string
3839
'2008-04-01T00:00:00Z' | 2008 | 92 | 1 | 4
3940
'1975-06-15T12:30:00Z' | 1975 | 166 | 15 | 6
4041
null | null | null | null | null
4142
'not a date' | null | null | null | null
4243
'2020-02-29' | 2020 | 60 | 29 | 2 // leap day
4344
'2020-12-31' | 2020 | 366 | 31 | 12 // last day of leap year
44-
'20191025' | 20191025 | null | null | null // string datetime from the bug
45+
'20191025' | 20191025 | 1 | 1 | 1 // string datetime from the bug
4546
}
4647

4748
def 'extracts date info from date string: #input'() {

schemas-analyze/src/test/groovy/org/cedar/schemas/analyze/TemporalSpec.groovy

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,35 @@ class TemporalSpec extends Specification {
1717

1818
final String analysisAvro = ClassLoader.systemClassLoader.getResourceAsStream('avro/psi/analysis.avsc').text
1919

20+
def "instant #description (#instant) correctly extrapolates date range"() {
21+
given:
22+
def bounding = TemporalBounding.newBuilder()
23+
.setInstant(instant)
24+
.build()
25+
def discovery = Discovery.newBuilder().setTemporalBounding(bounding).build()
26+
27+
when:
28+
def result = Temporal.analyzeBounding(discovery)
29+
30+
then:
31+
println("????"+result)
32+
result.instantMonth == month
33+
result.instantEndMonth == endMonth
34+
result.instantDayOfMonth == dayOfMonth
35+
result.instantEndDayOfMonth == endDayOfMonth
36+
result.instantDayOfYear == dayOfYear
37+
result.instantEndDayOfYear == endDayOfYear
38+
39+
where:
40+
41+
description | instant | year | dayOfYear | dayOfMonth | month | endYear | endDayOfYear | endDayOfMonth | endMonth
42+
"instant with month precision" | '2003-02' | 2003 | 32 | 1 | 2 | 2003 | 59 | 28 | 2
43+
"instant on leapyear with month precision" | '2004-02' | 2004 | 32 | 1 | 2 | 2004 | 60 | 29 | 2
44+
"instant with day precision" | '2001-06-22' | 2001 | 173 | 22 | 6 | 2001 | 173 | 22 | 6
45+
"instant with day precision on leapyear" | '2020-06-22' | 2020 | 174 | 22 | 6 | 2020 | 174 | 22 | 6
46+
47+
}
48+
2049
def "#descriptor date range correctly identified when #situation"() {
2150
given:
2251
def bounding = TemporalBounding.newBuilder()

schemas-core/src/main/resources/avro/psi/analysis.avsc

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@
297297
"string"
298298
],
299299
"default": null,
300-
"doc": "The precision of the beginning of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit"
300+
"doc": "The precision of the beginning of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit. Note that this may sometimes not exactly match the input data, since it reflects the Java class the date is able to be parsed into"
301301
},
302302
{
303303
"name": "beginIndexable",
@@ -306,7 +306,7 @@
306306
"boolean"
307307
],
308308
"default": null,
309-
"doc": "Indicates whether or not the beginning can be indexed for searching"
309+
"doc": "Indicates whether or not beginUtcDateTimeString can be indexed for searching"
310310
},
311311
{
312312
"name": "beginZoneSpecified",
@@ -342,7 +342,7 @@
342342
"int"
343343
],
344344
"default": null,
345-
"doc": "If possible, an integer indicating the day of year for the beginning value"
345+
"doc": "If possible, an integer indicating the day of year for the beginning value. This may be populated by extrapolating into a DateTime; see beginPrecision to determine whether this value was extrapolated or provided in the original date format"
346346
},
347347
{
348348
"name": "beginDayOfMonth",
@@ -351,7 +351,7 @@
351351
"int"
352352
],
353353
"default": null,
354-
"doc": "If possible, an integer indicating the day of month for the beginning value"
354+
"doc": "If possible, an integer indicating the day of month for the beginning value. This may be populated by extrapolating into a DateTime; see beginPrecision to determine whether this value was extrapolated or provided in the original date format"
355355
},
356356
{
357357
"name": "beginMonth",
@@ -360,7 +360,7 @@
360360
"int"
361361
],
362362
"default": null,
363-
"doc": "If possible, an integer indicating the month for the beginning value"
363+
"doc": "If possible, an integer indicating the month for the beginning value. This may be populated by extrapolating into a DateTime; see beginPrecision to determine whether this value was extrapolated or provided in the original date format"
364364
},
365365
{
366366
"name": "endDescriptor",
@@ -378,7 +378,7 @@
378378
"string"
379379
],
380380
"default": null,
381-
"doc": "The precision of the end of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit"
381+
"doc": "The precision of the end of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit. Note that this may sometimes not exactly match the input data, since it reflects the Java class the date is able to be parsed into"
382382
},
383383
{
384384
"name": "endIndexable",
@@ -387,7 +387,7 @@
387387
"boolean"
388388
],
389389
"default": null,
390-
"doc": "Indicates whether or not the ending can be indexed for searching"
390+
"doc": "Indicates whether or not endUtcDateTimeString can be indexed for searching"
391391
},
392392
{
393393
"name": "endZoneSpecified",
@@ -423,7 +423,7 @@
423423
"int"
424424
],
425425
"default": null,
426-
"doc": "If possible, an integer indicating the day of year for the end value"
426+
"doc": "If possible, an integer indicating the day of year for the end value. This may be populated by extrapolating into a DateTime; see endPrecision to determine whether this value was extrapolated or provided in the original date format"
427427
},
428428
{
429429
"name": "endDayOfMonth",
@@ -432,7 +432,7 @@
432432
"int"
433433
],
434434
"default": null,
435-
"doc": "If possible, an integer indicating the day of month for the end value"
435+
"doc": "If possible, an integer indicating the day of month for the end value. This may be populated by extrapolating into a DateTime; see endPrecision to determine whether this value was extrapolated or provided in the original date format"
436436
},
437437
{
438438
"name": "endMonth",
@@ -441,7 +441,7 @@
441441
"int"
442442
],
443443
"default": null,
444-
"doc": "If possible, an integer indicating the month for the end value"
444+
"doc": "If possible, an integer indicating the month for the end value. This may be populated by extrapolating into a DateTime; see endPrecision to determine whether this value was extrapolated or provided in the original date format"
445445
},
446446
{
447447
"name": "instantDescriptor",
@@ -459,7 +459,7 @@
459459
"string"
460460
],
461461
"default": null,
462-
"doc": "The precision of the instant of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit"
462+
"doc": "The precision of the instant of the analyzed bounding, e.g. 'Years', 'Seconds', etc. See java.time.temporal.ChronoUnit. Note that this may sometimes not exactly match the input data, since it reflects the Java class the date is able to be parsed into"
463463
},
464464
{
465465
"name": "instantIndexable",
@@ -468,7 +468,7 @@
468468
"boolean"
469469
],
470470
"default": null,
471-
"doc": "Indicates whether or not the instant can be indexed for searching"
471+
"doc": "Indicates whether or not instantUtcDateTimeString can be indexed for searching"
472472
},
473473
{
474474
"name": "instantZoneSpecified",
@@ -504,7 +504,16 @@
504504
"int"
505505
],
506506
"default": null,
507-
"doc": "If possible, an integer indicating the day of year for the instant value"
507+
"doc": "If possible, an integer indicating the day of year for the instant value. This may be populated by extrapolating into a DateTime; see instantPrecision to determine whether this value was extrapolated or provided in the original date format"
508+
},
509+
{
510+
"name": "instantEndDayOfYear",
511+
"type": [
512+
"null",
513+
"int"
514+
],
515+
"default": null,
516+
"doc": "If possible, an integer indicating the day of year on which the instant ends. When instantPrecision is coarser than a day, this is the assumed last day of the month or year; otherwise it equals instantDayOfYear"
508517
},
509518
{
510519
"name": "instantDayOfMonth",
@@ -513,7 +522,16 @@
513522
"int"
514523
],
515524
"default": null,
516-
"doc": "If possible, an integer indicating the day of month for the instant value"
525+
"doc": "If possible, an integer indicating the day of month for the instant value. This may be populated by extrapolating into a DateTime; see instantPrecision to determine whether this value was extrapolated or provided in the original date format"
526+
},
527+
{
528+
"name": "instantEndDayOfMonth",
529+
"type": [
530+
"null",
531+
"int"
532+
],
533+
"default": null,
534+
"doc": "If possible, an integer indicating the day of month on which the instant ends. When instantPrecision is coarser than a day, this is the assumed last day of the month (or 31 for a year-precision instant); otherwise it equals instantDayOfMonth"
517535
},
518536
{
519537
"name": "instantMonth",
@@ -522,7 +540,16 @@
522540
"int"
523541
],
524542
"default": null,
525-
"doc": "If possible, an integer indicating the month for the instant value"
543+
"doc": "If possible, an integer indicating the month for the instant value. This may be populated by extrapolating into a DateTime; see instantPrecision to determine whether this value was extrapolated or provided in the original date format"
544+
},
545+
{
546+
"name": "instantEndMonth",
547+
"type": [
548+
"null",
549+
"int"
550+
],
551+
"default": null,
552+
"doc": "If possible, an integer indicating the month in which the instant ends. For a year-precision instant this is the assumed last month of the year (12); otherwise it equals instantMonth"
526553
},
527554
{
528555
"name": "rangeDescriptor",

0 commit comments

Comments
 (0)