55import org .slf4j .Logger ;
66import org .slf4j .LoggerFactory ;
77
8- import java .time .*;
9- import java .time .format .DateTimeFormatter ;
10- import java .time .format .DateTimeFormatterBuilder ;
11- import java .time .format .ResolverStyle ;
12- import java .time .temporal .ChronoField ;
13- import java .time .temporal .ChronoUnit ;
14- import java .time .temporal .TemporalAccessor ;
15- import java .time .temporal .TemporalQueries ;
168import java .util .ArrayList ;
179import java .util .List ;
1810
1911import static org .cedar .schemas .avro .psi .TimeRangeDescriptor .*;
12+ import org .cedar .schemas .analyze .DateInfo ;
2013
2114public class Analyzers {
2215 private static final Logger log = LoggerFactory .getLogger (Analyzers .class );
2316
24- public static final DateTimeFormatter PARSE_DATE_FORMATTER = new DateTimeFormatterBuilder ()
25- .appendOptional (DateTimeFormatter .ISO_ZONED_DATE_TIME ) // e.g. 2010-12-30T00:00:00Z
26- .appendOptional (DateTimeFormatter .ISO_LOCAL_DATE_TIME ) // e.g. 2010-12-30T00:00:00
27- .appendOptional (DateTimeFormatter .ISO_LOCAL_DATE ) // e.g. 2010-12-30
28- .appendOptional (new DateTimeFormatterBuilder ()
29- .appendValue (ChronoField .YEAR ) // e.g. -200
30- .optionalStart ()
31- .appendPattern ("-MM" ) // e.g. -200-10
32- .optionalEnd ()
33- .optionalStart ()
34- .appendPattern ("-dd" ) // e.g. -200-01-01
35- .optionalEnd ()
36- .toFormatter ())
37- .toFormatter ()
38- .withResolverStyle (ResolverStyle .STRICT );
39-
4017 public static ParsedRecord addAnalysis (ParsedRecord record ) {
4118 if (record == null ) {
4219 return null ; // pass through
@@ -64,7 +41,7 @@ public static Analysis analyze(Discovery discovery) {
6441 }
6542 return Analysis .newBuilder ()
6643 .setIdentification (analyzeIdentifiers (discovery ))
67- .setTemporalBounding (analyzeTemporalBounding (discovery ))
44+ .setTemporalBounding (Temporal . analyzeBounding (discovery ))
6845 .setSpatialBounding (analyzeSpatialBounding (discovery ))
6946 .setTitles (analyzeTitles (discovery ))
7047 .setDescription (analyzeDescription (discovery ))
@@ -90,45 +67,10 @@ public static IdentificationAnalysis analyzeIdentifiers(Discovery metadata) {
9067 .setParentIdentifierExists (parentIdInfo .exists )
9168 .setParentIdentifierString (parentIdInfo .value )
9269 .setHierarchyLevelNameExists (hierarchyInfo .exists )
93- .setMatchesIdentifiers (( hierarchyInfo .exists && hierarchyInfo .value .equals ("granule" ) && parentIdInfo . exists ) || ( hierarchyInfo . exists && ! hierarchyInfo . value . equals ( "granule" )) || ! hierarchyInfo .exists )
70+ .setIsGranule ( hierarchyInfo .exists && hierarchyInfo .value .equals ("granule" ) && parentIdInfo .exists )
9471 .build ();
9572 }
9673
97- public static TemporalBoundingAnalysis analyzeTemporalBounding (Discovery metadata ) {
98- TemporalBoundingAnalysis .Builder builder = TemporalBoundingAnalysis .newBuilder ();
99-
100- if (metadata != null && metadata .getTemporalBounding () != null ) {
101- // Gather info
102- DateInfo beginInfo = new DateInfo (metadata .getTemporalBounding ().getBeginDate (), true );
103- DateInfo endInfo = new DateInfo (metadata .getTemporalBounding ().getEndDate (), false );
104- DateInfo instantInfo = new DateInfo (metadata .getTemporalBounding ().getInstant (), true );
105- TimeRangeDescriptor rangeDescriptor = rangeDescriptor (beginInfo , endInfo , instantInfo );
106-
107- // Build
108- builder .setBeginDescriptor (beginInfo .descriptor );
109- builder .setBeginPrecision (beginInfo .precision );
110- builder .setBeginIndexable (beginInfo .indexable );
111- builder .setBeginZoneSpecified (beginInfo .zoneSpecified );
112- builder .setBeginUtcDateTimeString (beginInfo .utcDateTimeString );
113-
114- builder .setEndDescriptor (endInfo .descriptor );
115- builder .setEndPrecision (endInfo .precision );
116- builder .setEndIndexable (endInfo .indexable );
117- builder .setEndZoneSpecified (endInfo .zoneSpecified );
118- builder .setEndUtcDateTimeString (endInfo .utcDateTimeString );
119-
120- builder .setInstantDescriptor (instantInfo .descriptor );
121- builder .setInstantPrecision (instantInfo .precision );
122- builder .setInstantIndexable (instantInfo .indexable );
123- builder .setInstantZoneSpecified (instantInfo .zoneSpecified );
124- builder .setInstantUtcDateTimeString (instantInfo .utcDateTimeString );
125-
126- builder .setRangeDescriptor (rangeDescriptor );
127- }
128-
129- return builder .build ();
130- }
131-
13274 public static SpatialBoundingAnalysis analyzeSpatialBounding (Discovery metadata ) {
13375 SpatialBoundingAnalysis .Builder builder = SpatialBoundingAnalysis .newBuilder ();
13476 if (metadata != null ) {
@@ -203,190 +145,4 @@ public StringInfo(String input) {
203145 }
204146 }
205147
206- static class DateInfo {
207- public final ValidDescriptor descriptor ;
208- public final String precision ;
209- public final boolean indexable ;
210- public final String zoneSpecified ;
211- public final String utcDateTimeString ;
212-
213- public DateInfo (String dateString , boolean start ) {
214- if (dateString == null || dateString .length () == 0 ) {
215- descriptor = ValidDescriptor .UNDEFINED ;
216- precision = null ;
217- indexable = true ;
218- zoneSpecified = null ;
219- utcDateTimeString = null ;
220- return ;
221- }
222-
223- Long longDate = parseLong (dateString );
224- TemporalAccessor parsedDate = parseDate (dateString );
225- if (longDate != null && !indexable (longDate )) {
226- descriptor = ValidDescriptor .VALID ;
227- precision = precision (longDate );
228- indexable = indexable (longDate );
229- zoneSpecified = timezone (longDate );
230- utcDateTimeString = utcDateTimeString (longDate , start );
231- }
232- else if (parsedDate != null ) {
233- descriptor = ValidDescriptor .VALID ;
234- precision = precision (parsedDate );
235- indexable = indexable (parsedDate );
236- zoneSpecified = timezone (parsedDate );
237- utcDateTimeString = utcDateTimeString (parsedDate , start );
238- }
239- else {
240- descriptor = ValidDescriptor .INVALID ;
241- precision = null ;
242- indexable = false ;
243- zoneSpecified = null ;
244- utcDateTimeString = null ;
245- }
246- }
247-
248- private static Long parseLong (String number ) {
249- try {
250- return Long .parseLong (number );
251- } catch (Exception e ) {
252- return null ;
253- }
254- }
255-
256- private static TemporalAccessor parseDate (String date ) {
257- try {
258- return PARSE_DATE_FORMATTER .parseBest (
259- date ,
260- ZonedDateTime ::from ,
261- LocalDateTime ::from ,
262- LocalDate ::from ,
263- YearMonth ::from ,
264- Year ::from );
265- } catch (Exception e ) {
266- return null ;
267- }
268- }
269- }
270-
271- static boolean indexable (Long year ) {
272- // Year must be in the range [-292_275_055, 292_278_994] in order to be parsed as a date by ES (Joda time magic number). However,
273- // this number is a bit arbitrary, and prone to change when ES switches to the Java time library (minimum supported year
274- // being -999,999,999). We will limit the year ourselves instead to -100,000,000 -- since this is a fairly safe bet for
275- // supportability across many date libraries if the utcDateTime ends up used as is by a downstream app.
276- return year >= -100_000_000L ;
277- }
278-
279- static boolean indexable (TemporalAccessor date ) {
280- return true ; // if it's a parsable accessor, it's indexable
281- }
282-
283- static String precision (Long year ) {
284- return ChronoUnit .YEARS .toString ();
285- }
286-
287- static String precision (TemporalAccessor date ) {
288- if (date == null ) {
289- return null ;
290- }
291- return date .query (TemporalQueries .precision ()).toString ();
292- }
293-
294- static String timezone (Object date ) {
295- return date instanceof ZonedDateTime ? ((ZonedDateTime ) date ).getOffset ().toString () : null ;
296- }
297-
298- static String utcDateTimeString (TemporalAccessor parsedDate , boolean start ) {
299- if (parsedDate == null ) {
300- return null ;
301- }
302-
303- if (parsedDate instanceof Year ) {
304- LocalDateTime yearDate = start ?
305- ((Year ) parsedDate ).atMonth (1 ).atDay (1 ).atStartOfDay () :
306- ((Year ) parsedDate ).atMonth (12 ).atEndOfMonth ().atTime (23 , 59 , 59 , 999000000 );
307- return DateTimeFormatter .ISO_ZONED_DATE_TIME .format (yearDate .atZone (ZoneOffset .UTC ));
308- }
309- if (parsedDate instanceof YearMonth ) {
310- LocalDateTime yearMonthDate = start ?
311- ((YearMonth ) parsedDate ).atDay (1 ).atStartOfDay () :
312- ((YearMonth ) parsedDate ).atEndOfMonth ().atTime (23 , 59 , 59 , 999000000 );
313- return DateTimeFormatter .ISO_ZONED_DATE_TIME .format ((yearMonthDate .atZone (ZoneOffset .UTC )));
314- }
315- if (parsedDate instanceof LocalDate ) {
316- LocalDateTime localDate = start ?
317- ((LocalDate ) parsedDate ).atStartOfDay () :
318- ((LocalDate ) parsedDate ).atTime (23 , 59 , 59 , 999000000 );
319- return DateTimeFormatter .ISO_ZONED_DATE_TIME .format (localDate .atZone (ZoneOffset .UTC ));
320- }
321- if (parsedDate instanceof LocalDateTime ) {
322- return DateTimeFormatter .ISO_ZONED_DATE_TIME .format (((LocalDateTime ) parsedDate ).atZone (ZoneOffset .UTC ));
323- }
324- if (parsedDate instanceof ZonedDateTime ) {
325- return DateTimeFormatter .ISO_ZONED_DATE_TIME .format (((ZonedDateTime ) parsedDate ).withZoneSameInstant (ZoneOffset .UTC ));
326- }
327-
328- return null ;
329- }
330-
331- static String utcDateTimeString (Long year , boolean start ) {
332- return start ? year .toString () + "-01-01T00:00:00Z" : year .toString () + "-12-31T23:59:59.999Z" ;
333- }
334-
335- static TimeRangeDescriptor rangeDescriptor (DateInfo beginInfo , DateInfo endInfo , DateInfo instantInfo ) {
336- ValidDescriptor begin = beginInfo .descriptor ;
337- ValidDescriptor end = endInfo .descriptor ;
338- ValidDescriptor instant = instantInfo .descriptor ;
339-
340- if (begin == ValidDescriptor .VALID &&
341- end == ValidDescriptor .VALID &&
342- instant == ValidDescriptor .UNDEFINED ) {
343- Boolean inOrder = beginLTEEnd (beginInfo , endInfo );
344- return inOrder == null ? INVALID : inOrder ? BOUNDED : BACKWARDS ;
345- }
346- if (begin == ValidDescriptor .VALID &&
347- end == ValidDescriptor .UNDEFINED &&
348- instant == ValidDescriptor .UNDEFINED ) {
349- return ONGOING ;
350- }
351- if (begin == ValidDescriptor .UNDEFINED &&
352- end == ValidDescriptor .UNDEFINED &&
353- instant == ValidDescriptor .VALID ) {
354- return INSTANT ;
355- }
356- if (begin == ValidDescriptor .UNDEFINED &&
357- end == ValidDescriptor .UNDEFINED &&
358- instant == ValidDescriptor .UNDEFINED ) {
359- return UNDEFINED ;
360- }
361-
362- return INVALID ;
363- }
364-
365- static Boolean beginLTEEnd (DateInfo beginInfo , DateInfo endInfo ) {
366- boolean beginIndexable = beginInfo .indexable ;
367- boolean endIndexable = endInfo .indexable ;
368- boolean beginIsYears = beginInfo .precision .equals (ChronoUnit .YEARS .toString ());
369- boolean endIsYears = endInfo .precision .equals (ChronoUnit .YEARS .toString ());
370-
371- if (beginIndexable && endIndexable ) {
372- // Compare actual dates with UTC string
373- ZonedDateTime beginDate = ZonedDateTime .parse (beginInfo .utcDateTimeString );
374- ZonedDateTime endDate = ZonedDateTime .parse (endInfo .utcDateTimeString );
375- return beginDate .isBefore (endDate ) || beginDate .isEqual (endDate );
376- }
377- else if ((beginIsYears && endIsYears ) || (beginIsYears && endIndexable ) || (beginIndexable && endIsYears )) {
378- // Compare years only as longs; parse both as string objects since both may not be just a long.
379- // Watch out for negative years...
380- String beginYearText = beginInfo .utcDateTimeString .substring (0 , beginInfo .utcDateTimeString .indexOf ('-' , 1 ));
381- String endYearText = endInfo .utcDateTimeString .substring (0 , endInfo .utcDateTimeString .indexOf ('-' , 1 ));
382- Long beginYear = Long .parseLong (beginYearText );
383- Long endYear = Long .parseLong (endYearText );
384- return beginYear <= endYear ;
385- }
386- else {
387- // One or both has an INVALID search format that is not just due to a paleo year
388- return null ;
389- }
390- }
391-
392148}
0 commit comments