Skip to content

Commit 5fd41df

Browse files
authored
Extend ISO8601 parser to specify forbidden fields, allowing it to be used on more formats (#108606)
Use the new ISO8601 parser for `strict_year`, `strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`, `strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats.
1 parent 851e955 commit 5fd41df

File tree

10 files changed

+611
-156
lines changed

10 files changed

+611
-156
lines changed

docs/changelog/108606.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
pr: 108606
2+
summary: "Extend ISO8601 datetime parser to specify forbidden fields, allowing it to be used\
3+
\ on more formats"
4+
area: Infra/Core
5+
type: enhancement
6+
issues: []
7+
highlight:
8+
title: New custom parser for more ISO-8601 date formats
9+
body: |-
10+
Following on from #106486, this extends the custom ISO-8601 datetime parser to cover the `strict_year`,
11+
`strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`,
12+
`strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats.
13+
As before, the parser will fall back to the existing java.time parser if it cannot parse the input, and the
14+
`es.datetime.java_time_parsers=true` JVM property will force the use of the old parsers regardless.

server/src/main/java/org/elasticsearch/common/time/DateFormatters.java

Lines changed: 217 additions & 74 deletions
Large diffs are not rendered by default.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.common.time;
10+
11+
enum DecimalSeparator {
12+
DOT,
13+
COMMA,
14+
BOTH
15+
}

server/src/main/java/org/elasticsearch/common/time/Iso8601DateTimeParser.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ class Iso8601DateTimeParser implements DateTimeParser {
2424
// and we already account for . or , in decimals
2525
private final Locale locale;
2626

27-
Iso8601DateTimeParser(Set<ChronoField> mandatoryFields, boolean optionalTime) {
28-
parser = new Iso8601Parser(mandatoryFields, optionalTime, Map.of());
27+
Iso8601DateTimeParser(
28+
Set<ChronoField> mandatoryFields,
29+
boolean optionalTime,
30+
ChronoField maxAllowedField,
31+
DecimalSeparator decimalSeparator,
32+
TimezonePresence timezonePresence
33+
) {
34+
parser = new Iso8601Parser(mandatoryFields, optionalTime, maxAllowedField, decimalSeparator, timezonePresence, Map.of());
2935
timezone = null;
3036
locale = null;
3137
}
@@ -57,7 +63,18 @@ public DateTimeParser withLocale(Locale locale) {
5763
}
5864

5965
Iso8601DateTimeParser withDefaults(Map<ChronoField, Integer> defaults) {
60-
return new Iso8601DateTimeParser(new Iso8601Parser(parser.mandatoryFields(), parser.optionalTime(), defaults), timezone, locale);
66+
return new Iso8601DateTimeParser(
67+
new Iso8601Parser(
68+
parser.mandatoryFields(),
69+
parser.optionalTime(),
70+
parser.maxAllowedField(),
71+
parser.decimalSeparator(),
72+
parser.timezonePresence(),
73+
defaults
74+
),
75+
timezone,
76+
locale
77+
);
6178
}
6279

6380
@Override

server/src/main/java/org/elasticsearch/common/time/Iso8601Parser.java

Lines changed: 83 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,18 @@
1313
import java.time.DateTimeException;
1414
import java.time.ZoneId;
1515
import java.time.ZoneOffset;
16+
import java.time.format.DateTimeFormatter;
1617
import java.time.temporal.ChronoField;
1718
import java.util.EnumMap;
1819
import java.util.EnumSet;
1920
import java.util.Map;
21+
import java.util.Objects;
2022
import java.util.Set;
2123

2224
/**
2325
* Parses datetimes in ISO8601 format (and subsequences thereof).
2426
* <p>
25-
* This is faster than the generic parsing in {@link java.time.format.DateTimeFormatter}, as this is hard-coded and specific to ISO-8601.
27+
* This is faster than the generic parsing in {@link DateTimeFormatter}, as this is hard-coded and specific to ISO-8601.
2628
* Various public libraries provide their own variant of this mechanism. We use our own for a few reasons:
2729
* <ul>
2830
* <li>
@@ -37,13 +39,14 @@
3739
*/
3840
class Iso8601Parser {
3941

40-
private static final Set<ChronoField> VALID_MANDATORY_FIELDS = EnumSet.of(
42+
private static final Set<ChronoField> VALID_SPECIFIED_FIELDS = EnumSet.of(
4143
ChronoField.YEAR,
4244
ChronoField.MONTH_OF_YEAR,
4345
ChronoField.DAY_OF_MONTH,
4446
ChronoField.HOUR_OF_DAY,
4547
ChronoField.MINUTE_OF_HOUR,
46-
ChronoField.SECOND_OF_MINUTE
48+
ChronoField.SECOND_OF_MINUTE,
49+
ChronoField.NANO_OF_SECOND
4750
);
4851

4952
private static final Set<ChronoField> VALID_DEFAULT_FIELDS = EnumSet.of(
@@ -57,31 +60,51 @@ class Iso8601Parser {
5760

5861
private final Set<ChronoField> mandatoryFields;
5962
private final boolean optionalTime;
63+
@Nullable
64+
private final ChronoField maxAllowedField;
65+
private final DecimalSeparator decimalSeparator;
66+
private final TimezonePresence timezonePresence;
6067
private final Map<ChronoField, Integer> defaults;
6168

6269
/**
6370
* Constructs a new {@code Iso8601Parser} object
6471
*
65-
* @param mandatoryFields
66-
* The set of fields that must be present for a valid parse. These should be specified in field order
67-
* (eg if {@link ChronoField#DAY_OF_MONTH} is specified, {@link ChronoField#MONTH_OF_YEAR} should also be specified).
68-
* {@link ChronoField#YEAR} is always mandatory.
69-
* @param optionalTime
70-
* {@code false} if the presence of time fields follows {@code mandatoryFields},
71-
* {@code true} if a time component is always optional, despite the presence of time fields in {@code mandatoryFields}.
72-
* This makes it possible to specify 'time is optional, but if it is present, it must have these fields'
73-
* by settings {@code optionalTime = true} and putting time fields such as {@link ChronoField#HOUR_OF_DAY}
74-
* and {@link ChronoField#MINUTE_OF_HOUR} in {@code mandatoryFields}.
75-
* @param defaults
76-
* Map of default field values, if they are not present in the parsed string.
72+
* @param mandatoryFields The set of fields that must be present for a valid parse. These should be specified in field order
73+
* (eg if {@link ChronoField#DAY_OF_MONTH} is specified,
74+
* {@link ChronoField#MONTH_OF_YEAR} should also be specified).
75+
* {@link ChronoField#YEAR} is always mandatory.
76+
* @param optionalTime {@code false} if the presence of time fields follows {@code mandatoryFields},
77+
* {@code true} if a time component is always optional,
78+
* despite the presence of time fields in {@code mandatoryFields}.
79+
* This makes it possible to specify 'time is optional, but if it is present, it must have these fields'
80+
* by setting {@code optionalTime = true} and putting time fields such as {@link ChronoField#HOUR_OF_DAY}
81+
* and {@link ChronoField#MINUTE_OF_HOUR} in {@code mandatoryFields}.
82+
* @param maxAllowedField The most-specific field allowed in the parsed string,
83+
* or {@code null} if everything up to nanoseconds is allowed.
84+
* @param decimalSeparator The decimal separator that is allowed.
85+
* @param timezonePresence Specifies if the timezone is optional, mandatory, or forbidden.
86+
* @param defaults Map of default field values, if they are not present in the parsed string.
7787
*/
78-
Iso8601Parser(Set<ChronoField> mandatoryFields, boolean optionalTime, Map<ChronoField, Integer> defaults) {
79-
checkChronoFields(mandatoryFields, VALID_MANDATORY_FIELDS);
88+
Iso8601Parser(
89+
Set<ChronoField> mandatoryFields,
90+
boolean optionalTime,
91+
@Nullable ChronoField maxAllowedField,
92+
DecimalSeparator decimalSeparator,
93+
TimezonePresence timezonePresence,
94+
Map<ChronoField, Integer> defaults
95+
) {
96+
checkChronoFields(mandatoryFields, VALID_SPECIFIED_FIELDS);
97+
if (maxAllowedField != null && VALID_SPECIFIED_FIELDS.contains(maxAllowedField) == false) {
98+
throw new IllegalArgumentException("Invalid chrono field specified " + maxAllowedField);
99+
}
80100
checkChronoFields(defaults.keySet(), VALID_DEFAULT_FIELDS);
81101

82102
this.mandatoryFields = EnumSet.of(ChronoField.YEAR); // year is always mandatory
83103
this.mandatoryFields.addAll(mandatoryFields);
84104
this.optionalTime = optionalTime;
105+
this.maxAllowedField = maxAllowedField;
106+
this.decimalSeparator = Objects.requireNonNull(decimalSeparator);
107+
this.timezonePresence = Objects.requireNonNull(timezonePresence);
85108
this.defaults = defaults.isEmpty() ? Map.of() : new EnumMap<>(defaults);
86109
}
87110

@@ -103,6 +126,18 @@ Set<ChronoField> mandatoryFields() {
103126
return mandatoryFields;
104127
}
105128

129+
ChronoField maxAllowedField() {
130+
return maxAllowedField;
131+
}
132+
133+
DecimalSeparator decimalSeparator() {
134+
return decimalSeparator;
135+
}
136+
137+
TimezonePresence timezonePresence() {
138+
return timezonePresence;
139+
}
140+
106141
private boolean isOptional(ChronoField field) {
107142
return mandatoryFields.contains(field) == false;
108143
}
@@ -186,7 +221,7 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
186221
: ParseResult.error(4);
187222
}
188223

189-
if (str.charAt(4) != '-') return ParseResult.error(4);
224+
if (str.charAt(4) != '-' || maxAllowedField == ChronoField.YEAR) return ParseResult.error(4);
190225

191226
// MONTHS
192227
Integer months = parseInt(str, 5, 7);
@@ -208,7 +243,7 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
208243
: ParseResult.error(7);
209244
}
210245

211-
if (str.charAt(7) != '-') return ParseResult.error(7);
246+
if (str.charAt(7) != '-' || maxAllowedField == ChronoField.MONTH_OF_YEAR) return ParseResult.error(7);
212247

213248
// DAYS
214249
Integer days = parseInt(str, 8, 10);
@@ -230,7 +265,7 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
230265
: ParseResult.error(10);
231266
}
232267

233-
if (str.charAt(10) != 'T') return ParseResult.error(10);
268+
if (str.charAt(10) != 'T' || maxAllowedField == ChronoField.DAY_OF_MONTH) return ParseResult.error(10);
234269
if (len == 11) {
235270
return isOptional(ChronoField.HOUR_OF_DAY)
236271
? new ParseResult(
@@ -252,7 +287,7 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
252287
Integer hours = parseInt(str, 11, 13);
253288
if (hours == null || hours > 23) return ParseResult.error(11);
254289
if (len == 13) {
255-
return isOptional(ChronoField.MINUTE_OF_HOUR)
290+
return isOptional(ChronoField.MINUTE_OF_HOUR) && timezonePresence != TimezonePresence.MANDATORY
256291
? new ParseResult(
257292
withZoneOffset(
258293
years,
@@ -285,13 +320,13 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
285320
: ParseResult.error(13);
286321
}
287322

288-
if (str.charAt(13) != ':') return ParseResult.error(13);
323+
if (str.charAt(13) != ':' || maxAllowedField == ChronoField.HOUR_OF_DAY) return ParseResult.error(13);
289324

290325
// MINUTES + timezone
291326
Integer minutes = parseInt(str, 14, 16);
292327
if (minutes == null || minutes > 59) return ParseResult.error(14);
293328
if (len == 16) {
294-
return isOptional(ChronoField.SECOND_OF_MINUTE)
329+
return isOptional(ChronoField.SECOND_OF_MINUTE) && timezonePresence != TimezonePresence.MANDATORY
295330
? new ParseResult(
296331
withZoneOffset(
297332
years,
@@ -324,15 +359,17 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
324359
: ParseResult.error(16);
325360
}
326361

327-
if (str.charAt(16) != ':') return ParseResult.error(16);
362+
if (str.charAt(16) != ':' || maxAllowedField == ChronoField.MINUTE_OF_HOUR) return ParseResult.error(16);
328363

329364
// SECONDS + timezone
330365
Integer seconds = parseInt(str, 17, 19);
331366
if (seconds == null || seconds > 59) return ParseResult.error(17);
332367
if (len == 19) {
333-
return new ParseResult(
334-
withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), defaultTimezone)
335-
);
368+
return isOptional(ChronoField.NANO_OF_SECOND) && timezonePresence != TimezonePresence.MANDATORY
369+
? new ParseResult(
370+
withZoneOffset(years, months, days, hours, minutes, seconds, defaultZero(ChronoField.NANO_OF_SECOND), defaultTimezone)
371+
)
372+
: ParseResult.error(19);
336373
}
337374
if (isZoneId(str, 19)) {
338375
ZoneId timezone = parseZoneId(str, 19);
@@ -343,11 +380,9 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
343380
: ParseResult.error(19);
344381
}
345382

346-
char decSeparator = str.charAt(19);
347-
if (decSeparator != '.' && decSeparator != ',') return ParseResult.error(19);
383+
if (checkDecimalSeparator(str.charAt(19)) == false || maxAllowedField == ChronoField.SECOND_OF_MINUTE) return ParseResult.error(19);
348384

349385
// NANOS + timezone
350-
// nanos are always optional
351386
// the last number could be millis or nanos, or any combination in the middle
352387
// so we keep parsing numbers until we get to not a number
353388
int nanos = 0;
@@ -364,7 +399,9 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
364399
nanos *= NANO_MULTIPLICANDS[29 - pos];
365400

366401
if (len == pos) {
367-
return new ParseResult(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, defaultTimezone));
402+
return timezonePresence != TimezonePresence.MANDATORY
403+
? new ParseResult(withZoneOffset(years, months, days, hours, minutes, seconds, nanos, defaultTimezone))
404+
: ParseResult.error(pos);
368405
}
369406
if (isZoneId(str, pos)) {
370407
ZoneId timezone = parseZoneId(str, pos);
@@ -377,6 +414,16 @@ private ParseResult parse(CharSequence str, @Nullable ZoneId defaultTimezone) {
377414
return ParseResult.error(pos);
378415
}
379416

417+
private boolean checkDecimalSeparator(char separator) {
418+
boolean isDot = separator == '.';
419+
boolean isComma = separator == ',';
420+
return switch (decimalSeparator) {
421+
case DOT -> isDot;
422+
case COMMA -> isComma;
423+
case BOTH -> isDot || isComma;
424+
};
425+
}
426+
380427
private static boolean isZoneId(CharSequence str, int pos) {
381428
// all region zoneIds must start with [A-Za-z] (see ZoneId#of)
382429
// this also covers Z and UT/UTC/GMT zone variants
@@ -385,10 +432,14 @@ private static boolean isZoneId(CharSequence str, int pos) {
385432
}
386433

387434
/**
388-
* This parses the zone offset, which is of the format accepted by {@link java.time.ZoneId#of(String)}.
435+
* This parses the zone offset, which is of the format accepted by {@link ZoneId#of(String)}.
389436
* It has fast paths for numerical offsets, but falls back on {@code ZoneId.of} for non-trivial zone ids.
390437
*/
391438
private ZoneId parseZoneId(CharSequence str, int pos) {
439+
if (timezonePresence == TimezonePresence.FORBIDDEN) {
440+
return null;
441+
}
442+
392443
int len = str.length();
393444
char first = str.charAt(pos);
394445

server/src/main/java/org/elasticsearch/common/time/JavaDateFormatter.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import java.time.temporal.TemporalAccessor;
1919
import java.util.ArrayList;
2020
import java.util.Arrays;
21-
import java.util.Collections;
2221
import java.util.List;
2322
import java.util.Locale;
2423
import java.util.Map;
@@ -149,19 +148,24 @@ static DateFormatter combined(String input, List<DateFormatter> formatters) {
149148
assert formatters.isEmpty() == false;
150149

151150
DateTimePrinter printer = null;
152-
List<DateTimeParser> parsers = new ArrayList<>(formatters.size());
153-
List<DateTimeParser> roundUpParsers = new ArrayList<>(formatters.size());
151+
List<DateTimeParser[]> parsers = new ArrayList<>(formatters.size());
152+
List<DateTimeParser[]> roundUpParsers = new ArrayList<>(formatters.size());
154153

155154
for (DateFormatter formatter : formatters) {
156155
JavaDateFormatter javaDateFormatter = (JavaDateFormatter) formatter;
157156
if (printer == null) {
158157
printer = javaDateFormatter.printer;
159158
}
160-
Collections.addAll(parsers, javaDateFormatter.parsers);
161-
Collections.addAll(roundUpParsers, javaDateFormatter.roundupParsers);
159+
parsers.add(javaDateFormatter.parsers);
160+
roundUpParsers.add(javaDateFormatter.roundupParsers);
162161
}
163162

164-
return new JavaDateFormatter(input, printer, roundUpParsers.toArray(DateTimeParser[]::new), parsers.toArray(DateTimeParser[]::new));
163+
return new JavaDateFormatter(
164+
input,
165+
printer,
166+
roundUpParsers.stream().flatMap(Arrays::stream).toArray(DateTimeParser[]::new),
167+
parsers.stream().flatMap(Arrays::stream).toArray(DateTimeParser[]::new)
168+
);
165169
}
166170

167171
private JavaDateFormatter(String format, DateTimePrinter printer, DateTimeParser[] roundupParsers, DateTimeParser[] parsers) {
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.common.time;
10+
11+
enum TimezonePresence {
12+
OPTIONAL,
13+
MANDATORY,
14+
FORBIDDEN
15+
}

0 commit comments

Comments
 (0)