Skip to content

Commit fd5a708

Browse files
authored
[6.8] Week based parsing for ingest date processor backport(#58597) (#58805)
Date processor was incorrectly parsing week-based dates: when a week-based year was provided, the ingest module assumed the year field was not present on the date and tried to apply the logic for dd/MM type of dates. Date Processor also allows users to specify a locale parameter. It should be taken into account when parsing dates - currently it is only used for formatting. If someone specifies the 'en-us' locale, then the calendar data rules for that locale should be used. The iso8601 ingest format is still using the joda implementation. Closes #58479
1 parent 138e3c7 commit fd5a708

File tree

6 files changed

+296
-23
lines changed

6 files changed

+296
-23
lines changed

docs/reference/release-notes/6.8.asciidoc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ This issue is fixed in Elasticsearch 6.8.10 and 7.7.1.
3434

3535
Also see <<breaking-changes-6.8,Breaking changes in 6.8>>.
3636

37+
* Java-based formats (those with the '8' prefix): week-based parsing and calculations use the JDK default calendar data provider, which applies the Sunday,1 rule.
38+
Sunday is considered the first day of the week, and a week needs only 1 day in the new year to count as the first week of that year.
39+
It can be worked around by using a locale that is based on the ISO8601 rule (Monday,4) - for instance en-GB.
40+
This issue is fixed in Elasticsearch 7.7 https://github.com/elastic/elasticsearch/pull/48209
41+
3742
[[enhancement-6.8.9]]
3843
[float]
3944
=== Enhancements

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/DateFormat.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import java.time.ZonedDateTime;
3434
import java.time.temporal.ChronoField;
3535
import java.time.temporal.TemporalAccessor;
36+
import java.time.temporal.WeekFields;
3637
import java.util.Arrays;
3738
import java.util.List;
3839
import java.util.Locale;
@@ -101,7 +102,10 @@ Function<String, DateTime> getFunction(String format, DateTimeZone timezone, Loc
101102
TemporalAccessor accessor = formatter.parse(text);
102103
// if there is no year, we fall back to the current one and
103104
// fill the rest of the date up with the parsed date
104-
if (accessor.isSupported(ChronoField.YEAR) == false) {
105+
if (accessor.isSupported(ChronoField.YEAR) == false
106+
&& accessor.isSupported(ChronoField.YEAR_OF_ERA) == false
107+
&& accessor.isSupported(WeekFields.of(locale).weekOfWeekBasedYear()) == false) {
108+
105109
ZonedDateTime newTime = Instant.EPOCH.atZone(ZoneOffset.UTC).withYear(year);
106110
for (ChronoField field : FIELDS) {
107111
if (accessor.isSupported(field)) {
@@ -112,7 +116,7 @@ Function<String, DateTime> getFunction(String format, DateTimeZone timezone, Loc
112116
accessor = newTime.withZoneSameLocal(DateUtils.dateTimeZoneToZoneId(timezone));
113117
}
114118

115-
long millis = DateFormatters.from(accessor).toInstant().toEpochMilli();
119+
long millis = DateFormatters.from(accessor, locale).toInstant().toEpochMilli();
116120
return new DateTime(millis, timezone);
117121
};
118122
} else {

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/DateFormatTests.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,32 @@ public void testParseDefaultYearBackwardsCompatible() {
7777
assertThat(nextYear, is(jodaDateTime.withZone(DateTimeZone.UTC).getYear()));
7878
}
7979

80+
public void testParseWeekBased() {
81+
String format = "8YYYY-ww";
82+
ZoneId zoneId = ZoneId.of("Europe/Amsterdam");
83+
DateTimeZone timezone = DateUtils.zoneIdToDateTimeZone(zoneId);
84+
85+
Function<String, DateTime> javaFunction = DateFormat.Java.getFunction(format, timezone, Locale.ROOT);
86+
DateTime dateTime = javaFunction.apply("2020-33");
87+
assertThat(dateTime, equalTo(new DateTime(2020,8,9,0,0,0,0,timezone)));
88+
}
89+
90+
public void testParseWeekBasedWithLocale() {
91+
String format = "8YYYY-ww";
92+
ZoneId zoneId = ZoneId.of("Europe/Amsterdam");
93+
DateTimeZone timezone = DateUtils.zoneIdToDateTimeZone(zoneId);
94+
95+
Function<String, DateTime> javaFunctionUS = DateFormat.Java.getFunction(format, timezone, Locale.US);
96+
DateTime dateTime = javaFunctionUS.apply("2020-33");
97+
//33rd week of 2020 starts on 9th August 2020 as per US locale
98+
assertThat(dateTime, equalTo(new DateTime(2020,8,9,0,0,0,0,timezone)));
99+
100+
Function<String, DateTime> javaFunctionUK = DateFormat.Java.getFunction(format, timezone, Locale.UK);
101+
dateTime = javaFunctionUK.apply("2020-33");
102+
//33rd week of 2020 starts on 10th August 2020 as per UK locale
103+
assertThat(dateTime, equalTo(new DateTime(2020,8,10,0,0,0,0,timezone)));
104+
}
105+
80106
public void testParseUnixMs() {
81107
assertThat(DateFormat.UnixMs.getFunction(null, DateTimeZone.UTC, null).apply("1000500").getMillis(), equalTo(1000500L));
82108
}

modules/ingest-common/src/test/resources/rest-api-spec/test/ingest/30_date_processor.yml

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,208 @@ teardown:
4242
- match: { _source.date_source_field: "12/06/2010" }
4343
- match: { _source.date_target_field: "2010-06-12T00:00:00.000+02:00" }
4444

45+
---
46+
"Test week based date parsing with default locale - Sunday,1 rule":
47+
- do:
48+
indices.create:
49+
index: test
50+
body:
51+
mappings:
52+
test:
53+
properties:
54+
date_source_field:
55+
type: date
56+
format: 8YYYY-ww
57+
58+
- do:
59+
ingest.put_pipeline:
60+
id: "my_pipeline"
61+
body: >
62+
{
63+
"description": "_description",
64+
"processors": [
65+
{
66+
"date" : {
67+
"field" : "date_source_field",
68+
"target_field" : "date_target_field",
69+
"formats" : ["8YYYY-ww"]
70+
}
71+
}
72+
]
73+
}
74+
- match: { acknowledged: true }
75+
76+
- do:
77+
ingest.simulate:
78+
id: "my_pipeline"
79+
body: >
80+
{
81+
"docs": [
82+
{
83+
"_source": {
84+
"date_source_field": "2020-33"
85+
}
86+
}
87+
]
88+
}
89+
- length: { docs: 1 }
90+
- match: { docs.0.doc._source.date_source_field: "2020-33" }
91+
#we don't have support for changing the calendar data provider in 6.8, hence it is using Sunday,1 (java default)
92+
- match: { docs.0.doc._source.date_target_field: "2020-08-09T00:00:00.000Z" }
93+
- length: { docs.0.doc._ingest: 1 }
94+
- is_true: docs.0.doc._ingest.timestamp
95+
96+
- do:
97+
index:
98+
index: test
99+
type: test
100+
id: 1
101+
pipeline: "my_pipeline"
102+
body: {date_source_field: "2020-33"}
103+
104+
- do:
105+
get:
106+
index: test
107+
type: test
108+
id: 1
109+
- match: { _source.date_source_field: "2020-33" }
110+
#we don't have support for changing the calendar data provider in 6.8, hence it is using Sunday,1 (java default)
111+
- match: { _source.date_target_field: "2020-08-09T00:00:00.000Z" }
112+
113+
---
114+
"Test week based date parsing with ISO based locale - Monday,4 rule":
115+
- do:
116+
indices.create:
117+
index: test
118+
body:
119+
mappings:
120+
test:
121+
properties:
122+
date_source_field:
123+
type: date
124+
format: 8YYYY-ww
125+
locale: en-GB
126+
127+
- do:
128+
ingest.put_pipeline:
129+
id: "my_pipeline"
130+
body: >
131+
{
132+
"description": "_description",
133+
"processors": [
134+
{
135+
"date" : {
136+
"field" : "date_source_field",
137+
"target_field" : "date_target_field",
138+
"formats" : ["8YYYY-ww"],
139+
"locale" : "en-GB"
140+
}
141+
}
142+
]
143+
}
144+
- match: { acknowledged: true }
145+
146+
- do:
147+
ingest.simulate:
148+
id: "my_pipeline"
149+
body: >
150+
{
151+
"docs": [
152+
{
153+
"_source": {
154+
"date_source_field": "2020-33"
155+
}
156+
}
157+
]
158+
}
159+
- length: { docs: 1 }
160+
- match: { docs.0.doc._source.date_source_field: "2020-33" }
161+
- match: { docs.0.doc._source.date_target_field: "2020-08-10T00:00:00.000Z" }
162+
- length: { docs.0.doc._ingest: 1 }
163+
- is_true: docs.0.doc._ingest.timestamp
164+
165+
- do:
166+
index:
167+
index: test
168+
type: test
169+
id: 1
170+
pipeline: "my_pipeline"
171+
body: {date_source_field: "2020-33"}
172+
173+
- do:
174+
get:
175+
index: test
176+
type: test
177+
id: 1
178+
- match: { _source.date_source_field: "2020-33" }
179+
- match: { _source.date_target_field: "2020-08-10T00:00:00.000Z" }
180+
181+
182+
---
183+
"Test week based date parsing with locale":
184+
#the locale is also used when parsing in a pipeline. As per the US locale, the 33rd week of 2020 starts on 9 August 2020 (Sunday)
185+
- do:
186+
indices.create:
187+
index: test
188+
body:
189+
mappings:
190+
test:
191+
properties:
192+
date_source_field:
193+
type: date
194+
format: 8YYYY-ww
195+
locale: en-US
196+
197+
- do:
198+
ingest.put_pipeline:
199+
id: "my_pipeline"
200+
body: >
201+
{
202+
"description": "_description",
203+
"processors": [
204+
{
205+
"date" : {
206+
"field" : "date_source_field",
207+
"target_field" : "date_target_field",
208+
"formats" : ["8YYYY-ww"],
209+
"locale" : "en-US"
210+
}
211+
}
212+
]
213+
}
214+
- match: { acknowledged: true }
215+
216+
- do:
217+
ingest.simulate:
218+
id: "my_pipeline"
219+
body: >
220+
{
221+
"docs": [
222+
{
223+
"_source": {
224+
"date_source_field": "2020-33"
225+
}
226+
}
227+
]
228+
}
229+
- length: { docs: 1 }
230+
- match: { docs.0.doc._source.date_source_field: "2020-33" }
231+
- match: { docs.0.doc._source.date_target_field: "2020-08-09T00:00:00.000Z" }
232+
- length: { docs.0.doc._ingest: 1 }
233+
- is_true: docs.0.doc._ingest.timestamp
234+
235+
- do:
236+
index:
237+
index: test
238+
type: test
239+
id: 1
240+
pipeline: "my_pipeline"
241+
body: {date_source_field: "2020-33"}
242+
243+
- do:
244+
get:
245+
index: test
246+
type: test
247+
id: 1
248+
- match: { _source.date_source_field: "2020-33" }
249+
- match: { _source.date_target_field: "2020-08-09T00:00:00.000Z" }

0 commit comments

Comments
 (0)