Skip to content

Commit ae13c61

Browse files
Enable the use of nested field type with index.mode=time_series (elastic#122224) (elastic#122520)
This patch removes the check that rejected requests attempting to use fields of type `nested` in indices with `index.mode=time_series`. It also updates TimeSeriesIdFieldMapper#postParse to set the _id field on child (nested) documents once the _id has been calculated. Closes elastic#120874. (Cherry picked from commit 5315088.) Conflicts: rest-api-spec/build.gradle
1 parent b363183 commit ae13c61

File tree

10 files changed

+266
-43
lines changed

10 files changed

+266
-43
lines changed

docs/changelog/122224.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 122224
2+
summary: Enable the use of nested field type with index.mode=time_series
3+
area: Mapping
4+
type: enhancement
5+
issues:
6+
- 120874

rest-api-spec/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
250250
task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility")
251251
task.skipTest("tsdb/20_mapping/stored source is supported", "no longer serialize source_mode")
252252
task.skipTest("tsdb/20_mapping/Synthetic source", "no longer serialize source_mode")
253+
task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported")
253254
task.skipTest("logsdb/10_settings/create logs index", "no longer serialize source_mode")
254255
task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode")
255256
task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode")
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
setup:
2+
- requires:
3+
cluster_features: ["mapper.tsdb_nested_field_support"]
4+
reason: "tsdb index with nested field support enabled"
5+
6+
---
7+
"Create TSDB index with field of nested type":
8+
- do:
9+
indices.create:
10+
index: test
11+
body:
12+
settings:
13+
index:
14+
mode: time_series
15+
number_of_replicas: 1
16+
number_of_shards: 1
17+
routing_path: [department]
18+
time_series:
19+
start_time: 2021-04-28T00:00:00Z
20+
end_time: 2021-04-29T00:00:00Z
21+
mappings:
22+
properties:
23+
"@timestamp":
24+
type: date
25+
department:
26+
type: keyword
27+
time_series_dimension: true
28+
staff:
29+
type: integer
30+
courses:
31+
type: nested
32+
properties:
33+
name:
34+
type: keyword
35+
credits:
36+
type: integer
37+
38+
- do:
39+
index:
40+
index: test
41+
body: { "@timestamp": "2021-04-28T01:00:00Z", "department": "compsci", "staff": 12, "courses": [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4 } ] }
42+
43+
- do:
44+
index:
45+
index: test
46+
body: { "@timestamp": "2021-04-28T02:00:00Z", "department": "math", "staff": 20, "courses": [ { "name": "Precalculus", "credits": 1 }, { "name": "Linear Algebra", "credits": 3 } ] }
47+
48+
- do:
49+
indices.refresh:
50+
index: [ test ]
51+
52+
- do:
53+
search:
54+
index: test
55+
body:
56+
size: 0
57+
query:
58+
nested:
59+
path: "courses"
60+
query:
61+
bool:
62+
must:
63+
- term:
64+
courses.name: Precalculus
65+
- term:
66+
courses.credits: 3
67+
68+
- match: { hits.total.value: 0 }
69+
70+
- do:
71+
search:
72+
index: test
73+
body:
74+
query:
75+
nested:
76+
path: "courses"
77+
query:
78+
bool:
79+
must:
80+
- term:
81+
courses.name: "Object Oriented Programming"
82+
- term:
83+
courses.credits: 3
84+
85+
- match: { hits.total.value: 1 }
86+
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" }
87+
- match: { hits.hits.0._source.department: "compsci" }
88+
- match: { hits.hits.0._source.courses: [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4, } ] }
89+
90+
---
91+
92+
"TSDB index with multi-level nested fields":
93+
- do:
94+
indices.create:
95+
index: test
96+
body:
97+
settings:
98+
index:
99+
mode: time_series
100+
number_of_replicas: 1
101+
number_of_shards: 1
102+
routing_path: [department]
103+
time_series:
104+
start_time: 2021-04-28T00:00:00Z
105+
end_time: 2021-04-29T00:00:00Z
106+
mappings:
107+
properties:
108+
"@timestamp":
109+
type: date
110+
department:
111+
type: keyword
112+
time_series_dimension: true
113+
staff:
114+
type: integer
115+
courses:
116+
type: nested
117+
properties:
118+
name:
119+
type: keyword
120+
credits:
121+
type: integer
122+
students:
123+
type: nested
124+
properties:
125+
name:
126+
type: text
127+
major:
128+
type: keyword
129+
130+
- do:
131+
index:
132+
index: test
133+
body:
134+
"@timestamp": "2021-04-28T01:00:00Z"
135+
department: "compsci"
136+
staff: 12
137+
courses:
138+
- name: "Object Oriented Programming"
139+
credits: 3
140+
students:
141+
- name: "Kimora Tanner"
142+
major: "Computer Science"
143+
- name: "Bruno Garrett"
144+
major: "Software Engineering"
145+
- name: "Theory of Computation"
146+
credits: 4
147+
students:
148+
- name: "Elliott Booker"
149+
major: "Computer Engineering"
150+
- name: "Kimora Tanner"
151+
major: "Software Engineering"
152+
153+
- do:
154+
index:
155+
index: test
156+
body:
157+
"@timestamp": "2021-04-28T02:00:00Z"
158+
department: "math"
159+
staff: 20
160+
courses:
161+
- name: "Precalculus"
162+
credits: 4
163+
students:
164+
- name: "Elliott Ayers"
165+
major: "Software Engineering"
166+
- name: "Sylvie Howe"
167+
major: "Computer Engineering"
168+
- name: "Linear Algebra"
169+
credits: 3
170+
students:
171+
- name: "Kimora Tanner"
172+
major: "Computer Science"
173+
- name: "Bruno Garett"
174+
major: "Software Engineering"
175+
- name: "Amelia Booker"
176+
major: "Psychology"
177+
178+
- do:
179+
index:
180+
index: test
181+
body:
182+
"@timestamp": "2021-04-28T03:00:00Z"
183+
department: "compsci"
184+
staff: 12
185+
courses:
186+
- name: "Object Oriented Programming"
187+
credits: 3
188+
students:
189+
- name: "Kimora Tanner"
190+
major: "Computer Science"
191+
- name: "Bruno Garrett"
192+
major: "Software Engineering"
193+
- name: "Elliott Booker"
194+
major: "Computer Engineering"
195+
- name: "Theory of Computation"
196+
credits: 4
197+
students:
198+
- name: "Kimora Tanner"
199+
major: "Software Engineering"
200+
- name: "Elliott Ayers"
201+
major: "Software Engineering"
202+
- name: "Apollo Pittman"
203+
major: "Computer Engineering"
204+
205+
- do:
206+
indices.refresh:
207+
index: [ test ]
208+
209+
- do:
210+
search:
211+
index: test
212+
body:
213+
query:
214+
nested:
215+
path: "courses"
216+
query:
217+
bool:
218+
must:
219+
- nested:
220+
path: "courses.students"
221+
query:
222+
bool:
223+
must:
224+
- match:
225+
courses.students.name: "Elliott"
226+
- term:
227+
courses.students.major: "Computer Engineering"
228+
- term:
229+
courses.name: "Theory of Computation"
230+
231+
- match: { hits.total.value: 1 }
232+
- match: { hits.hits.0._source.department: "compsci" }
233+
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" }

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/20_mapping.yml

Lines changed: 0 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -344,37 +344,6 @@ nested dimensions:
344344
type: keyword
345345
time_series_dimension: true
346346

347-
---
348-
nested fields:
349-
- requires:
350-
cluster_features: ["gte_v8.2.0"]
351-
reason: message changed in 8.2.0
352-
353-
- do:
354-
catch: /cannot have nested fields when index is in \[index.mode=time_series\]/
355-
indices.create:
356-
index: test
357-
body:
358-
settings:
359-
index:
360-
mode: time_series
361-
routing_path: [dim]
362-
time_series:
363-
start_time: 2021-04-28T00:00:00Z
364-
end_time: 2021-04-29T00:00:00Z
365-
mappings:
366-
properties:
367-
"@timestamp":
368-
type: date
369-
dim:
370-
type: keyword
371-
time_series_dimension: true
372-
nested:
373-
type: nested
374-
properties:
375-
foo:
376-
type: keyword
377-
378347
---
379348
"Unable to define a metric type for a runtime field":
380349
- requires:

server/src/main/java/org/elasticsearch/index/IndexMode.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.elasticsearch.index.mapper.MapperService;
3030
import org.elasticsearch.index.mapper.MappingLookup;
3131
import org.elasticsearch.index.mapper.MetadataFieldMapper;
32-
import org.elasticsearch.index.mapper.NestedLookup;
3332
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
3433
import org.elasticsearch.index.mapper.RoutingFieldMapper;
3534
import org.elasticsearch.index.mapper.RoutingFields;
@@ -156,9 +155,6 @@ private static String error(Setting<?> unsupported) {
156155

157156
@Override
158157
public void validateMapping(MappingLookup lookup) {
159-
if (lookup.nestedLookup() != NestedLookup.EMPTY) {
160-
throw new IllegalArgumentException("cannot have nested fields when index is in " + tsdbMode());
161-
}
162158
if (((RoutingFieldMapper) lookup.getMapper(RoutingFieldMapper.NAME)).required()) {
163159
throw new IllegalArgumentException(routingRequiredBad());
164160
}

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -665,8 +665,14 @@ public final DocumentParserContext createNestedContext(NestedObjectMapper nested
665665
if (idField != null) {
666666
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
667667
// delete it when the root document is deleted too.
668-
// NOTE: we don't support nested fields in tsdb so it's safe to assume the standard id mapper.
669668
doc.add(new StringField(IdFieldMapper.NAME, idField.binaryValue(), Field.Store.NO));
669+
} else if (indexSettings().getMode() == IndexMode.TIME_SERIES) {
670+
// For time series indices, the _id is generated from the _tsid, which in turn is generated from the values of the configured
671+
// routing fields. At this point in document parsing, we can't guarantee that we've parsed all the routing fields yet, so the
672+
// parent document's _id is not yet available.
673+
// So we just add the child document without the parent _id, then in TimeSeriesIdFieldMapper#postParse we set the _id on all
674+
// child documents once we've calculated it.
675+
assert getRoutingFields().equals(RoutingFields.Noop.INSTANCE) == false;
670676
} else {
671677
throw new IllegalStateException("The root document of a nested document should have an _id field");
672678
}

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public Set<NodeFeature> getFeatures() {
5959
"mapper.counted_keyword.synthetic_source_native_support"
6060
);
6161

62+
public static final NodeFeature TSDB_NESTED_FIELD_SUPPORT = new NodeFeature("mapper.tsdb_nested_field_support");
6263
public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed");
6364
public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support");
6465
public static final NodeFeature SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX = new NodeFeature("mapper.nested.sorting_fields_check_fix");
@@ -80,6 +81,7 @@ public Set<NodeFeature> getTestFeatures() {
8081
COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT,
8182
SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX,
8283
DYNAMIC_HANDLING_IN_COPY_TO,
84+
TSDB_NESTED_FIELD_SUPPORT,
8385
SourceFieldMapper.SYNTHETIC_RECOVERY_SOURCE,
8486
ObjectMapper.SUBOBJECTS_FALSE_MAPPING_UPDATE_FIX
8587
);

server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99

1010
package org.elasticsearch.index.mapper;
1111

12+
import org.apache.lucene.document.Field;
1213
import org.apache.lucene.document.SortedDocValuesField;
14+
import org.apache.lucene.document.StringField;
1315
import org.apache.lucene.search.Query;
1416
import org.apache.lucene.util.BytesRef;
1517
import org.elasticsearch.common.bytes.BytesReference;
@@ -136,13 +138,21 @@ public void postParse(DocumentParserContext context) throws IOException {
136138
}
137139
context.doc().add(new SortedDocValuesField(fieldType().name(), timeSeriesId));
138140

139-
TsidExtractingIdFieldMapper.createField(
141+
BytesRef uidEncoded = TsidExtractingIdFieldMapper.createField(
140142
context,
141143
getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID)
142144
? routingPathFields.routingBuilder()
143145
: null,
144146
timeSeriesId
145147
);
148+
149+
// We need to add the uid or id to nested Lucene documents so that when a document gets deleted, the nested documents are
150+
// also deleted. Usually this happens when the nested document is created (in DocumentParserContext#createNestedContext), but
151+
// for time-series indices the _id isn't available at that point.
152+
for (LuceneDocument doc : context.nonRootDocuments()) {
153+
assert doc.getField(IdFieldMapper.NAME) == null;
154+
doc.add(new StringField(IdFieldMapper.NAME, uidEncoded, Field.Store.NO));
155+
}
146156
}
147157

148158
private IndexVersion getIndexVersionCreated(final DocumentParserContext context) {

server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
4646

4747
private static final long SEED = 0;
4848

49-
public static void createField(DocumentParserContext context, IndexRouting.ExtractFromSource.Builder routingBuilder, BytesRef tsid) {
49+
public static BytesRef createField(
50+
DocumentParserContext context,
51+
IndexRouting.ExtractFromSource.Builder routingBuilder,
52+
BytesRef tsid
53+
) {
5054
final long timestamp = DataStreamTimestampFieldMapper.extractTimestampValue(context.doc());
5155
String id;
5256
if (routingBuilder != null) {
@@ -94,6 +98,7 @@ public static void createField(DocumentParserContext context, IndexRouting.Extra
9498

9599
BytesRef uidEncoded = Uid.encodeId(context.id());
96100
context.doc().add(new StringField(NAME, uidEncoded, Field.Store.YES));
101+
return uidEncoded;
97102
}
98103

99104
public static String createId(int routingHash, BytesRef tsid, long timestamp) {

0 commit comments

Comments
 (0)