diff --git a/docs/changelog/122224.yaml b/docs/changelog/122224.yaml new file mode 100644 index 0000000000000..41ae8c6578600 --- /dev/null +++ b/docs/changelog/122224.yaml @@ -0,0 +1,6 @@ +pr: 122224 +summary: Enable the use of nested field type with index.mode=time_series +area: Mapping +type: enhancement +issues: + - 120874 diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index f0df33877a965..03ee3c7b52764 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -250,6 +250,7 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task -> task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") task.skipTest("tsdb/20_mapping/stored source is supported", "no longer serialize source_mode") task.skipTest("tsdb/20_mapping/Synthetic source", "no longer serialize source_mode") + task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported") task.skipTest("logsdb/10_settings/create logs index", "no longer serialize source_mode") task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode") task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode") diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/160_nested_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/160_nested_fields.yml new file mode 100644 index 0000000000000..f4aca5ab264e8 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/160_nested_fields.yml @@ -0,0 +1,233 @@ +setup: + - requires: + cluster_features: ["mapper.tsdb_nested_field_support"] + reason: "tsdb index with nested field support enabled" + +--- +"Create TSDB index with field of nested type": + - do: + indices.create: + index: test + body: + settings: + index: + mode: time_series + number_of_replicas: 1 + number_of_shards: 1 + routing_path: [department] + time_series: + start_time: 2021-04-28T00:00:00Z + end_time: 2021-04-29T00:00:00Z + mappings: + properties: + "@timestamp": + type: date + department: + type: keyword + time_series_dimension: true + staff: + type: integer + courses: + type: nested + properties: + name: + type: keyword + credits: + type: integer + + - do: + index: + index: test + body: { "@timestamp": "2021-04-28T01:00:00Z", "department": "compsci", "staff": 12, "courses": [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4 } ] } + + - do: + index: + index: test + body: { "@timestamp": "2021-04-28T02:00:00Z", "department": "math", "staff": 20, "courses": [ { "name": "Precalculus", "credits": 1 }, { "name": "Linear Algebra", "credits": 3 } ] } + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + body: + size: 0 + query: + nested: + path: "courses" + query: + bool: + must: + - term: + courses.name: Precalculus + - term: + courses.credits: 3 + + - match: { hits.total.value: 0 } + + - do: + search: + index: test + body: + query: + nested: + path: "courses" + query: + bool: + must: + - term: + courses.name: "Object Oriented Programming" + - term: + courses.credits: 3 + + - match: { hits.total.value: 1 } + - match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" } + - match: { hits.hits.0._source.department: "compsci" } + - match: { hits.hits.0._source.courses: [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4, } ] } + +--- + +"TSDB index with multi-level nested fields": + - do: + indices.create: + index: test + body: + settings: + index: + mode: time_series + number_of_replicas: 1 + number_of_shards: 1 + routing_path: [department] + time_series: + start_time: 2021-04-28T00:00:00Z + end_time: 2021-04-29T00:00:00Z + mappings: + properties: + "@timestamp": + type: date + department: + type: keyword + time_series_dimension: true + staff: + type: integer + courses: + type: nested + properties: + name: + type: keyword + credits: + type: integer + students: + type: nested + properties: + name: + type: text + major: + type: keyword + + - do: + index: + index: test + body: + "@timestamp": "2021-04-28T01:00:00Z" + department: "compsci" + staff: 12 + courses: + - name: "Object Oriented Programming" + credits: 3 + students: + - name: "Kimora Tanner" + major: "Computer Science" + - name: "Bruno Garrett" + major: "Software Engineering" + - name: "Theory of Computation" + credits: 4 + students: + - name: "Elliott Booker" + major: "Computer Engineering" + - name: "Kimora Tanner" + major: "Software Engineering" + + - do: + index: + index: test + body: + "@timestamp": "2021-04-28T02:00:00Z" + department: "math" + staff: 20 + courses: + - name: "Precalculus" + credits: 4 + students: + - name: "Elliott Ayers" + major: "Software Engineering" + - name: "Sylvie Howe" + major: "Computer Engineering" + - name: "Linear Algebra" + credits: 3 + students: + - name: "Kimora Tanner" + major: "Computer Science" + - name: "Bruno Garett" + major: "Software Engineering" + - name: "Amelia Booker" + major: "Psychology" + + - do: + index: + index: test + body: + "@timestamp": "2021-04-28T03:00:00Z" + department: "compsci" + staff: 12 + courses: + - name: "Object Oriented Programming" + credits: 3 + students: + - name: "Kimora Tanner" + major: "Computer Science" + - name: "Bruno Garrett" + major: "Software Engineering" + - name: "Elliott Booker" + major: "Computer Engineering" + - name: "Theory of Computation" + credits: 4 + students: + - name: "Kimora Tanner" + major: "Software Engineering" + - name: "Elliott Ayers" + major: "Software Engineering" + - name: "Apollo Pittman" + major: "Computer Engineering" + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + body: + query: + nested: + path: "courses" + query: + bool: + must: + - nested: + path: "courses.students" + query: + bool: + must: + - match: + courses.students.name: "Elliott" + - term: + courses.students.major: "Computer Engineering" + - term: + courses.name: "Theory of Computation" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.department: "compsci" } + - match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/20_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/20_mapping.yml index f25601fc2e228..5963ddb46e0b3 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/20_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/20_mapping.yml @@ -344,37 +344,6 @@ nested dimensions: type: keyword time_series_dimension: true ---- -nested fields: - - requires: - cluster_features: ["gte_v8.2.0"] - reason: message changed in 8.2.0 - - - do: - catch: /cannot have nested fields when index is in \[index.mode=time_series\]/ - indices.create: - index: test - body: - settings: - index: - mode: time_series - routing_path: [dim] - time_series: - start_time: 2021-04-28T00:00:00Z - end_time: 2021-04-29T00:00:00Z - mappings: - properties: - "@timestamp": - type: date - dim: - type: keyword - time_series_dimension: true - nested: - type: nested - properties: - foo: - type: keyword - --- "Unable to define a metric type for a runtime field": - requires: diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index 7287a0bf307b9..04bf820926660 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -29,7 +29,6 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.MetadataFieldMapper; -import org.elasticsearch.index.mapper.NestedLookup; import org.elasticsearch.index.mapper.ProvidedIdFieldMapper; import org.elasticsearch.index.mapper.RoutingFieldMapper; import org.elasticsearch.index.mapper.RoutingFields; @@ -156,9 +155,6 @@ private static String error(Setting unsupported) { @Override public void validateMapping(MappingLookup lookup) { - if (lookup.nestedLookup() != NestedLookup.EMPTY) { - throw new IllegalArgumentException("cannot have nested fields when index is in " + tsdbMode()); - } if (((RoutingFieldMapper) lookup.getMapper(RoutingFieldMapper.NAME)).required()) { throw new IllegalArgumentException(routingRequiredBad()); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index 361a31cbe0c81..9ab6265257aa2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -665,8 +665,14 @@ public final DocumentParserContext createNestedContext(NestedObjectMapper nested if (idField != null) { // We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then // delete it when the root document is deleted too. - // NOTE: we don't support nested fields in tsdb so it's safe to assume the standard id mapper. doc.add(new StringField(IdFieldMapper.NAME, idField.binaryValue(), Field.Store.NO)); + } else if (indexSettings().getMode() == IndexMode.TIME_SERIES) { + // For time series indices, the _id is generated from the _tsid, which in turn is generated from the values of the configured + // routing fields. At this point in document parsing, we can't guarantee that we've parsed all the routing fields yet, so the + // parent document's _id is not yet available. + // So we just add the child document without the parent _id, then in TimeSeriesIdFieldMapper#postParse we set the _id on all + // child documents once we've calculated it. + assert getRoutingFields().equals(RoutingFields.Noop.INSTANCE) == false; } else { throw new IllegalStateException("The root document of a nested document should have an _id field"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 08548cb36d4e3..f03f109107ee7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -59,6 +59,7 @@ public Set getFeatures() { "mapper.counted_keyword.synthetic_source_native_support" ); + public static final NodeFeature TSDB_NESTED_FIELD_SUPPORT = new NodeFeature("mapper.tsdb_nested_field_support"); public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed"); public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support"); public static final NodeFeature SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX = new NodeFeature("mapper.nested.sorting_fields_check_fix"); @@ -80,6 +81,7 @@ public Set getTestFeatures() { COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT, SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX, DYNAMIC_HANDLING_IN_COPY_TO, + TSDB_NESTED_FIELD_SUPPORT, SourceFieldMapper.SYNTHETIC_RECOVERY_SOURCE, ObjectMapper.SUBOBJECTS_FALSE_MAPPING_UPDATE_FIX ); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java index e9f53c6e29dc9..ea441c52a40b0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java @@ -9,7 +9,9 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.bytes.BytesReference; @@ -136,13 +138,21 @@ public void postParse(DocumentParserContext context) throws IOException { } context.doc().add(new SortedDocValuesField(fieldType().name(), timeSeriesId)); - TsidExtractingIdFieldMapper.createField( + BytesRef uidEncoded = TsidExtractingIdFieldMapper.createField( context, getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID) ? routingPathFields.routingBuilder() : null, timeSeriesId ); + + // We need to add the uid or id to nested Lucene documents so that when a document gets deleted, the nested documents are + // also deleted. Usually this happens when the nested document is created (in DocumentParserContext#createNestedContext), but + // for time-series indices the _id isn't available at that point. + for (LuceneDocument doc : context.nonRootDocuments()) { + assert doc.getField(IdFieldMapper.NAME) == null; + doc.add(new StringField(IdFieldMapper.NAME, uidEncoded, Field.Store.NO)); + } } private IndexVersion getIndexVersionCreated(final DocumentParserContext context) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java index 5ac9631916acd..6af9580fac150 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TsidExtractingIdFieldMapper.java @@ -46,7 +46,11 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext private static final long SEED = 0; - public static void createField(DocumentParserContext context, IndexRouting.ExtractFromSource.Builder routingBuilder, BytesRef tsid) { + public static BytesRef createField( + DocumentParserContext context, + IndexRouting.ExtractFromSource.Builder routingBuilder, + BytesRef tsid + ) { final long timestamp = DataStreamTimestampFieldMapper.extractTimestampValue(context.doc()); String id; if (routingBuilder != null) { @@ -94,6 +98,7 @@ public static void createField(DocumentParserContext context, IndexRouting.Extra BytesRef uidEncoded = Uid.encodeId(context.id()); context.doc().add(new StringField(NAME, uidEncoded, Field.Store.YES)); + return uidEncoded; } public static String createId(int routingHash, BytesRef tsid, long timestamp) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index d78b27607b91b..ff0935cefff42 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -131,11 +131,6 @@ protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "semantic_text"); } - @Override - protected String minimalIsInvalidRoutingPathErrorMessage(Mapper mapper) { - return "cannot have nested fields when index is in [index.mode=time_series]"; - } - @Override protected void metaMapping(XContentBuilder b) throws IOException { super.metaMapping(b);