Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
59b2d57
Add REST test with nested field in time-series index
jordan-powers Feb 11, 2025
412f12e
Enable nested fields in time-series mode indices
jordan-powers Feb 11, 2025
c6097c7
Update docs/changelog/122224.yaml
jordan-powers Feb 11, 2025
3c669c0
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 11, 2025
ed6f965
Use nested query in nested field test
jordan-powers Feb 11, 2025
40334db
Iter
jordan-powers Feb 11, 2025
42a20f9
Add test for tsdb with multi-level nested fields
jordan-powers Feb 11, 2025
ab7935d
Update tests to support nested fields
jordan-powers Feb 11, 2025
22c9ea3
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 11, 2025
d7acbe0
Add cluster feature
jordan-powers Feb 11, 2025
117217c
Add cluster feature to 20_mapping tests
jordan-powers Feb 11, 2025
e181096
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 11, 2025
ff1a4ea
Fix yaml=tsdb/20_mapping/nested dimensions
jordan-powers Feb 11, 2025
6907664
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 11, 2025
07b7a57
Avoid use of LuceneDocument::getField
jordan-powers Feb 12, 2025
84bd7dd
Remove redundant test {tsdb/20_mapping/nested fields}
jordan-powers Feb 12, 2025
b5f8642
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 12, 2025
138759a
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 13, 2025
905534f
Mute tsdb/20_mapping/nested fields
jordan-powers Feb 13, 2025
7b4a860
Merge remote-tracking branch 'upstream/main' into fix_120874
jordan-powers Feb 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/122224.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 122224
summary: Enable the use of nested field type with index.mode=time_series
area: Mapping
type: enhancement
issues:
- 120874
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
# Precondition declared for this test file: the cluster must advertise the
# `mapper.tsdb_nested_field_support` feature (nested fields inside time_series indices).
setup:
- requires:
cluster_features: ["mapper.tsdb_nested_field_support"]
reason: "tsdb index with nested field support enabled"

---
# Creates a time_series index whose mapping contains a single-level nested field (`courses`),
# indexes two documents, and verifies that nested queries are evaluated per nested object
# (conditions must hold on the SAME course entry, not on any two entries of one document).
"Create TSDB index with field of nested type":
# Index creation: time_series mode, routed on the `department` dimension, with `courses` mapped as nested.
- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
number_of_replicas: 1
number_of_shards: 1
routing_path: [department]
time_series:
start_time: 2021-04-28T00:00:00Z
end_time: 2021-04-29T00:00:00Z
mappings:
properties:
"@timestamp":
type: date
department:
type: keyword
time_series_dimension: true
staff:
type: integer
courses:
type: nested
properties:
name:
type: keyword
credits:
type: integer

# Document 1 (compsci, 01:00): courses are "Object Oriented Programming" (3 credits) and "Theory of Computation" (4 credits).
- do:
index:
index: test
body: { "@timestamp": "2021-04-28T01:00:00Z", "department": "compsci", "staff": 12, "courses": [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4 } ] }

# Document 2 (math, 02:00): courses are "Precalculus" (1 credit) and "Linear Algebra" (3 credits).
- do:
index:
index: test
body: { "@timestamp": "2021-04-28T02:00:00Z", "department": "math", "staff": 20, "courses": [ { "name": "Precalculus", "credits": 1 }, { "name": "Linear Algebra", "credits": 3 } ] }

# Refresh before searching so both documents are visible to the queries below.
- do:
indices.refresh:
index: [ test ]

# Negative case: "Precalculus" and credits=3 both occur in the math document, but on different
# course objects (Precalculus has 1 credit, Linear Algebra has 3), so the nested query must find nothing.
- do:
search:
index: test
body:
size: 0
query:
nested:
path: "courses"
query:
bool:
must:
- term:
courses.name: Precalculus
- term:
courses.credits: 3

- match: { hits.total.value: 0 }

# Positive case: name="Object Oriented Programming" and credits=3 hold on one and the same
# course object, and only in the compsci document.
- do:
search:
index: test
body:
query:
nested:
path: "courses"
query:
bool:
must:
- term:
courses.name: "Object Oriented Programming"
- term:
courses.credits: 3

# Exactly the compsci document is returned, with the full nested array in _source.
# The assertion expects the timestamp with milliseconds although it was indexed without them.
- match: { hits.total.value: 1 }
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" }
- match: { hits.hits.0._source.department: "compsci" }
- match: { hits.hits.0._source.courses: [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4, } ] }

---

# Same scenario with two nesting levels: `courses` is nested and each course holds a nested
# `students` list. Three documents are indexed and a nested-inside-nested query must match
# only the document where course AND student conditions hold on the same nested objects.
"TSDB index with multi-level nested fields":
# Index creation: time_series mode routed on `department`; `courses.students` is a nested field inside the nested `courses`.
- do:
indices.create:
index: test
body:
settings:
index:
mode: time_series
number_of_replicas: 1
number_of_shards: 1
routing_path: [department]
time_series:
start_time: 2021-04-28T00:00:00Z
end_time: 2021-04-29T00:00:00Z
mappings:
properties:
"@timestamp":
type: date
department:
type: keyword
time_series_dimension: true
staff:
type: integer
courses:
type: nested
properties:
name:
type: keyword
credits:
type: integer
students:
type: nested
properties:
name:
type: text
major:
type: keyword

# Document 1 (compsci, 01:00): "Theory of Computation" includes Elliott Booker / Computer Engineering.
# This is the only document that satisfies the query at the end of the test.
- do:
index:
index: test
body:
"@timestamp": "2021-04-28T01:00:00Z"
department: "compsci"
staff: 12
courses:
- name: "Object Oriented Programming"
credits: 3
students:
- name: "Kimora Tanner"
major: "Computer Science"
- name: "Bruno Garrett"
major: "Software Engineering"
- name: "Theory of Computation"
credits: 4
students:
- name: "Elliott Booker"
major: "Computer Engineering"
- name: "Kimora Tanner"
major: "Software Engineering"

# Document 2 (math, 02:00): has no "Theory of Computation" course, so it cannot match.
# NOTE(review): "Bruno Garett" below is spelled differently from "Bruno Garrett" in the other
# documents; the assertions in this test do not depend on it - confirm whether it is intentional.
- do:
index:
index: test
body:
"@timestamp": "2021-04-28T02:00:00Z"
department: "math"
staff: 20
courses:
- name: "Precalculus"
credits: 4
students:
- name: "Elliott Ayers"
major: "Software Engineering"
- name: "Sylvie Howe"
major: "Computer Engineering"
- name: "Linear Algebra"
credits: 3
students:
- name: "Kimora Tanner"
major: "Computer Science"
- name: "Bruno Garett"
major: "Software Engineering"
- name: "Amelia Booker"
major: "Psychology"

# Document 3 (compsci, 03:00): near-miss. Elliott Booker / Computer Engineering is enrolled in
# "Object Oriented Programming" here, while "Theory of Computation" only has an Elliott with a
# different major and a Computer Engineering student with a different name.
- do:
index:
index: test
body:
"@timestamp": "2021-04-28T03:00:00Z"
department: "compsci"
staff: 12
courses:
- name: "Object Oriented Programming"
credits: 3
students:
- name: "Kimora Tanner"
major: "Computer Science"
- name: "Bruno Garrett"
major: "Software Engineering"
- name: "Elliott Booker"
major: "Computer Engineering"
- name: "Theory of Computation"
credits: 4
students:
- name: "Kimora Tanner"
major: "Software Engineering"
- name: "Elliott Ayers"
major: "Software Engineering"
- name: "Apollo Pittman"
major: "Computer Engineering"

# Refresh before searching so all three documents are visible.
- do:
indices.refresh:
index: [ test ]

# Outer nested query selects a course named "Theory of Computation"; the inner nested query
# requires a single student of that course whose name matches "Elliott" and whose major is
# "Computer Engineering".
- do:
search:
index: test
body:
query:
nested:
path: "courses"
query:
bool:
must:
- nested:
path: "courses.students"
query:
bool:
must:
- match:
courses.students.name: "Elliott"
- term:
courses.students.major: "Computer Engineering"
- term:
courses.name: "Theory of Computation"

# Only the 01:00 compsci document qualifies (see the notes on documents 2 and 3 above).
- match: { hits.total.value: 1 }
- match: { hits.hits.0._source.department: "compsci" }
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" }
Original file line number Diff line number Diff line change
Expand Up @@ -347,11 +347,10 @@ nested dimensions:
---
nested fields:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this test? I think it repeats the tests above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it repeats any tests above since this is the only test in this file with a nested non-time_series_dimension field. But it is definitely redundant with the tests I added in 160_nested_fields.yml, so I'll take it out.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant above in the PR, sorry.

- requires:
cluster_features: ["gte_v8.2.0"]
cluster_features: ["gte_v8.2.0", "mapper.tsdb_nested_field_support"]
reason: message changed in 8.2.0

- do:
catch: /cannot have nested fields when index is in \[index.mode=time_series\]/
indices.create:
index: test
body:
Expand Down
4 changes: 0 additions & 4 deletions server/src/main/java/org/elasticsearch/index/IndexMode.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.MetadataFieldMapper;
import org.elasticsearch.index.mapper.NestedLookup;
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.RoutingFields;
Expand Down Expand Up @@ -156,9 +155,6 @@ private static String error(Setting<?> unsupported) {

@Override
public void validateMapping(MappingLookup lookup) {
if (lookup.nestedLookup() != NestedLookup.EMPTY) {
throw new IllegalArgumentException("cannot have nested fields when index is in " + tsdbMode());
}
if (((RoutingFieldMapper) lookup.getMapper(RoutingFieldMapper.NAME)).required()) {
throw new IllegalArgumentException(routingRequiredBad());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -665,8 +665,14 @@ public final DocumentParserContext createNestedContext(NestedObjectMapper nested
if (idField != null) {
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
// delete it when the root document is deleted too.
// NOTE: time-series indices are handled by the next branch (their _id is not available yet at this point), so a non-null idField here is assumed to come from the standard id mapper.
doc.add(new StringField(IdFieldMapper.NAME, idField.binaryValue(), Field.Store.NO));
} else if (indexSettings().getMode() == IndexMode.TIME_SERIES) {
// For time series indices, the _id is generated from the _tsid, which in turn is generated from the values of the configured
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should add an assert that getRoutingFields() doesn't return a reference to RoutingFields.Noop#INSTANCE? Just to make sure we are able to collect dimension values in order to generate _tsid / _id at a later stage?

// routing fields. At this point in document parsing, we can't guarantee that we've parsed all the routing fields yet, so the
// parent document's _id is not yet available.
// So we just add the child document without the parent _id, then in TimeSeriesIdFieldMapper#postParse we set the _id on all
// child documents once we've calculated it.
assert getRoutingFields().equals(RoutingFields.Noop.INSTANCE) == false;
} else {
throw new IllegalStateException("The root document of a nested document should have an _id field");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class MapperFeatures implements FeatureSpecification {
"mapper.counted_keyword.synthetic_source_native_support"
);

public static final NodeFeature TSDB_NESTED_FIELD_SUPPORT = new NodeFeature("mapper.tsdb_nested_field_support");
public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed");
public static final NodeFeature SPARSE_VECTOR_STORE_SUPPORT = new NodeFeature("mapper.sparse_vector.store_support");
public static final NodeFeature SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX = new NodeFeature("mapper.nested.sorting_fields_check_fix");
Expand All @@ -49,6 +50,7 @@ public Set<NodeFeature> getTestFeatures() {
COUNTED_KEYWORD_SYNTHETIC_SOURCE_NATIVE_SUPPORT,
SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX,
DYNAMIC_HANDLING_IN_COPY_TO,
TSDB_NESTED_FIELD_SUPPORT,
SourceFieldMapper.SYNTHETIC_RECOVERY_SOURCE,
ObjectMapper.SUBOBJECTS_FALSE_MAPPING_UPDATE_FIX
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -142,6 +144,15 @@ public void postParse(DocumentParserContext context) throws IOException {
: null,
timeSeriesId
);

// We need to add the uid or id to nested Lucene documents so that when a document gets deleted, the nested documents are
// also deleted. Usually this happens when the nested document is created (in DocumentParserContext#createNestedContext), but
// for time-series indices the _id isn't available at that point.
var binaryId = context.doc().getField(IdFieldMapper.NAME).binaryValue();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getField is kind of expensive since it iterates over all fields. Let's do this only when there are non root documents. Or maybe we can return the id from TsidExtractingIdFieldMapper above.

for (LuceneDocument doc : context.nonRootDocuments()) {
assert doc.getField(IdFieldMapper.NAME) == null;
doc.add(new StringField(IdFieldMapper.NAME, binaryId, Field.Store.NO));
}
}

private IndexVersion getIndexVersionCreated(final DocumentParserContext context) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,6 @@ protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "semantic_text");
}

/**
 * Returns the fixed error message stating that nested fields are not allowed when the index
 * is in {@code index.mode=time_series}.
 *
 * NOTE(review): presumably the superclass compares this against the failure raised when the
 * minimal mapping is used as a routing path - confirm against the base test class.
 *
 * @param mapper the mapper under test; not used by this override
 * @return the expected error message
 */
@Override
protected String minimalIsInvalidRoutingPathErrorMessage(Mapper mapper) {
return "cannot have nested fields when index is in [index.mode=time_series]";
}

@Override
protected void metaMapping(XContentBuilder b) throws IOException {
super.metaMapping(b);
Expand Down