-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Enable the use of nested field type with index.mode=time_series #122224
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
59b2d57
412f12e
c6097c7
3c669c0
ed6f965
40334db
42a20f9
ab7935d
22c9ea3
d7acbe0
117217c
e181096
ff1a4ea
6907664
07b7a57
84bd7dd
b5f8642
138759a
905534f
7b4a860
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 122224 | ||
summary: Enable the use of nested field type with index.mode=time_series | ||
area: Mapping | ||
type: enhancement | ||
issues: | ||
- 120874 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,233 @@ | ||
setup: | ||
- requires: | ||
cluster_features: ["mapper.tsdb_nested_field_support"] | ||
reason: "tsdb index with nested field support enabled" | ||
|
||
--- | ||
"Create TSDB index with field of nested type": | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
index: | ||
mode: time_series | ||
number_of_replicas: 1 | ||
number_of_shards: 1 | ||
routing_path: [department] | ||
time_series: | ||
start_time: 2021-04-28T00:00:00Z | ||
end_time: 2021-04-29T00:00:00Z | ||
mappings: | ||
properties: | ||
"@timestamp": | ||
type: date | ||
department: | ||
type: keyword | ||
time_series_dimension: true | ||
staff: | ||
type: integer | ||
courses: | ||
type: nested | ||
properties: | ||
name: | ||
type: keyword | ||
credits: | ||
type: integer | ||
|
||
- do: | ||
index: | ||
index: test | ||
body: { "@timestamp": "2021-04-28T01:00:00Z", "department": "compsci", "staff": 12, "courses": [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4 } ] } | ||
|
||
- do: | ||
index: | ||
index: test | ||
body: { "@timestamp": "2021-04-28T02:00:00Z", "department": "math", "staff": 20, "courses": [ { "name": "Precalculus", "credits": 1 }, { "name": "Linear Algebra", "credits": 3 } ] } | ||
|
||
- do: | ||
indices.refresh: | ||
index: [ test ] | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
size: 0 | ||
query: | ||
nested: | ||
path: "courses" | ||
query: | ||
bool: | ||
must: | ||
- term: | ||
courses.name: Precalculus | ||
- term: | ||
courses.credits: 3 | ||
|
||
- match: { hits.total.value: 0 } | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
query: | ||
nested: | ||
path: "courses" | ||
query: | ||
bool: | ||
must: | ||
- term: | ||
courses.name: "Object Oriented Programming" | ||
- term: | ||
courses.credits: 3 | ||
|
||
- match: { hits.total.value: 1 } | ||
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" } | ||
- match: { hits.hits.0._source.department: "compsci" } | ||
- match: { hits.hits.0._source.courses: [ { "name": "Object Oriented Programming", "credits": 3 }, { "name": "Theory of Computation", "credits": 4, } ] } | ||
|
||
--- | ||
|
||
"TSDB index with multi-level nested fields": | ||
- do: | ||
indices.create: | ||
index: test | ||
body: | ||
settings: | ||
index: | ||
mode: time_series | ||
number_of_replicas: 1 | ||
number_of_shards: 1 | ||
routing_path: [department] | ||
time_series: | ||
start_time: 2021-04-28T00:00:00Z | ||
end_time: 2021-04-29T00:00:00Z | ||
mappings: | ||
properties: | ||
"@timestamp": | ||
type: date | ||
department: | ||
type: keyword | ||
time_series_dimension: true | ||
staff: | ||
type: integer | ||
courses: | ||
type: nested | ||
properties: | ||
name: | ||
type: keyword | ||
credits: | ||
type: integer | ||
students: | ||
type: nested | ||
properties: | ||
name: | ||
type: text | ||
major: | ||
type: keyword | ||
|
||
- do: | ||
index: | ||
index: test | ||
body: | ||
"@timestamp": "2021-04-28T01:00:00Z" | ||
department: "compsci" | ||
staff: 12 | ||
courses: | ||
- name: "Object Oriented Programming" | ||
credits: 3 | ||
students: | ||
- name: "Kimora Tanner" | ||
major: "Computer Science" | ||
- name: "Bruno Garrett" | ||
major: "Software Engineering" | ||
- name: "Theory of Computation" | ||
credits: 4 | ||
students: | ||
- name: "Elliott Booker" | ||
major: "Computer Engineering" | ||
- name: "Kimora Tanner" | ||
major: "Software Engineering" | ||
|
||
- do: | ||
index: | ||
index: test | ||
body: | ||
"@timestamp": "2021-04-28T02:00:00Z" | ||
department: "math" | ||
staff: 20 | ||
courses: | ||
- name: "Precalculus" | ||
credits: 4 | ||
students: | ||
- name: "Elliott Ayers" | ||
major: "Software Engineering" | ||
- name: "Sylvie Howe" | ||
major: "Computer Engineering" | ||
- name: "Linear Algebra" | ||
credits: 3 | ||
students: | ||
- name: "Kimora Tanner" | ||
major: "Computer Science" | ||
- name: "Bruno Garett" | ||
major: "Software Engineering" | ||
- name: "Amelia Booker" | ||
major: "Psychology" | ||
|
||
- do: | ||
index: | ||
index: test | ||
body: | ||
"@timestamp": "2021-04-28T03:00:00Z" | ||
department: "compsci" | ||
staff: 12 | ||
courses: | ||
- name: "Object Oriented Programming" | ||
credits: 3 | ||
students: | ||
- name: "Kimora Tanner" | ||
major: "Computer Science" | ||
- name: "Bruno Garrett" | ||
major: "Software Engineering" | ||
- name: "Elliott Booker" | ||
major: "Computer Engineering" | ||
- name: "Theory of Computation" | ||
credits: 4 | ||
students: | ||
- name: "Kimora Tanner" | ||
major: "Software Engineering" | ||
- name: "Elliott Ayers" | ||
major: "Software Engineering" | ||
- name: "Apollo Pittman" | ||
major: "Computer Engineering" | ||
|
||
- do: | ||
indices.refresh: | ||
index: [ test ] | ||
|
||
- do: | ||
search: | ||
index: test | ||
body: | ||
query: | ||
nested: | ||
path: "courses" | ||
query: | ||
bool: | ||
must: | ||
- nested: | ||
path: "courses.students" | ||
query: | ||
bool: | ||
must: | ||
- match: | ||
courses.students.name: "Elliott" | ||
- term: | ||
courses.students.major: "Computer Engineering" | ||
- term: | ||
courses.name: "Theory of Computation" | ||
|
||
- match: { hits.total.value: 1 } | ||
- match: { hits.hits.0._source.department: "compsci" } | ||
- match: { "hits.hits.0._source.@timestamp": "2021-04-28T01:00:00.000Z" } |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -665,8 +665,14 @@ public final DocumentParserContext createNestedContext(NestedObjectMapper nested | |
if (idField != null) { | ||
// We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then | ||
// delete it when the root document is deleted too. | ||
// NOTE: we don't support nested fields in tsdb so it's safe to assume the standard id mapper. | ||
doc.add(new StringField(IdFieldMapper.NAME, idField.binaryValue(), Field.Store.NO)); | ||
} else if (indexSettings().getMode() == IndexMode.TIME_SERIES) { | ||
// For time series indices, the _id is generated from the _tsid, which in turn is generated from the values of the configured | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we should add an assert that |
||
// routing fields. At this point in document parsing, we can't guarantee that we've parsed all the routing fields yet, so the | ||
// parent document's _id is not yet available. | ||
// So we just add the child document without the parent _id, then in TimeSeriesIdFieldMapper#postParse we set the _id on all | ||
// child documents once we've calculated it. | ||
assert getRoutingFields().equals(RoutingFields.Noop.INSTANCE) == false; | ||
} else { | ||
throw new IllegalStateException("The root document of a nested document should have an _id field"); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,9 @@ | |
|
||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.document.Field; | ||
import org.apache.lucene.document.SortedDocValuesField; | ||
import org.apache.lucene.document.StringField; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.elasticsearch.common.Strings; | ||
|
@@ -142,6 +144,15 @@ public void postParse(DocumentParserContext context) throws IOException { | |
: null, | ||
timeSeriesId | ||
); | ||
|
||
// We need to add the uid or id to nested Lucene documents so that when a document gets deleted, the nested documents are | ||
// also deleted. Usually this happens when the nested document is created (in DocumentParserContext#createNestedContext), but | ||
// for time-series indices the _id isn't available at that point. | ||
var binaryId = context.doc().getField(IdFieldMapper.NAME).binaryValue(); | ||
|
||
for (LuceneDocument doc : context.nonRootDocuments()) { | ||
assert doc.getField(IdFieldMapper.NAME) == null; | ||
doc.add(new StringField(IdFieldMapper.NAME, binaryId, Field.Store.NO)); | ||
} | ||
} | ||
|
||
private IndexVersion getIndexVersionCreated(final DocumentParserContext context) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this test? I think it repeats tests above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it repeats any tests above since this is the only test in this file with a nested non-time_series_dimension field. But it is definitely redundant with the tests I added in 160_nested_fields.yml, so I'll take it out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant above in the PR, sorry.