From 14bd04879e29e6eb9eeb48df56d80ea9b4b26e22 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas Date: Fri, 27 Sep 2024 12:14:01 +0300 Subject: [PATCH] Fix dynamic values and object handling in `subobjects: auto` --- .../indices.create/20_synthetic_source.yml | 995 +---------------- .../22_synthetic_source_copy_to.yml | 997 ++++++++++++++++++ .../index/mapper/DocumentParser.java | 4 + .../mapper/DotExpandingXContentParser.java | 3 + .../index/mapper/ObjectMapper.java | 11 +- 5 files changed, 1038 insertions(+), 972 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/22_synthetic_source_copy_to.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 41d9fcc30a880..be5a9d653481d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1087,10 +1087,10 @@ subobjects auto with dynamic template: --- -synthetic_source with copy_to: +subobjects auto with overlapping dynamic object: - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source + cluster_features: ["mapper.subobjects_auto_fixes"] + reason: requires tracking ignored source and supporting subobjects auto setting - do: indices.create: @@ -1099,59 +1099,15 @@ synthetic_source with copy_to: mappings: _source: mode: synthetic + subobjects: auto properties: - number: + id: type: integer - copy_to: number_copy - number_copy: - type: keyword - boolean: - type: boolean - copy_to: boolean_copy - boolean_copy: - type: keyword - keyword: - type: keyword - copy_to: keyword_copy - keyword_copy: - type: keyword - date: - type: date - copy_to: date_copy - date_copy: - type: keyword - text: - type: text - copy_to: text_copy - text_copy: - type: keyword - ip: - type: ip - copy_to: ip_copy - ip_copy: - type: keyword - ip_range: - type: ip_range - copy_to: ip_range_copy - ip_range_copy: - type: keyword - geo_point: - type: geo_point - copy_to: geo_point_copy - geo_point_copy: - type: keyword - binary: - type: binary - copy_to: binary_copy - binary_copy: - type: keyword - scaled_float: - type: scaled_float - scaling_factor: 10 - copy_to: scaled_float_copy - scaled_float_copy: - type: keyword - + my.attributes: + subobjects: auto + properties: + foo: + type: integer - do: bulk: @@ -1159,927 +1115,26 @@ synthetic_source with copy_to: refresh: true body: - '{ "create": { } }' - - >- - { - "number": 100, - "boolean": false, - "keyword": "hello_keyword", - "date": "2015-01-01T12:10:30Z", - "text": "hello_text", - "match_only_text": "hello_match_only_text", - "ip": "192.168.1.1", - "ip_range": "10.0.0.0/24", - "geo_point": "POINT (-71.34 41.12)", - "binary": "aGVsbG8gY3VyaW91cyBwZXJzb24=", - "scaled_float": 1.5 - } - - - match: { errors: false } - - - do: - search: - index: test - body: - fields: ["number_copy", "boolean_copy", "keyword_copy", "date_copy", "text_copy", "ip_copy", "ip_range_copy", "geo_point_copy", "binary_copy", "scaled_float_copy"] - - - match: { hits.hits.0._source.number: 100 } - - match: { hits.hits.0._source.number_copy: null } - - match: { hits.hits.0.fields.number_copy.0: "100" } - - - match: { hits.hits.0._source.boolean: false } - - match: { hits.hits.0._source.boolean_copy: null } - - match: { hits.hits.0.fields.boolean_copy.0: "false" } - - - match: { hits.hits.0._source.keyword: "hello_keyword" } - - match: { hits.hits.0._source.keyword_copy: null } - - match: { hits.hits.0.fields.keyword_copy.0: "hello_keyword" } - - - match: { hits.hits.0._source.date: "2015-01-01T12:10:30Z" } - - match: { hits.hits.0._source.date_copy: null } - - match: { hits.hits.0.fields.date_copy.0: "2015-01-01T12:10:30Z" } - - - match: { hits.hits.0._source.text: "hello_text" } - - match: { hits.hits.0._source.text_copy: null } - - match: { hits.hits.0.fields.text_copy.0: "hello_text" } - - - match: { hits.hits.0._source.ip: "192.168.1.1" } - - match: { hits.hits.0._source.ip_copy: null } - - match: { hits.hits.0.fields.ip_copy.0: "192.168.1.1" } - - - match: { hits.hits.0._source.ip_range: "10.0.0.0/24" } - - match: { hits.hits.0._source.ip_range_copy: null } - - match: { hits.hits.0.fields.ip_range_copy.0: "10.0.0.0/24" } - - - match: { hits.hits.0._source.geo_point: "POINT (-71.34 41.12)" } - - match: { hits.hits.0._source.geo_point_copy: null } - - match: { hits.hits.0.fields.geo_point_copy.0: "POINT (-71.34 41.12)" } - - - match: { hits.hits.0._source.binary: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } - - match: { hits.hits.0._source.binary_copy: null } - - match: { hits.hits.0.fields.binary_copy.0: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } - - - match: { hits.hits.0._source.scaled_float: 1.5 } - - match: { hits.hits.0._source.scaled_float_copy: null } - - match: { hits.hits.0.fields.scaled_float_copy.0: "1.5" } - ---- -synthetic_source with disabled doc_values: - - requires: - cluster_features: ["mapper.source.synthetic_source_with_copy_to_and_doc_values_false"] - reason: requires disabled doc_values support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - number: - type: integer - doc_values: false - boolean: - type: boolean - doc_values: false - keyword: - type: keyword - doc_values: false - date: - type: date - doc_values: false - ip: - type: ip - doc_values: false - ip_range: - type: ip_range - doc_values: false - flattened: - type: flattened - doc_values: false - geo_point: - type: geo_point - doc_values: false - binary: - type: binary - doc_values: false - scaled_float: - type: scaled_float - scaling_factor: 10 - doc_values: false - - - do: - bulk: - index: test - refresh: true - body: + - '{ "id": 1, "my": { "attributes": { "foo": 10 } } }' + - '{ "create": { } }' + - '{ "id": 2, "my": { "attributes": { "foo": 20 } } }' - '{ "create": { } }' - - >- - { - "number": 100, - "boolean": false, - "keyword": "hello_keyword", - "date": "2015-01-01T12:10:30Z", - "ip": "192.168.1.1", - "ip_range": "10.0.0.0/24", - "flattened": { "f": "hey" }, - "geo_point": "POINT (-71.34 41.12)", - "binary": "aGVsbG8gY3VyaW91cyBwZXJzb24=", - "scaled_float": 1.5 - } + - '{ "id": 3, "my": { "attributes": { "foo": 30 } } }' + - '{ "create": { } }' + - '{ "id": 4, "my": { "attributes": { "foo": 40 } } }' - match: { errors: false } - do: search: index: test + sort: id - - match: { hits.hits.0._source.number: 100 } - - match: { hits.hits.0._source.boolean: false } - - match: { hits.hits.0._source.keyword: "hello_keyword" } - - match: { hits.hits.0._source.date: "2015-01-01T12:10:30Z" } - - match: { hits.hits.0._source.ip: "192.168.1.1" } - - match: { hits.hits.0._source.ip_range: "10.0.0.0/24" } - - match: { hits.hits.0._source.flattened.f: "hey" } - - match: { hits.hits.0._source.geo_point: "POINT (-71.34 41.12)" } - - match: { hits.hits.0._source.binary: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } - - match: { hits.hits.0._source.scaled_float: 1.5 } - ---- -fallback synthetic_source for text field: - - requires: - cluster_features: ["mapper.source.synthetic_source_with_copy_to_and_doc_values_false"] - reason: requires disabled doc_values support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - text: - type: text - store: false - - - do: - index: - index: test - id: 1 - refresh: true - body: - text: [ "world", "hello", "world" ] - - - do: - search: - index: test - - - match: - hits.hits.0._source: - text: [ "world", "hello", "world" ] - ---- -synthetic_source with copy_to and ignored values: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - k: - type: keyword - ignore_above: 1 - copy_to: copy - long: - type: long - ignore_malformed: true - copy_to: copy - copy: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - k: "hello" - long: "world" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - k: ["55", "66"] - long: ["77", "88"] - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "copy" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - long: "world" - - match: { hits.hits.0.fields.copy: ["hello", "world"] } - - - match: - hits.hits.1._source: - name: "B" - k: ["55", "66"] - long: ["77", "88"] - - match: { hits.hits.1.fields.copy: ["55", "66", "77", "88"] } - ---- -synthetic_source with copy_to field having values in source: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - k: - type: keyword - copy_to: copy - copy: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - copy: "world" - k: "hello" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - k: ["5", "6"] - copy: ["7", "8"] - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "copy" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - copy: "world" - - match: { hits.hits.0.fields.copy: ["hello", "world"] } - - - match: - hits.hits.1._source: - name: "B" - k: ["5", "6"] - copy: ["7", "8"] - - match: { hits.hits.1.fields.copy: ["5", "6", "7", "8"] } - ---- -synthetic_source with ignored source field using copy_to: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - k: - type: keyword - doc_values: false - copy_to: copy - copy: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - copy: "world" - k: "hello" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - k: ["5", "6"] - copy: ["7", "8"] - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "copy" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - copy: "world" - - match: { hits.hits.0.fields.copy: ["hello", "world"] } - - - match: - hits.hits.1._source: - name: "B" - k: ["5", "6"] - copy: ["7", "8"] - - match: { hits.hits.1.fields.copy: ["5", "6", "7", "8"] } - ---- -synthetic_source with copy_to field from dynamic template having values in source: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - dynamic_templates: - - copy_template: - match: "k" - mapping: - type: keyword - copy_to: copy - properties: - name: - type: keyword - copy: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - k: "hello" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - copy: "world" - k: "hello" - - - do: - index: - index: test - id: 3 - refresh: true - body: - name: "C" - k: ["5", "6"] - - - do: - index: - index: test - id: 4 - refresh: true - body: - name: "D" - k: ["5", "6"] - copy: ["7", "8"] - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "copy" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - - match: { hits.hits.0.fields.copy: ["hello"] } - - - match: - hits.hits.1._source: - name: "B" - k: "hello" - copy: "world" - - match: { hits.hits.1.fields.copy: ["hello", "world"] } - - - match: - hits.hits.2._source: - name: "C" - k: ["5", "6"] - - match: { hits.hits.2.fields.copy: ["5", "6"] } - - - match: - hits.hits.3._source: - name: "D" - k: ["5", "6"] - copy: ["7", "8"] - - match: { hits.hits.3.fields.copy: ["5", "6", "7", "8"] } - ---- -synthetic_source with copy_to and invalid values for copy: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] - reason: requires copy_to support in synthetic source - test_runner_features: "contains" - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - p: - type: long_range - copy_to: copy - copy: - type: keyword - - - do: - catch: bad_request - index: - index: test - id: 1 - refresh: true - body: - name: "A" - p: - gte: 10 - - - match: { error.type: "document_parsing_exception" } - - contains: { error.reason: "Copy-to currently only works for value-type fields" } - ---- -synthetic_source with copy_to pointing inside object: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - my_values: - properties: - k: - type: keyword - ignore_above: 1 - copy_to: c.copy - long: - type: long - copy_to: c.copy - c: - properties: - copy: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - my_values: - k: "hello" - long: 100 - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - my_values: - k: ["55", "66"] - long: [77, 88] - - - do: - index: - index: test - id: 3 - refresh: true - body: - name: "C" - my_values: - k: "hello" - long: 100 - c: - copy: "zap" - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "c.copy" ] - - - match: - hits.hits.0._source: - name: "A" - my_values: - k: "hello" - long: 100 - - match: - hits.hits.0.fields: - c.copy: [ "100", "hello" ] - - - match: - hits.hits.1._source: - name: "B" - my_values: - k: ["55", "66"] - long: [77, 88] - - match: - hits.hits.1.fields: - c.copy: ["55", "66", "77", "88"] - - - match: - hits.hits.2._source: - name: "C" - my_values: - k: "hello" - long: 100 - c: - copy: "zap" - - match: - hits.hits.2.fields: - c.copy: [ "100", "hello", "zap" ] - ---- -synthetic_source with copy_to pointing to ambiguous field: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - k: - type: keyword - copy_to: a.b.c - a: - properties: - b: - properties: - c: - type: keyword - b.c: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - k: "hey" - - - do: - search: - index: test - body: - docvalue_fields: [ "a.b.c" ] - - - match: - hits.hits.0._source: - k: "hey" - - match: - hits.hits.0.fields: - a.b.c: [ "hey" ] - ---- -synthetic_source with copy_to pointing to ambiguous field and subobjects false: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - subobjects: false - properties: - k: - type: keyword - copy_to: a.b.c - a: - properties: - b: - properties: - c: - type: keyword - b.c: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - k: "hey" - - - do: - search: - index: test - body: - docvalue_fields: [ "a.b.c" ] - - - match: - hits.hits.0._source: - k: "hey" - - match: - hits.hits.0.fields: - a.b.c: [ "hey" ] - ---- -synthetic_source with copy_to pointing to ambiguous field and subobjects auto: - - requires: - cluster_features: ["mapper.subobjects_auto_fixes"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - subobjects: auto - properties: - k: - type: keyword - copy_to: a.b.c - a: - properties: - b: - properties: - c: - type: keyword - b.c: - type: keyword - - - do: - index: - index: test - id: 1 - refresh: true - body: - k: "hey" - - - do: - search: - index: test - body: - docvalue_fields: [ "a.b.c" ] - - - match: - hits.hits.0._source: - k: "hey" - - match: - hits.hits.0.fields: - a.b.c: [ "hey" ] - ---- -synthetic_source with copy_to pointing at dynamic field: - - requires: - test_runner_features: contains - cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - k: - type: keyword - copy_to: c.copy - c: - properties: - f: - type: float - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - k: "hello" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - k: ["55", "66"] - - - do: - index: - index: test - id: 3 - refresh: true - body: - name: "C" - k: "hello" - c: - copy: "zap" - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "c.copy.keyword" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - - match: - hits.hits.0.fields: - c.copy.keyword: [ "hello" ] - - - match: - hits.hits.1._source: - name: "B" - k: ["55", "66"] - - match: - hits.hits.1.fields: - c.copy.keyword: [ "55", "66" ] - - - match: - hits.hits.2._source: - name: "C" - k: "hello" - c: - copy: "zap" - - match: - hits.hits.2.fields: - c.copy.keyword: [ "hello", "zap" ] - ---- -synthetic_source with copy_to pointing inside dynamic object: - - requires: - cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] - reason: requires copy_to support in synthetic source - - - do: - indices.create: - index: test - body: - mappings: - _source: - mode: synthetic - properties: - name: - type: keyword - k: - type: keyword - copy_to: c.copy - - - do: - index: - index: test - id: 1 - refresh: true - body: - name: "A" - k: "hello" - - - do: - index: - index: test - id: 2 - refresh: true - body: - name: "B" - k: ["55", "66"] - - - do: - index: - index: test - id: 3 - refresh: true - body: - name: "C" - k: "hello" - c: - copy: "zap" - - - do: - search: - index: test - sort: name - body: - docvalue_fields: [ "c.copy.keyword" ] - - - match: - hits.hits.0._source: - name: "A" - k: "hello" - - match: - hits.hits.0.fields: - c.copy.keyword: [ "hello" ] - - - match: - hits.hits.1._source: - name: "B" - k: ["55", "66"] - - match: - hits.hits.1.fields: - c.copy.keyword: [ "55", "66" ] - - - match: - hits.hits.2._source: - name: "C" - k: "hello" - c: - copy: "zap" - - match: - hits.hits.2.fields: - c.copy.keyword: [ "hello", "zap" ] - + - match: { hits.hits.0._source.id: 1 } + - match: { hits.hits.0._source.my\.attributes.foo: 10 } + - match: { hits.hits.1._source.id: 2 } + - match: { hits.hits.1._source.my\.attributes.foo: 20 } + - match: { hits.hits.2._source.id: 3 } + - match: { hits.hits.2._source.my\.attributes.foo: 30 } + - match: { hits.hits.3._source.id: 4 } + - match: { hits.hits.3._source.my\.attributes.foo: 40 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/22_synthetic_source_copy_to.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/22_synthetic_source_copy_to.yml new file mode 100644 index 0000000000000..6f964458654eb --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/22_synthetic_source_copy_to.yml @@ -0,0 +1,997 @@ +--- +synthetic_source with copy_to: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + number: + type: integer + copy_to: number_copy + number_copy: + type: keyword + boolean: + type: boolean + copy_to: boolean_copy + boolean_copy: + type: keyword + keyword: + type: keyword + copy_to: keyword_copy + keyword_copy: + type: keyword + date: + type: date + copy_to: date_copy + date_copy: + type: keyword + text: + type: text + copy_to: text_copy + text_copy: + type: keyword + ip: + type: ip + copy_to: ip_copy + ip_copy: + type: keyword + ip_range: + type: ip_range + copy_to: ip_range_copy + ip_range_copy: + type: keyword + geo_point: + type: geo_point + copy_to: geo_point_copy + geo_point_copy: + type: keyword + binary: + type: binary + copy_to: binary_copy + binary_copy: + type: keyword + scaled_float: + type: scaled_float + scaling_factor: 10 + copy_to: scaled_float_copy + scaled_float_copy: + type: keyword + + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - >- + { + "number": 100, + "boolean": false, + "keyword": "hello_keyword", + "date": "2015-01-01T12:10:30Z", + "text": "hello_text", + "match_only_text": "hello_match_only_text", + "ip": "192.168.1.1", + "ip_range": "10.0.0.0/24", + "geo_point": "POINT (-71.34 41.12)", + "binary": "aGVsbG8gY3VyaW91cyBwZXJzb24=", + "scaled_float": 1.5 + } + + - match: { errors: false } + + - do: + search: + index: test + body: + fields: ["number_copy", "boolean_copy", "keyword_copy", "date_copy", "text_copy", "ip_copy", "ip_range_copy", "geo_point_copy", "binary_copy", "scaled_float_copy"] + + - match: { hits.hits.0._source.number: 100 } + - match: { hits.hits.0._source.number_copy: null } + - match: { hits.hits.0.fields.number_copy.0: "100" } + + - match: { hits.hits.0._source.boolean: false } + - match: { hits.hits.0._source.boolean_copy: null } + - match: { hits.hits.0.fields.boolean_copy.0: "false" } + + - match: { hits.hits.0._source.keyword: "hello_keyword" } + - match: { hits.hits.0._source.keyword_copy: null } + - match: { hits.hits.0.fields.keyword_copy.0: "hello_keyword" } + + - match: { hits.hits.0._source.date: "2015-01-01T12:10:30Z" } + - match: { hits.hits.0._source.date_copy: null } + - match: { hits.hits.0.fields.date_copy.0: "2015-01-01T12:10:30Z" } + + - match: { hits.hits.0._source.text: "hello_text" } + - match: { hits.hits.0._source.text_copy: null } + - match: { hits.hits.0.fields.text_copy.0: "hello_text" } + + - match: { hits.hits.0._source.ip: "192.168.1.1" } + - match: { hits.hits.0._source.ip_copy: null } + - match: { hits.hits.0.fields.ip_copy.0: "192.168.1.1" } + + - match: { hits.hits.0._source.ip_range: "10.0.0.0/24" } + - match: { hits.hits.0._source.ip_range_copy: null } + - match: { hits.hits.0.fields.ip_range_copy.0: "10.0.0.0/24" } + + - match: { hits.hits.0._source.geo_point: "POINT (-71.34 41.12)" } + - match: { hits.hits.0._source.geo_point_copy: null } + - match: { hits.hits.0.fields.geo_point_copy.0: "POINT (-71.34 41.12)" } + + - match: { hits.hits.0._source.binary: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } + - match: { hits.hits.0._source.binary_copy: null } + - match: { hits.hits.0.fields.binary_copy.0: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } + + - match: { hits.hits.0._source.scaled_float: 1.5 } + - match: { hits.hits.0._source.scaled_float_copy: null } + - match: { hits.hits.0.fields.scaled_float_copy.0: "1.5" } + +--- +synthetic_source with disabled doc_values: + - requires: + cluster_features: ["mapper.source.synthetic_source_with_copy_to_and_doc_values_false"] + reason: requires disabled doc_values support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + number: + type: integer + doc_values: false + boolean: + type: boolean + doc_values: false + keyword: + type: keyword + doc_values: false + date: + type: date + doc_values: false + ip: + type: ip + doc_values: false + ip_range: + type: ip_range + doc_values: false + flattened: + type: flattened + doc_values: false + geo_point: + type: geo_point + doc_values: false + binary: + type: binary + doc_values: false + scaled_float: + type: scaled_float + scaling_factor: 10 + doc_values: false + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - >- + { + "number": 100, + "boolean": false, + "keyword": "hello_keyword", + "date": "2015-01-01T12:10:30Z", + "ip": "192.168.1.1", + "ip_range": "10.0.0.0/24", + "flattened": { "f": "hey" }, + "geo_point": "POINT (-71.34 41.12)", + "binary": "aGVsbG8gY3VyaW91cyBwZXJzb24=", + "scaled_float": 1.5 + } + + - match: { errors: false } + + - do: + search: + index: test + + - match: { hits.hits.0._source.number: 100 } + - match: { hits.hits.0._source.boolean: false } + - match: { hits.hits.0._source.keyword: "hello_keyword" } + - match: { hits.hits.0._source.date: "2015-01-01T12:10:30Z" } + - match: { hits.hits.0._source.ip: "192.168.1.1" } + - match: { hits.hits.0._source.ip_range: "10.0.0.0/24" } + - match: { hits.hits.0._source.flattened.f: "hey" } + - match: { hits.hits.0._source.geo_point: "POINT (-71.34 41.12)" } + - match: { hits.hits.0._source.binary: "aGVsbG8gY3VyaW91cyBwZXJzb24=" } + - match: { hits.hits.0._source.scaled_float: 1.5 } + +--- +fallback synthetic_source for text field: + - requires: + cluster_features: ["mapper.source.synthetic_source_with_copy_to_and_doc_values_false"] + reason: requires disabled doc_values support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + text: + type: text + store: false + + - do: + index: + index: test + id: 1 + refresh: true + body: + text: [ "world", "hello", "world" ] + + - do: + search: + index: test + + - match: + hits.hits.0._source: + text: [ "world", "hello", "world" ] + +--- +synthetic_source with copy_to and ignored values: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + k: + type: keyword + ignore_above: 1 + copy_to: copy + long: + type: long + ignore_malformed: true + copy_to: copy + copy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + k: "hello" + long: "world" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + k: ["55", "66"] + long: ["77", "88"] + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "copy" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + long: "world" + - match: { hits.hits.0.fields.copy: ["hello", "world"] } + + - match: + hits.hits.1._source: + name: "B" + k: ["55", "66"] + long: ["77", "88"] + - match: { hits.hits.1.fields.copy: ["55", "66", "77", "88"] } + +--- +synthetic_source with copy_to field having values in source: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + k: + type: keyword + copy_to: copy + copy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + copy: "world" + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + k: ["5", "6"] + copy: ["7", "8"] + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "copy" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + copy: "world" + - match: { hits.hits.0.fields.copy: ["hello", "world"] } + + - match: + hits.hits.1._source: + name: "B" + k: ["5", "6"] + copy: ["7", "8"] + - match: { hits.hits.1.fields.copy: ["5", "6", "7", "8"] } + +--- +synthetic_source with ignored source field using copy_to: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + k: + type: keyword + doc_values: false + copy_to: copy + copy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + copy: "world" + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + k: ["5", "6"] + copy: ["7", "8"] + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "copy" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + copy: "world" + - match: { hits.hits.0.fields.copy: ["hello", "world"] } + + - match: + hits.hits.1._source: + name: "B" + k: ["5", "6"] + copy: ["7", "8"] + - match: { hits.hits.1.fields.copy: ["5", "6", "7", "8"] } + +--- +synthetic_source with copy_to field from dynamic template having values in source: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + dynamic_templates: + - copy_template: + match: "k" + mapping: + type: keyword + copy_to: copy + properties: + name: + type: keyword + copy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + copy: "world" + k: "hello" + + - do: + index: + index: test + id: 3 + refresh: true + body: + name: "C" + k: ["5", "6"] + + - do: + index: + index: test + id: 4 + refresh: true + body: + name: "D" + k: ["5", "6"] + copy: ["7", "8"] + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "copy" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + - match: { hits.hits.0.fields.copy: ["hello"] } + + - match: + hits.hits.1._source: + name: "B" + k: "hello" + copy: "world" + - match: { hits.hits.1.fields.copy: ["hello", "world"] } + + - match: + hits.hits.2._source: + name: "C" + k: ["5", "6"] + - match: { hits.hits.2.fields.copy: ["5", "6"] } + + - match: + hits.hits.3._source: + name: "D" + k: ["5", "6"] + copy: ["7", "8"] + - match: { hits.hits.3.fields.copy: ["5", "6", "7", "8"] } + +--- +synthetic_source with copy_to and invalid values for copy: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_fix"] + reason: requires copy_to support in synthetic source + test_runner_features: "contains" + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + p: + type: long_range + copy_to: copy + copy: + type: keyword + + - do: + catch: bad_request + index: + index: test + id: 1 + refresh: true + body: + name: "A" + p: + gte: 10 + + - match: { error.type: "document_parsing_exception" } + - contains: { error.reason: "Copy-to currently only works for value-type fields" } + +--- +synthetic_source with copy_to pointing inside object: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + my_values: + properties: + k: + type: keyword + ignore_above: 1 + copy_to: c.copy + long: + type: long + copy_to: c.copy + c: + properties: + copy: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + my_values: + k: "hello" + long: 100 + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + my_values: + k: ["55", "66"] + long: [77, 88] + + - do: + index: + index: test + id: 3 + refresh: true + body: + name: "C" + my_values: + k: "hello" + long: 100 + c: + copy: "zap" + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "c.copy" ] + + - match: + hits.hits.0._source: + name: "A" + my_values: + k: "hello" + long: 100 + - match: + hits.hits.0.fields: + c.copy: [ "100", "hello" ] + + - match: + hits.hits.1._source: + name: "B" + my_values: + k: ["55", "66"] + long: [77, 88] + - match: + hits.hits.1.fields: + c.copy: ["55", "66", "77", "88"] + + - match: + hits.hits.2._source: + name: "C" + my_values: + k: "hello" + long: 100 + c: + copy: "zap" + - match: + hits.hits.2.fields: + c.copy: [ "100", "hello", "zap" ] + +--- +synthetic_source with copy_to pointing to ambiguous field: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + k: + type: keyword + copy_to: a.b.c + a: + properties: + b: + properties: + c: + type: keyword + b.c: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + k: "hey" + + - do: + search: + index: test + body: + docvalue_fields: [ "a.b.c" ] + + - match: + hits.hits.0._source: + k: "hey" + - match: + hits.hits.0.fields: + a.b.c: [ "hey" ] + +--- +synthetic_source with copy_to pointing to ambiguous field and subobjects false: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + subobjects: false + properties: + k: + type: keyword + copy_to: a.b.c + a: + properties: + b: + properties: + c: + type: keyword + b.c: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + k: "hey" + + - do: + search: + index: test + body: + docvalue_fields: [ "a.b.c" ] + + - match: + hits.hits.0._source: + k: "hey" + - match: + hits.hits.0.fields: + a.b.c: [ "hey" ] + +--- +synthetic_source with copy_to pointing to ambiguous field and subobjects auto: + - requires: + cluster_features: ["mapper.subobjects_auto_fixes"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + subobjects: auto + properties: + k: + type: keyword + copy_to: a.b.c + a: + properties: + b: + properties: + c: + type: keyword + b.c: + type: keyword + + - do: + index: + index: test + id: 1 + refresh: true + body: + k: "hey" + + - do: + search: + index: test + body: + docvalue_fields: [ "a.b.c" ] + + - match: + hits.hits.0._source: + k: "hey" + - match: + hits.hits.0.fields: + a.b.c: [ "hey" ] + +--- +synthetic_source with copy_to pointing at dynamic field: + - requires: + test_runner_features: contains + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + k: + type: keyword + copy_to: c.copy + c: + properties: + f: + type: float + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + k: ["55", "66"] + + - do: + index: + index: test + id: 3 + refresh: true + body: + name: "C" + k: "hello" + c: + copy: "zap" + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "c.copy.keyword" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + - match: + hits.hits.0.fields: + c.copy.keyword: [ "hello" ] + + - match: + hits.hits.1._source: + name: "B" + k: ["55", "66"] + - match: + hits.hits.1.fields: + c.copy.keyword: [ "55", "66" ] + + - match: + hits.hits.2._source: + name: "C" + k: "hello" + c: + copy: "zap" + - match: + hits.hits.2.fields: + c.copy.keyword: [ "hello", "zap" ] + +--- +synthetic_source with copy_to pointing inside dynamic object: + - requires: + cluster_features: ["mapper.source.synthetic_source_copy_to_inside_objects_fix"] + reason: requires copy_to support in synthetic source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + k: + type: keyword + copy_to: c.copy + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: "A" + k: "hello" + + - do: + index: + index: test + id: 2 + refresh: true + body: + name: "B" + k: ["55", "66"] + + - do: + index: + index: test + id: 3 + refresh: true + body: + name: "C" + k: "hello" + c: + copy: "zap" + + - do: + search: + index: test + sort: name + body: + docvalue_fields: [ "c.copy.keyword" ] + + - match: + hits.hits.0._source: + name: "A" + k: "hello" + - match: + hits.hits.0.fields: + c.copy.keyword: [ "hello" ] + + - match: + hits.hits.1._source: + name: "B" + k: ["55", "66"] + - match: + hits.hits.1.fields: + c.copy.keyword: [ "55", "66" ] + + - match: + hits.hits.2._source: + name: "C" + k: "hello" + c: + copy: "zap" + - match: + hits.hits.2.fields: + c.copy.keyword: [ "hello", "zap" ] + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 7f9b59d427656..6e170c8ef3487 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -904,6 +904,10 @@ private static void parseValue(final DocumentParserContext context, String curre throwOnNoFieldName(context); } Mapper mapper = getLeafMapper(context, currentFieldName); + if (mapper == null) { + // Check if there's an existing mapper for the same path, due to object auto-flattening. + mapper = context.mappingLookup().getMapper(context.path().pathAsText(currentFieldName)); + } if (mapper != null) { parseObjectOrField(context, mapper); } else { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java index 728c7ac6f25ac..221526a3d38e9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java @@ -424,6 +424,9 @@ public Token nextToken() throws IOException { static List maybeFlattenPaths(List subpaths, DocumentParserContext context, ContentPath contentPath) { String prefixWithDots = contentPath.pathAsText(""); + if (prefixWithDots.equals(".")) { + prefixWithDots = ""; + } ObjectMapper parent = contentPath.length() == 0 ? context.root() : context.findObject(prefixWithDots.substring(0, prefixWithDots.length() - 1)); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index b9b611d8c62f9..7b9276d23f89f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -217,8 +217,15 @@ && isFlatteningCandidate(subobjects, objectMapper) candidateObject.append(fullPathTokens[i]); String candidateFullObject = candidateObjectPrefix.isEmpty() ? candidateObject.toString() - : candidateObjectPrefix + candidateObject.toString(); - ObjectMapper parent = context.findObject(candidateFullObject); + : candidateObjectPrefix + candidateObject; + ObjectMapper parent = context.mappingLookup().objectMappers().get(candidateFullObject); + if (parent == null) { + parent = context.getDynamicObjectMapper(candidateFullObject); + if (parent != null && parent.mappers.isEmpty()) { + // Flatten empty dynamic object. + parent = null; + } + } if (parent != null) { var parentBuilder = parent.newBuilder(context.indexSettings().getIndexVersionCreated()); parentBuilder.addDynamic(name.substring(candidateObject.length() + 1), candidateFullObject, mapper, context);