From 08e6fe5fce7e18d3baf92e997b52d5cfb8ab91be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Fri, 23 May 2025 19:56:07 +0200 Subject: [PATCH 1/9] Added YAML test suite for significant-terms with nested fields, and failing case --- .../test/aggregations/sig_terms_nested.yml | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml new file mode 100644 index 0000000000000..929acd5239560 --- /dev/null +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -0,0 +1,177 @@ +setup: + - requires: + test_runner_features: close_to + + - do: + indices.create: + index: test + body: + settings: + number_of_shards: "1" + mappings: + properties: + nested: + type: nested + properties: + type: + type: keyword + value: + type: integer + + # Type:normal has many "1" and just one "2". 
Type:outlier has the same amount of "1" and "2" + - do: + bulk: + index: test + refresh: true + body: + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 2, "nested": { "type": "normal", "value": 2 } }' + + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + +--- +"Data checks": + - do: + search: + rest_total_hits_as_int: true + index: test + - match: {hits.total: 16} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: { + "aggs": 
{ + "value_terms": { + "terms": { + "field": "value" + } + }, + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "nested_value_terms": { + "terms": { + "field": "nested.value" + } + } + } + } + } + } + + - match: {aggregations.value_terms.buckets.0.key: 1} + - match: {aggregations.value_terms.buckets.0.doc_count: 11} + - match: {aggregations.value_terms.buckets.1.key: 2} + - match: {aggregations.value_terms.buckets.1.doc_count: 5} + + - match: {aggregations.nested.doc_count: 16} + - match: {aggregations.nested.nested_value_terms.buckets.0.key: 1} + - match: {aggregations.nested.nested_value_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested_value_terms.buckets.1.key: 2} + - match: {aggregations.nested.nested_value_terms.buckets.1.doc_count: 5} + +--- +"Normal fields": + - do: + search: + rest_total_hits_as_int: true + index: test + - match: {hits.total: 16} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value" + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 16} + - length: {aggregations.significant_terms.buckets: 1} + - match: {aggregations.significant_terms.buckets.0.key: 2} + - match: {aggregations.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Nested fields": + - do: + search: + rest_total_hits_as_int: true + index: test + - match: {hits.total: 16} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value" + } + } 
+ } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} From a441675b0c3b38176871d74e6a561bc6fb41eaef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Mon, 26 May 2025 17:22:30 +0200 Subject: [PATCH 2/9] Fixed SignificantTerms on nested fields not finding any background document --- .../test/aggregations/sig_terms_nested.yml | 186 +++++++++++++++--- .../bucket/terms/SignificanceLookup.java | 17 +- 2 files changed, 174 insertions(+), 29 deletions(-) diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml index 929acd5239560..9ee0dab7d044c 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -10,13 +10,24 @@ setup: number_of_shards: "1" mappings: properties: + value: + type: integer + value_keyword: + type: keyword nested: type: nested properties: - type: - type: keyword value: type: integer + value_keyword: + type: keyword + nested: + type: nested + properties: + value: + type: integer + value_keyword: + type: keyword # Type:normal has many "1" and just one "2". 
Type:outlier has the same amount of "1" and "2" - do: @@ -25,38 +36,38 @@ setup: refresh: true body: - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 1, "nested": { "type": "normal", "value": 1 } }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "normal", "value": 2, "nested": { "type": 
"normal", "value": 2 } }' + - '{ "type": "normal", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 1, "nested": { "type": "outlier", "value": 1 } }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, 
"value_keyword": "2" } } }' - '{ "index": {} }' - - '{ "type": "outlier", "value": 2, "nested": { "type": "outlier", "value": 2 } }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' --- "Data checks": @@ -68,6 +79,7 @@ setup: - do: search: + size: 0 rest_total_hits_as_int: true index: test body: { @@ -77,6 +89,11 @@ setup: "field": "value" } }, + "value_keyword_terms": { + "terms": { + "field": "value_keyword" + } + }, "nested": { "nested": { "path": "nested" @@ -86,33 +103,79 @@ setup: "terms": { "field": "nested.value" } + }, + "nested_value_keyword_terms": { + "terms": { + "field": "nested.value_keyword" + } + }, + "nested": { + "nested": { + "path": "nested.nested" + }, + "aggs": { + "nested_value_terms": { + "terms": { + "field": "nested.nested.value" + } + }, + "nested_value_keyword_terms": { + "terms": { + "field": "nested.nested.value_keyword" + } + } + } } } } } } + # Check value - match: {aggregations.value_terms.buckets.0.key: 1} - match: {aggregations.value_terms.buckets.0.doc_count: 11} - match: {aggregations.value_terms.buckets.1.key: 2} - match: {aggregations.value_terms.buckets.1.doc_count: 5} + # Check value_keyword + - match: {aggregations.value_keyword_terms.buckets.0.key: "1"} + - match: {aggregations.value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.value_keyword_terms.buckets.1.doc_count: 5} + + # Nested - match: {aggregations.nested.doc_count: 16} + # Check nested value - match: {aggregations.nested.nested_value_terms.buckets.0.key: 1} - match: {aggregations.nested.nested_value_terms.buckets.0.doc_count: 11} - match: {aggregations.nested.nested_value_terms.buckets.1.key: 2} - match: {aggregations.nested.nested_value_terms.buckets.1.doc_count: 5} + # Check nested value_keyword + - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.key: 
"1"} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.1.doc_count: 5} + + # Nested>nested + - match: {aggregations.nested.nested.doc_count: 16} + # Check nested value + - match: {aggregations.nested.nested.nested_value_terms.buckets.0.key: 1} + - match: {aggregations.nested.nested.nested_value_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested.nested_value_terms.buckets.1.key: 2} + - match: {aggregations.nested.nested.nested_value_terms.buckets.1.doc_count: 5} + + # Check nested value_keyword + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.key: "1"} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.doc_count: 5} + --- "Normal fields": - do: search: - rest_total_hits_as_int: true - index: test - - match: {hits.total: 16} - - - do: - search: + size: 0 rest_total_hits_as_int: true index: test body: { @@ -124,6 +187,11 @@ setup: "significant_terms": { "field": "value" } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword" + } } } } @@ -136,16 +204,67 @@ setup: - match: {aggregations.significant_terms.buckets.0.bg_count: 5} - close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: {aggregations.significant_terms_keyword.bg_count: 16} + - length: {aggregations.significant_terms_keyword.buckets: 1} + - match: {aggregations.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.significant_terms_keyword.buckets.0.doc_count: 4} + - match: 
{aggregations.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + --- "Nested fields": - do: search: + size: 0 rest_total_hits_as_int: true index: test - - match: {hits.total: 16} + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value" + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword" + } + } + } + } + } + } + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Doubly nested fields": - do: search: + size: 0 rest_total_hits_as_int: true index: test body: { @@ -155,12 +274,17 @@ setup: "aggs": { "nested": { "nested": { - "path": "nested" + "path": "nested.nested" }, "aggs": { "significant_terms": { "significant_terms": { - "field": 
"nested.value" + "field": "nested.nested.value" + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.nested.value_keyword" } } } @@ -175,3 +299,11 @@ setup: - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java index 9bd0e7eac35a0..91b7a0df72c32 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java @@ -19,6 +19,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.index.FilterableTermsEnum; import org.elasticsearch.common.util.BigArrays; @@ -28,6 +29,7 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.MappedFieldType; +import 
org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.DocValueFormat; @@ -159,7 +161,7 @@ public void close() { * Get the background frequency of a {@link BytesRef} term. */ private long getBackgroundFrequency(BytesRef term) throws IOException { - return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString()))); + return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString()))); } /** @@ -214,7 +216,18 @@ public void close() { * Get the background frequency of a {@code long} term. */ private long getBackgroundFrequency(long term) throws IOException { - return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString()))); + return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString()))); + } + + private QueryBuilder makeBackgroundFrequencyQuery(String value) { + var nestedParentField = context.nestedLookup().getNestedParent(fieldType.name()); + QueryBuilder queryBuilder = new TermQueryBuilder(fieldType.name(), value); + + if (nestedParentField != null) { + queryBuilder = new NestedQueryBuilder(nestedParentField, queryBuilder, ScoreMode.Avg); + } + + return queryBuilder; } private long getBackgroundFrequency(Query query) throws IOException { From b734f00087a2696be9cdf7da0c5a6c6bb393ace2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Mon, 26 May 2025 17:27:17 +0200 Subject: [PATCH 3/9] Update docs/changelog/128472.yaml --- docs/changelog/128472.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docs/changelog/128472.yaml diff --git a/docs/changelog/128472.yaml b/docs/changelog/128472.yaml new file mode 100644 index 0000000000000..1213e099e3a06 --- /dev/null +++ b/docs/changelog/128472.yaml 
@@ -0,0 +1,6 @@ +pr: 128472 +summary: "Aggs: Fix significant terms not finding background documents for nested\ + \ fields" +area: Aggregations +type: bug +issues: [] From 2f723f34610d65f2657c20865a4f94548016cb07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Mon, 26 May 2025 18:11:38 +0200 Subject: [PATCH 4/9] Improved tests --- .../rest-api-spec/test/aggregations/sig_terms_nested.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml index 9ee0dab7d044c..b283aa5f6afae 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -132,12 +132,13 @@ setup: } # Check value + - length: {aggregations.value_terms.buckets: 2} - match: {aggregations.value_terms.buckets.0.key: 1} - match: {aggregations.value_terms.buckets.0.doc_count: 11} - match: {aggregations.value_terms.buckets.1.key: 2} - match: {aggregations.value_terms.buckets.1.doc_count: 5} - # Check value_keyword + - length: {aggregations.value_keyword_terms.buckets: 2} - match: {aggregations.value_keyword_terms.buckets.0.key: "1"} - match: {aggregations.value_keyword_terms.buckets.0.doc_count: 11} - match: {aggregations.value_keyword_terms.buckets.1.key: "2"} @@ -146,12 +147,13 @@ setup: # Nested - match: {aggregations.nested.doc_count: 16} # Check nested value + - length: {aggregations.nested.nested_value_terms.buckets: 2} - match: {aggregations.nested.nested_value_terms.buckets.0.key: 1} - match: {aggregations.nested.nested_value_terms.buckets.0.doc_count: 11} - match: {aggregations.nested.nested_value_terms.buckets.1.key: 2} - match: {aggregations.nested.nested_value_terms.buckets.1.doc_count: 5} - 
# Check nested value_keyword + - length: {aggregations.nested.nested_value_keyword_terms.buckets: 2} - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.key: "1"} - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} - match: {aggregations.nested.nested_value_keyword_terms.buckets.1.key: "2"} @@ -160,12 +162,13 @@ setup: # Nested>nested - match: {aggregations.nested.nested.doc_count: 16} # Check nested value + - length: {aggregations.nested.nested.nested_value_terms.buckets: 2} - match: {aggregations.nested.nested.nested_value_terms.buckets.0.key: 1} - match: {aggregations.nested.nested.nested_value_terms.buckets.0.doc_count: 11} - match: {aggregations.nested.nested.nested_value_terms.buckets.1.key: 2} - match: {aggregations.nested.nested.nested_value_terms.buckets.1.doc_count: 5} - # Check nested value_keyword + - length: {aggregations.nested.nested.nested_value_keyword_terms.buckets: 2} - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.key: "1"} - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.key: "2"} From 138fb6fdd3e8761b4ae051f78bbf8a410b95b5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Tue, 27 May 2025 13:56:28 +0200 Subject: [PATCH 5/9] Added extra tests with background filters --- .../test/aggregations/sig_terms_nested.yml | 186 ++++++++++++++++++ 1 file changed, 186 insertions(+) diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml index b283aa5f6afae..af69408c96084 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml 
@@ -215,6 +215,92 @@ setup: - match: {aggregations.significant_terms_keyword.buckets.0.bg_count: 5} - close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} +--- +"Normal fields with neutral background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 16} + - length: {aggregations.significant_terms.buckets: 1} + - match: {aggregations.significant_terms.buckets.0.key: 2} + - match: {aggregations.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: {aggregations.significant_terms_keyword.bg_count: 16} + - length: {aggregations.significant_terms_keyword.buckets: 1} + - match: {aggregations.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Normal fields with background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value", + 
"background_filter": { + "terms": { "type": ["outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 8} + - length: {aggregations.significant_terms.buckets: 0} + + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: {aggregations.significant_terms_keyword.bg_count: 8} + - length: {aggregations.significant_terms_keyword.buckets: 0} + --- "Nested fields": - do: @@ -263,6 +349,106 @@ setup: - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} +--- +"Nested fields with neutral background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + } + } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: 
{aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Nested fields with background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + } + } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 8} + - length: {aggregations.nested.significant_terms.buckets: 0} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 8} + - length: {aggregations.nested.significant_terms_keyword.buckets: 0} + --- "Doubly nested fields": - do: From 0e22aaa762845c4ad3d163c77125546969067dbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Tue, 27 May 2025 14:03:50 +0200 Subject: [PATCH 6/9] Added missing "type" field mapping --- .../rest-api-spec/test/aggregations/sig_terms_nested.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml index af69408c96084..da783ed7542da 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -9,7 +9,10 @@ setup: settings: number_of_shards: "1" mappings: + dynamic: "strict" properties: + type: + type: keyword value: type: integer value_keyword: From 4725ac5b0283fd4b84f68a80721e9eccd90266dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Tue, 27 May 2025 14:09:11 +0200 Subject: [PATCH 7/9] Minor style refactor --- .../search/aggregations/bucket/terms/SignificanceLookup.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java index 91b7a0df72c32..3395f61cd51ed 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java @@ -220,9 +220,9 @@ private long getBackgroundFrequency(long term) throws IOException { } private QueryBuilder makeBackgroundFrequencyQuery(String value) { - var nestedParentField = context.nestedLookup().getNestedParent(fieldType.name()); QueryBuilder queryBuilder = new TermQueryBuilder(fieldType.name(), value); + var nestedParentField = context.nestedLookup().getNestedParent(fieldType.name()); if (nestedParentField != null) { queryBuilder = new NestedQueryBuilder(nestedParentField, queryBuilder, ScoreMode.Avg); } From 85a7b9cea50cdb5ddf28b0421984dacd2e713a84 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Tue, 27 May 2025 15:04:32 +0200 Subject: [PATCH 8/9] Update docs/changelog/128472.yaml --- docs/changelog/128472.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog/128472.yaml b/docs/changelog/128472.yaml index 1213e099e3a06..ad15127cab432 100644 --- a/docs/changelog/128472.yaml +++ b/docs/changelog/128472.yaml @@ -3,4 +3,5 @@ summary: "Aggs: Fix significant terms not finding background docuemnts for neste \ fields" area: Aggregations type: bug -issues: [] +issues: + - 101163 From 43b0087e7168c36f0fa0cc97cce7207739ec9424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Thu, 29 May 2025 14:17:31 +0200 Subject: [PATCH 9/9] Added capability to avoid running tests in clusters without the fix --- .../rest-api-spec/test/aggregations/sig_terms_nested.yml | 7 ++++++- .../rest/action/search/SearchCapabilities.java | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml index da783ed7542da..97e2c94b9f22f 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -1,6 +1,11 @@ setup: - requires: - test_runner_features: close_to + capabilities: + - method: POST + path: /_search + capabilities: [ significant_terms_on_nested_fields ] + test_runner_features: [capabilities, close_to] + reason: "bug fix" - do: indices.create: diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index f8d3a3a65abc5..8b6b48cb5b077 100644 --- 
a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -50,6 +50,8 @@ private SearchCapabilities() {} private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub"; + private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields"; + public static final Set CAPABILITIES; static { HashSet capabilities = new HashSet<>(); @@ -69,6 +71,7 @@ private SearchCapabilities() {} capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT); capabilities.add(INDEX_SELECTOR_SYNTAX); capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB); + capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS); CAPABILITIES = Set.copyOf(capabilities); } }