diff --git a/docs/changelog/128472.yaml b/docs/changelog/128472.yaml new file mode 100644 index 0000000000000..ad15127cab432 --- /dev/null +++ b/docs/changelog/128472.yaml @@ -0,0 +1,7 @@ +pr: 128472 +summary: "Aggs: Fix significant terms not finding background documents for nested\ + \ fields" +area: Aggregations +type: bug +issues: + - 101163 diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml new file mode 100644 index 0000000000000..97e2c94b9f22f --- /dev/null +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/sig_terms_nested.yml @@ -0,0 +1,506 @@ +setup: + - requires: + capabilities: + - method: POST + path: /_search + capabilities: [ significant_terms_on_nested_fields ] + test_runner_features: [capabilities, close_to] + reason: "bug fix" + + - do: + indices.create: + index: test + body: + settings: + number_of_shards: "1" + mappings: + dynamic: "strict" + properties: + type: + type: keyword + value: + type: integer + value_keyword: + type: keyword + nested: + type: nested + properties: + value: + type: integer + value_keyword: + type: keyword + nested: + type: nested + properties: + value: + type: integer + value_keyword: + type: keyword + + # Type:normal has many "1" and just one "2".
Type:outlier has the same amount of "1" and "2" + - do: + bulk: + index: test + refresh: true + body: + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "normal", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' + + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 
1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1", "nested": { "value": 1, "value_keyword": "1" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' + - '{ "index": {} }' + - '{ "type": "outlier", "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2", "nested": { "value": 2, "value_keyword": "2" } } }' + +--- +"Data checks": + - do: + search: + rest_total_hits_as_int: true + index: test + - match: {hits.total: 16} + + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "aggs": { + "value_terms": { + "terms": { + "field": "value" + } + }, + "value_keyword_terms": { + "terms": { + "field": "value_keyword" + } + }, + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "nested_value_terms": { + "terms": { + "field": "nested.value" + } + }, + "nested_value_keyword_terms": { + "terms": { + "field": "nested.value_keyword" + } + }, + "nested": { + "nested": { + "path": "nested.nested" + }, + "aggs": { + "nested_value_terms": { + "terms": { + "field": "nested.nested.value" + } + }, + "nested_value_keyword_terms": { + "terms": { + "field": "nested.nested.value_keyword" + } + } + } + } + } + } + } + } + + # Check value + - length: {aggregations.value_terms.buckets: 2} + - match: {aggregations.value_terms.buckets.0.key: 1} + - match: 
{aggregations.value_terms.buckets.0.doc_count: 11} + - match: {aggregations.value_terms.buckets.1.key: 2} + - match: {aggregations.value_terms.buckets.1.doc_count: 5} + # Check value_keyword + - length: {aggregations.value_keyword_terms.buckets: 2} + - match: {aggregations.value_keyword_terms.buckets.0.key: "1"} + - match: {aggregations.value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.value_keyword_terms.buckets.1.doc_count: 5} + + # Nested + - match: {aggregations.nested.doc_count: 16} + # Check nested value + - length: {aggregations.nested.nested_value_terms.buckets: 2} + - match: {aggregations.nested.nested_value_terms.buckets.0.key: 1} + - match: {aggregations.nested.nested_value_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested_value_terms.buckets.1.key: 2} + - match: {aggregations.nested.nested_value_terms.buckets.1.doc_count: 5} + # Check nested value_keyword + - length: {aggregations.nested.nested_value_keyword_terms.buckets: 2} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.key: "1"} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.nested.nested_value_keyword_terms.buckets.1.doc_count: 5} + + # Nested>nested + - match: {aggregations.nested.nested.doc_count: 16} + # Check nested value + - length: {aggregations.nested.nested.nested_value_terms.buckets: 2} + - match: {aggregations.nested.nested.nested_value_terms.buckets.0.key: 1} + - match: {aggregations.nested.nested.nested_value_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested.nested_value_terms.buckets.1.key: 2} + - match: {aggregations.nested.nested.nested_value_terms.buckets.1.doc_count: 5} + # Check nested value_keyword + - length: {aggregations.nested.nested.nested_value_keyword_terms.buckets: 2} + - match: 
{aggregations.nested.nested.nested_value_keyword_terms.buckets.0.key: "1"} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.0.doc_count: 11} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.key: "2"} + - match: {aggregations.nested.nested.nested_value_keyword_terms.buckets.1.doc_count: 5} + +--- +"Normal fields": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value" + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword" + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 16} + - length: {aggregations.significant_terms.buckets: 1} + - match: {aggregations.significant_terms.buckets.0.key: 2} + - match: {aggregations.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: {aggregations.significant_terms_keyword.bg_count: 16} + - length: {aggregations.significant_terms_keyword.buckets: 1} + - match: {aggregations.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Normal fields with neutral background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value", + "background_filter": { + "terms": { "type": 
["normal", "outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 16} + - length: {aggregations.significant_terms.buckets: 1} + - match: {aggregations.significant_terms.buckets.0.key: 2} + - match: {aggregations.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: {aggregations.significant_terms_keyword.bg_count: 16} + - length: {aggregations.significant_terms_keyword.buckets: 1} + - match: {aggregations.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Normal fields with background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "value", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "value_keyword", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + } + } + } + + - match: {aggregations.significant_terms.doc_count: 8} + - match: {aggregations.significant_terms.bg_count: 8} + - length: {aggregations.significant_terms.buckets: 0} + + - match: {aggregations.significant_terms_keyword.doc_count: 8} + - match: 
{aggregations.significant_terms_keyword.bg_count: 8} + - length: {aggregations.significant_terms_keyword.buckets: 0} + +--- +"Nested fields": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value" + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword" + } + } + } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Nested fields with neutral background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value", + 
"background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword", + "background_filter": { + "terms": { "type": ["normal", "outlier"] } + } + } + } + } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} + +--- +"Nested fields with background filter": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.value", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.value_keyword", + "background_filter": { + "terms": { "type": ["outlier"] } + } + } + } + } + } + } + } + + - match: 
{aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 8} + - length: {aggregations.nested.significant_terms.buckets: 0} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 8} + - length: {aggregations.nested.significant_terms_keyword.buckets: 0} + +--- +"Doubly nested fields": + - do: + search: + size: 0 + rest_total_hits_as_int: true + index: test + body: { + "query": { + "terms": { "type": [ "outlier" ] } + }, + "aggs": { + "nested": { + "nested": { + "path": "nested.nested" + }, + "aggs": { + "significant_terms": { + "significant_terms": { + "field": "nested.nested.value" + } + }, + "significant_terms_keyword": { + "significant_terms": { + "field": "nested.nested.value_keyword" + } + } + } + } + } + } + + - match: {aggregations.nested.significant_terms.doc_count: 8} + - match: {aggregations.nested.significant_terms.bg_count: 16} + - length: {aggregations.nested.significant_terms.buckets: 1} + - match: {aggregations.nested.significant_terms.buckets.0.key: 2} + - match: {aggregations.nested.significant_terms.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms.buckets.0.score: {value: 0.3, error: 0.000001 }} + + - match: {aggregations.nested.significant_terms_keyword.doc_count: 8} + - match: {aggregations.nested.significant_terms_keyword.bg_count: 16} + - length: {aggregations.nested.significant_terms_keyword.buckets: 1} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.key: "2"} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.doc_count: 4} + - match: {aggregations.nested.significant_terms_keyword.buckets.0.bg_count: 5} + - close_to: { aggregations.nested.significant_terms_keyword.buckets.0.score: {value: 0.3, error: 0.000001 }} diff --git 
a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java index f8d3a3a65abc5..8b6b48cb5b077 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/SearchCapabilities.java @@ -50,6 +50,8 @@ private SearchCapabilities() {} private static final String SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB = "significant_terms_background_filter_as_sub"; + private static final String SIGNIFICANT_TERMS_ON_NESTED_FIELDS = "significant_terms_on_nested_fields"; + public static final Set<String> CAPABILITIES; static { HashSet<String> capabilities = new HashSet<>(); @@ -69,6 +71,7 @@ private SearchCapabilities() {} capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT); capabilities.add(INDEX_SELECTOR_SYNTAX); capabilities.add(SIGNIFICANT_TERMS_BACKGROUND_FILTER_AS_SUB); + capabilities.add(SIGNIFICANT_TERMS_ON_NESTED_FIELDS); CAPABILITIES = Set.copyOf(capabilities); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java index 9bd0e7eac35a0..3395f61cd51ed 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificanceLookup.java @@ -19,6 +19,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.index.FilterableTermsEnum; import org.elasticsearch.common.util.BigArrays; @@ -28,6 +29,7 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import 
org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.query.NestedQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.search.DocValueFormat; @@ -159,7 +161,7 @@ public void close() { * Get the background frequency of a {@link BytesRef} term. */ private long getBackgroundFrequency(BytesRef term) throws IOException { - return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString()))); + return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString()))); } /** @@ -214,7 +216,22 @@ public void close() { * Get the background frequency of a {@code long} term. */ private long getBackgroundFrequency(long term) throws IOException { - return getBackgroundFrequency(context.buildQuery(new TermQueryBuilder(fieldType.name(), format.format(term).toString()))); + return getBackgroundFrequency(context.buildQuery(makeBackgroundFrequencyQuery(format.format(term).toString()))); + } + + /** + * Build the query used to look up the background frequency of {@code value}. + * A field with a nested parent is wrapped in a {@link NestedQueryBuilder} on that parent path; + * any other field uses the plain {@link TermQueryBuilder}. + */ + private QueryBuilder makeBackgroundFrequencyQuery(String value) { + QueryBuilder termQuery = new TermQueryBuilder(fieldType.name(), value); + var nestedPath = context.nestedLookup().getNestedParent(fieldType.name()); + if (nestedPath == null) { + return termQuery; + } + // NOTE(review): a nested query matches root documents, so this presumably counts root documents rather than nested objects - confirm. + return new NestedQueryBuilder(nestedPath, termQuery, ScoreMode.Avg); } private long getBackgroundFrequency(Query query) throws IOException {