From 18b1a13ec5a3323594a23ec6e83fad499cd7575c Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 21 Mar 2025 13:00:25 -0400 Subject: [PATCH] Aggs: Let terms run in global ords mode no match (#124782) Allows the `terms` agg to run with global ords if the top level query matches no docs *but* the agg is configured to collect buckets with 0 docs. --- docs/changelog/124782.yaml | 5 + .../bucket/terms/TermsAggregatorFactory.java | 6 +- .../bucket/terms/TermsAggregatorTests.java | 107 ++++++++++++++++-- .../aggregations/AggregatorTestCase.java | 37 +++++- 4 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 docs/changelog/124782.yaml diff --git a/docs/changelog/124782.yaml b/docs/changelog/124782.yaml new file mode 100644 index 0000000000000..638a5a48e0578 --- /dev/null +++ b/docs/changelog/124782.yaml @@ -0,0 +1,5 @@ +pr: 124782 +summary: "Aggs: Let terms run in global ords mode no match" +area: Aggregations +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index da5ae37b08228..61a640e94d2d5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -109,9 +109,13 @@ private static TermsAggregatorSupplier bytesSupplier() { ExecutionMode execution = null; if (executionHint != null) { execution = ExecutionMode.fromString(executionHint); + } else { + if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) { + execution = ExecutionMode.MAP; + } } // In some cases, using ordinals is just not supported: override it - if (valuesSource.hasOrdinals() == false || matchNoDocs(context, parent)) { + if (valuesSource.hasOrdinals() == false) { execution = ExecutionMode.MAP; } if (execution == null) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index b267cb2e656b6..7b5b6a65d61ad 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -141,6 +141,7 @@ import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -286,8 +287,18 @@ public void testSimple() throws Exception { } public void testMatchNoDocsQuery() throws Exception { + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + testMatchNoDocsQuery(executionMode); + } + testMatchNoDocsQuery(null); + } + + private void testMatchNoDocsQuery(TermsAggregatorFactory.ExecutionMode hint) throws Exception { MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap()); TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string"); + if (hint != null) { + aggregationBuilder.executionHint(hint.toString()); + } CheckedConsumer createIndex = iw -> { iw.addDocument(doc(fieldType, "a", "b")); iw.addDocument(doc(fieldType, "", "c", "a")); @@ -298,11 +309,8 @@ public void testMatchNoDocsQuery() throws Exception { createIndex, (InternalTerms result) -> { assertEquals(0, result.getBuckets().size()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(hint == null ? TermsAggregatorFactory.ExecutionMode.MAP : hint)) ); - - debugTestCase(aggregationBuilder, new MatchNoDocsQuery(), createIndex, (result, impl, debug) -> { - assertEquals(impl, MapStringTermsAggregator.class); - }, fieldType); } public void testStringShardMinDocCount() throws IOException { @@ -337,7 +345,6 @@ public void testStringShardZeroMinDocCount() throws IOException { .executionHint(executionMode.toString()) .size(2) .minDocCount(0) - .executionHint("map") .excludeDeletedDocs(true) .order(BucketOrder.key(true)); @@ -360,7 +367,80 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); + } + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments( + Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b")) + ); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); + } + } + } + + /** + * Asserts that sane output for {@code min_doc_count: 0} when the top level + * query is {@code match_none}. Especially important is that we respect the + * execution hint in this case. It's a lot faster to collect {@code min_doc_count: 0} + * via ordinals. + */ + public void testStringZeroMinDocCountMatchNone() throws IOException { + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap()); + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string") + .executionHint(executionMode.toString()) + .size(2) + .minDocCount(0) + .excludeDeletedDocs(true) + .order(BucketOrder.key(true)); + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, "d"))); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); } { @@ -384,7 +464,10 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); } } } @@ -2279,4 +2362,14 @@ protected List objectMappers() { private String randomHint() { return randomFrom(TermsAggregatorFactory.ExecutionMode.values()).toString(); } + + private Consumer checkTopLevelAggregator(TermsAggregatorFactory.ExecutionMode executionMode) { + return switch (executionMode) { + case MAP -> a -> assertThat(a, instanceOf(MapStringTermsAggregator.class)); + case GLOBAL_ORDINALS -> a -> assertThat( + a, + anyOf(instanceOf(GlobalOrdinalsStringTermsAggregator.class), instanceOf(StringTermsAggregatorFromFilters.class)) + ); + }; + } } diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index d034e6e6679c1..7f92d5e5e47dc 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -678,6 +678,7 @@ private A searchAndReduce( } assertEquals(shouldBeCached, context.isCacheable()); List internalAggregations = List.of(a.buildTopLevel()); + aggTestConfig.checkAggregator().accept(a); assertRoundTrip(internalAggregations); internalAggs.add(InternalAggregations.from(internalAggregations)); } finally { @@ -1696,11 +1697,23 @@ public record AggTestConfig( boolean useLogDocMergePolicy, boolean testReductionCancellation, + Consumer checkAggregator, MappedFieldType... fieldTypes ) { public AggTestConfig(AggregationBuilder builder, MappedFieldType... fieldTypes) { - this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), false, true, fieldTypes); + this( + new MatchAllDocsQuery(), + builder, + DEFAULT_MAX_BUCKETS, + randomBoolean(), + true, + randomBoolean(), + false, + true, + a -> {}, + fieldTypes + ); } public AggTestConfig withQuery(Query query) { @@ -1713,6 +1726,7 @@ public AggTestConfig withQuery(Query query) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1727,6 +1741,7 @@ public AggTestConfig withSplitLeavesIntoSeperateAggregators(boolean splitLeavesI incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1741,6 +1756,7 @@ public AggTestConfig withShouldBeCached(boolean shouldBeCached) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1755,6 +1771,7 @@ public AggTestConfig withMaxBuckets(int maxBuckets) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1769,6 +1786,7 @@ public AggTestConfig withIncrementalReduce(boolean incrementalReduce) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1783,6 +1801,7 @@ public AggTestConfig withLogDocMergePolicy() { incrementalReduce, true, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1797,6 +1816,22 @@ public AggTestConfig noReductionCancellation() { incrementalReduce, useLogDocMergePolicy, false, + checkAggregator, + fieldTypes + ); + } + + public AggTestConfig withCheckAggregator(Consumer checkAggregator) { + return new AggTestConfig( + query, + builder, + maxBuckets, + splitLeavesIntoSeparateAggregators, + shouldBeCached, + incrementalReduce, + useLogDocMergePolicy, + testReductionCancellation, + checkAggregator, fieldTypes ); }