From 91283ed31a229b1d22dba1ee4de758234cba7cdf Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 13 Mar 2025 12:43:01 -0400 Subject: [PATCH 1/3] Aggs: Let terms run in global ords mode no match Allows the `terms` agg to run with global ords if the top level query matches no docs *but* the agg is configured to collect buckets with 0 docs. --- .../bucket/terms/TermsAggregatorFactory.java | 5 +- .../bucket/terms/TermsAggregatorTests.java | 89 ++++++++++++++++++- .../aggregations/AggregatorTestCase.java | 37 +++++++- 3 files changed, 126 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 5a692b934a41c..101b3698e3788 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -111,7 +111,10 @@ private static TermsAggregatorSupplier bytesSupplier() { execution = ExecutionMode.fromString(executionHint); } // In some cases, using ordinals is just not supported: override it - if (valuesSource.hasOrdinals() == false || matchNoDocs(context, parent)) { + if (valuesSource.hasOrdinals() == false) { + execution = ExecutionMode.MAP; + } + if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) { execution = ExecutionMode.MAP; } if (execution == null) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index b267cb2e656b6..33429d038002e 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -298,6 +298,7 @@ public void testMatchNoDocsQuery() throws Exception { createIndex, (InternalTerms result) -> { assertEquals(0, result.getBuckets().size()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregatorNoRewrites(TermsAggregatorFactory.ExecutionMode.MAP)) ); debugTestCase(aggregationBuilder, new MatchNoDocsQuery(), createIndex, (result, impl, debug) -> { @@ -337,7 +338,6 @@ public void testStringShardZeroMinDocCount() throws IOException { .executionHint(executionMode.toString()) .size(2) .minDocCount(0) - .executionHint("map") .excludeDeletedDocs(true) .order(BucketOrder.key(true)); @@ -360,7 +360,80 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + ); + } + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments( + Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b")) + ); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + ); + } + } + } + + /** + * Asserts that sane output for {@code min_doc_count: 0} when the top level + * query is {@code match_none}. Especially important is that we respect the + * execution hint in this case. It's a lot faster to collect {@code min_doc_count: 0} + * via ordinals. + */ + public void testStringZeroMinDocCountMatchNone() throws IOException { + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap()); + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string") + .executionHint(executionMode.toString()) + .size(2) + .minDocCount(0) + .excludeDeletedDocs(true) + .order(BucketOrder.key(true)); + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, "d"))); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + ); } { @@ -384,7 +457,10 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + ); } } } @@ -2279,4 +2355,11 @@ protected List objectMappers() { private String randomHint() { return randomFrom(TermsAggregatorFactory.ExecutionMode.values()).toString(); } + + private Consumer checkTopLevelAggregatorNoRewrites(TermsAggregatorFactory.ExecutionMode executionMode) { + return switch (executionMode) { + case MAP -> a -> assertThat(a, instanceOf(MapStringTermsAggregator.class)); + case GLOBAL_ORDINALS -> a -> assertThat(a, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + }; + } } diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 03921dfb4359b..e0c4be506969e 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -685,6 +685,7 @@ private A searchAndReduce( } assertEquals(shouldBeCached, context.isCacheable()); List internalAggregations = List.of(a.buildTopLevel()); + aggTestConfig.checkAggregator().accept(a); assertRoundTrip(internalAggregations); internalAggs.add(InternalAggregations.from(internalAggregations)); } finally { @@ -1703,11 +1704,23 @@ public record AggTestConfig( boolean useLogDocMergePolicy, boolean testReductionCancellation, + Consumer checkAggregator, MappedFieldType... fieldTypes ) { public AggTestConfig(AggregationBuilder builder, MappedFieldType... fieldTypes) { - this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), false, true, fieldTypes); + this( + new MatchAllDocsQuery(), + builder, + DEFAULT_MAX_BUCKETS, + randomBoolean(), + true, + randomBoolean(), + false, + true, + a -> {}, + fieldTypes + ); } public AggTestConfig withQuery(Query query) { @@ -1720,6 +1733,7 @@ public AggTestConfig withQuery(Query query) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1734,6 +1748,7 @@ public AggTestConfig withSplitLeavesIntoSeperateAggregators(boolean splitLeavesI incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1748,6 +1763,7 @@ public AggTestConfig withShouldBeCached(boolean shouldBeCached) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1762,6 +1778,7 @@ public AggTestConfig withMaxBuckets(int maxBuckets) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1776,6 +1793,7 @@ public AggTestConfig withIncrementalReduce(boolean incrementalReduce) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1790,6 +1808,7 @@ public AggTestConfig withLogDocMergePolicy() { incrementalReduce, true, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1804,6 +1823,22 @@ public AggTestConfig noReductionCancellation() { incrementalReduce, useLogDocMergePolicy, false, + checkAggregator, + fieldTypes + ); + } + + public AggTestConfig withCheckAggregator(Consumer checkAggregator) { + return new AggTestConfig( + query, + builder, + maxBuckets, + splitLeavesIntoSeparateAggregators, + shouldBeCached, + incrementalReduce, + useLogDocMergePolicy, + testReductionCancellation, + checkAggregator, fieldTypes ); } From 607e1f23c71ff4d8bb9822e32a0fba69feeea546 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 13 Mar 2025 13:22:34 -0400 Subject: [PATCH 2/3] Update docs/changelog/124782.yaml --- docs/changelog/124782.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/124782.yaml diff --git a/docs/changelog/124782.yaml b/docs/changelog/124782.yaml new file mode 100644 index 0000000000000..638a5a48e0578 --- /dev/null +++ b/docs/changelog/124782.yaml @@ -0,0 +1,5 @@ +pr: 124782 +summary: "Aggs: Let terms run in global ords mode no match" +area: Aggregations +type: bug +issues: [] From 7482625b8ced7c12cd186e195e1b7124c2bb8193 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 14 Mar 2025 16:52:23 -0400 Subject: [PATCH 3/3] Move --- .../bucket/terms/TermsAggregatorFactory.java | 7 ++-- .../bucket/terms/TermsAggregatorTests.java | 32 ++++++++++++------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 101b3698e3788..207746ae4571c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -109,14 +109,15 @@ private static TermsAggregatorSupplier bytesSupplier() { ExecutionMode execution = null; if (executionHint != null) { execution = ExecutionMode.fromString(executionHint); + } else { + if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) { + execution = ExecutionMode.MAP; + } } // In some cases, using ordinals is just not supported: override it if (valuesSource.hasOrdinals() == false) { execution = ExecutionMode.MAP; } - if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) { - execution = ExecutionMode.MAP; - } if (execution == null) { execution = ExecutionMode.GLOBAL_ORDINALS; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 33429d038002e..7b5b6a65d61ad 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -141,6 +141,7 @@ import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -286,8 +287,18 @@ public void testSimple() throws Exception { } public void testMatchNoDocsQuery() throws Exception { + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + testMatchNoDocsQuery(executionMode); + } + testMatchNoDocsQuery(null); + } + + private void testMatchNoDocsQuery(TermsAggregatorFactory.ExecutionMode hint) throws Exception { MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap()); TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string"); + if (hint != null) { + aggregationBuilder.executionHint(hint.toString()); + } CheckedConsumer createIndex = iw -> { iw.addDocument(doc(fieldType, "a", "b")); iw.addDocument(doc(fieldType, "", "c", "a")); @@ -298,12 +309,8 @@ public void testMatchNoDocsQuery() throws Exception { createIndex, (InternalTerms result) -> { assertEquals(0, result.getBuckets().size()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) - .withCheckAggregator(checkTopLevelAggregatorNoRewrites(TermsAggregatorFactory.ExecutionMode.MAP)) + .withCheckAggregator(checkTopLevelAggregator(hint == null ? TermsAggregatorFactory.ExecutionMode.MAP : hint)) ); - - debugTestCase(aggregationBuilder, new MatchNoDocsQuery(), createIndex, (result, impl, debug) -> { - assertEquals(impl, MapStringTermsAggregator.class); - }, fieldType); } public void testStringShardMinDocCount() throws IOException { @@ -362,7 +369,7 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals(0L, result.getBuckets().get(1).getDocCount()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) - .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) ); } @@ -389,7 +396,7 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals(0L, result.getBuckets().get(1).getDocCount()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) - .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) ); } } @@ -432,7 +439,7 @@ public void testStringZeroMinDocCountMatchNone() throws IOException { assertEquals(0L, result.getBuckets().get(1).getDocCount()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) - .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) ); } @@ -459,7 +466,7 @@ public void testStringZeroMinDocCountMatchNone() throws IOException { assertEquals(0L, result.getBuckets().get(1).getDocCount()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) - .withCheckAggregator(checkTopLevelAggregatorNoRewrites(executionMode)) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) ); } } @@ -2356,10 +2363,13 @@ private String randomHint() { return randomFrom(TermsAggregatorFactory.ExecutionMode.values()).toString(); } - private Consumer checkTopLevelAggregatorNoRewrites(TermsAggregatorFactory.ExecutionMode executionMode) { + private Consumer checkTopLevelAggregator(TermsAggregatorFactory.ExecutionMode executionMode) { return switch (executionMode) { case MAP -> a -> assertThat(a, instanceOf(MapStringTermsAggregator.class)); - case GLOBAL_ORDINALS -> a -> assertThat(a, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); + case GLOBAL_ORDINALS -> a -> assertThat( + a, + anyOf(instanceOf(GlobalOrdinalsStringTermsAggregator.class), instanceOf(StringTermsAggregatorFromFilters.class)) + ); }; } }