diff --git a/docs/changelog/124782.yaml b/docs/changelog/124782.yaml new file mode 100644 index 0000000000000..638a5a48e0578 --- /dev/null +++ b/docs/changelog/124782.yaml @@ -0,0 +1,5 @@ +pr: 124782 +summary: "Aggs: Let terms run in global ords mode no match" +area: Aggregations +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 5a692b934a41c..207746ae4571c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -109,9 +109,13 @@ private static TermsAggregatorSupplier bytesSupplier() { ExecutionMode execution = null; if (executionHint != null) { execution = ExecutionMode.fromString(executionHint); + } else { + if (matchNoDocs(context, parent) && bucketCountThresholds.getMinDocCount() > 0) { + execution = ExecutionMode.MAP; + } } // In some cases, using ordinals is just not supported: override it - if (valuesSource.hasOrdinals() == false || matchNoDocs(context, parent)) { + if (valuesSource.hasOrdinals() == false) { execution = ExecutionMode.MAP; } if (execution == null) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index b267cb2e656b6..7b5b6a65d61ad 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -141,6 +141,7 @@ import static org.elasticsearch.search.aggregations.PipelineAggregatorBuilders.bucketScript; import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -286,8 +287,18 @@ public void testSimple() throws Exception { } public void testMatchNoDocsQuery() throws Exception { + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + testMatchNoDocsQuery(executionMode); + } + testMatchNoDocsQuery(null); + } + + private void testMatchNoDocsQuery(TermsAggregatorFactory.ExecutionMode hint) throws Exception { MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", randomBoolean(), true, Collections.emptyMap()); TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string"); + if (hint != null) { + aggregationBuilder.executionHint(hint.toString()); + } CheckedConsumer createIndex = iw -> { iw.addDocument(doc(fieldType, "a", "b")); iw.addDocument(doc(fieldType, "", "c", "a")); @@ -298,11 +309,8 @@ public void testMatchNoDocsQuery() throws Exception { createIndex, (InternalTerms result) -> { assertEquals(0, result.getBuckets().size()); }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(hint == null ? TermsAggregatorFactory.ExecutionMode.MAP : hint)) ); - - debugTestCase(aggregationBuilder, new MatchNoDocsQuery(), createIndex, (result, impl, debug) -> { - assertEquals(impl, MapStringTermsAggregator.class); - }, fieldType); } public void testStringShardMinDocCount() throws IOException { @@ -337,7 +345,6 @@ public void testStringShardZeroMinDocCount() throws IOException { .executionHint(executionMode.toString()) .size(2) .minDocCount(0) - .executionHint("map") .excludeDeletedDocs(true) .order(BucketOrder.key(true)); @@ -360,7 +367,80 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); + } + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments( + Arrays.asList(doc(fieldType, "a"), doc(fieldType, "c", "d"), doc(fieldType, "b", "d"), doc(fieldType, "b")) + ); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e"))) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); + } + } + } + + /** + * Asserts that sane output for {@code min_doc_count: 0} when the top level + * query is {@code match_none}. Especially important is that we respect the + * execution hint in this case. It's a lot faster to collect {@code min_doc_count: 0} + * via ordinals. + */ + public void testStringZeroMinDocCountMatchNone() throws IOException { + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string", true, true, Collections.emptyMap()); + for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").field("string") + .executionHint(executionMode.toString()) + .size(2) + .minDocCount(0) + .excludeDeletedDocs(true) + .order(BucketOrder.key(true)); + + { + boolean delete = randomBoolean(); + // force single shard/segment + testCase(iw -> { + // force single shard/segment + iw.addDocuments(Arrays.asList(doc(fieldType, "a"), doc(fieldType, "b"), doc(fieldType, "c"), doc(fieldType, "d"))); + if (delete) { + iw.deleteDocuments(new TermQuery(new Term("string", "b"))); + } + }, (InternalTerms result) -> { + assertEquals(2, result.getBuckets().size()); + assertEquals("a", result.getBuckets().get(0).getKeyAsString()); + assertEquals(0L, result.getBuckets().get(0).getDocCount()); + if (delete) { + assertEquals("c", result.getBuckets().get(1).getKeyAsString()); + } else { + assertEquals("b", result.getBuckets().get(1).getKeyAsString()); + } + assertEquals(0L, result.getBuckets().get(1).getDocCount()); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); } { @@ -384,7 +464,10 @@ public void testStringShardZeroMinDocCount() throws IOException { assertEquals("b", result.getBuckets().get(1).getKeyAsString()); } assertEquals(0L, result.getBuckets().get(1).getDocCount()); - }, new AggTestConfig(aggregationBuilder, fieldType).withQuery(new TermQuery(new Term("string", "e")))); + }, + new AggTestConfig(aggregationBuilder, fieldType).withQuery(new MatchNoDocsQuery()) + .withCheckAggregator(checkTopLevelAggregator(executionMode)) + ); } } } @@ -2279,4 +2362,14 @@ protected List objectMappers() { private String randomHint() { return randomFrom(TermsAggregatorFactory.ExecutionMode.values()).toString(); } + + private Consumer checkTopLevelAggregator(TermsAggregatorFactory.ExecutionMode executionMode) { + return switch (executionMode) { + case MAP -> a -> assertThat(a, instanceOf(MapStringTermsAggregator.class)); + case GLOBAL_ORDINALS -> a -> assertThat( + a, + anyOf(instanceOf(GlobalOrdinalsStringTermsAggregator.class), instanceOf(StringTermsAggregatorFromFilters.class)) + ); + }; + } } diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 03921dfb4359b..e0c4be506969e 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -685,6 +685,7 @@ private A searchAndReduce( } assertEquals(shouldBeCached, context.isCacheable()); List internalAggregations = List.of(a.buildTopLevel()); + aggTestConfig.checkAggregator().accept(a); assertRoundTrip(internalAggregations); internalAggs.add(InternalAggregations.from(internalAggregations)); } finally { @@ -1703,11 +1704,23 @@ public record AggTestConfig( boolean useLogDocMergePolicy, boolean testReductionCancellation, + Consumer checkAggregator, MappedFieldType... fieldTypes ) { public AggTestConfig(AggregationBuilder builder, MappedFieldType... fieldTypes) { - this(new MatchAllDocsQuery(), builder, DEFAULT_MAX_BUCKETS, randomBoolean(), true, randomBoolean(), false, true, fieldTypes); + this( + new MatchAllDocsQuery(), + builder, + DEFAULT_MAX_BUCKETS, + randomBoolean(), + true, + randomBoolean(), + false, + true, + a -> {}, + fieldTypes + ); } public AggTestConfig withQuery(Query query) { @@ -1720,6 +1733,7 @@ public AggTestConfig withQuery(Query query) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1734,6 +1748,7 @@ public AggTestConfig withSplitLeavesIntoSeperateAggregators(boolean splitLeavesI incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1748,6 +1763,7 @@ public AggTestConfig withShouldBeCached(boolean shouldBeCached) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1762,6 +1778,7 @@ public AggTestConfig withMaxBuckets(int maxBuckets) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1776,6 +1793,7 @@ public AggTestConfig withIncrementalReduce(boolean incrementalReduce) { incrementalReduce, useLogDocMergePolicy, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1790,6 +1808,7 @@ public AggTestConfig withLogDocMergePolicy() { incrementalReduce, true, testReductionCancellation, + checkAggregator, fieldTypes ); } @@ -1804,6 +1823,22 @@ public AggTestConfig noReductionCancellation() { incrementalReduce, useLogDocMergePolicy, false, + checkAggregator, + fieldTypes + ); + } + + public AggTestConfig withCheckAggregator(Consumer checkAggregator) { + return new AggTestConfig( + query, + builder, + maxBuckets, + splitLeavesIntoSeparateAggregators, + shouldBeCached, + incrementalReduce, + useLogDocMergePolicy, + testReductionCancellation, + checkAggregator, fieldTypes ); }