Skip to content

Commit ad39c3f

Browse files
authored
Add background filters of significant terms aggregations to can match query. (#106564) (#106797)
* Add background filters of significant terms aggregations to can match query. * Fix NPE * Unit tests * Update docs/changelog/106564.yaml * Update 106564.yaml * Make aggregation queries in can match phase more generic. * Copy source to preserve other relevant fields. * Replace copy constructor by shallowCopy
1 parent 79bfb88 commit ad39c3f

File tree

5 files changed

+167
-8
lines changed

5 files changed

+167
-8
lines changed

docs/changelog/106564.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 106564
2+
summary: Fix the background set of significant terms aggregations in case the data is in different shards than the foreground set
3+
area: Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/action/search/CanMatchNodeRequest.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
import org.elasticsearch.common.io.stream.Writeable;
1919
import org.elasticsearch.core.Nullable;
2020
import org.elasticsearch.core.TimeValue;
21+
import org.elasticsearch.index.query.QueryBuilder;
2122
import org.elasticsearch.index.shard.ShardId;
2223
import org.elasticsearch.search.Scroll;
24+
import org.elasticsearch.search.aggregations.AggregationBuilder;
2325
import org.elasticsearch.search.builder.SearchSourceBuilder;
26+
import org.elasticsearch.search.builder.SubSearchSourceBuilder;
2427
import org.elasticsearch.search.internal.AliasFilter;
2528
import org.elasticsearch.search.internal.ShardSearchContextId;
2629
import org.elasticsearch.search.internal.ShardSearchRequest;
@@ -31,6 +34,7 @@
3134
import java.io.IOException;
3235
import java.util.ArrayList;
3336
import java.util.Arrays;
37+
import java.util.Collection;
3438
import java.util.List;
3539
import java.util.Map;
3640

@@ -129,7 +133,7 @@ public CanMatchNodeRequest(
129133
long nowInMillis,
130134
@Nullable String clusterAlias
131135
) {
132-
this.source = searchRequest.source();
136+
this.source = getCanMatchSource(searchRequest);
133137
this.indicesOptions = indicesOptions;
134138
this.shards = new ArrayList<>(shards);
135139
this.searchType = searchRequest.searchType();
@@ -146,6 +150,36 @@ public CanMatchNodeRequest(
146150
indices = shards.stream().map(Shard::getOriginalIndices).flatMap(Arrays::stream).distinct().toArray(String[]::new);
147151
}
148152

153+
private static void collectAggregationQueries(Collection<AggregationBuilder> aggregations, List<QueryBuilder> aggregationQueries) {
154+
for (AggregationBuilder aggregation : aggregations) {
155+
QueryBuilder aggregationQuery = aggregation.getQuery();
156+
if (aggregationQuery != null) {
157+
aggregationQueries.add(aggregationQuery);
158+
}
159+
collectAggregationQueries(aggregation.getSubAggregations(), aggregationQueries);
160+
}
161+
}
162+
163+
private SearchSourceBuilder getCanMatchSource(SearchRequest searchRequest) {
164+
// Aggregations may use a different query than the top-level search query. An example is
165+
// the significant terms aggregation, which also collects data over a background that
166+
// typically much larger than the search query. To accommodate for this, we take the union
167+
// of all queries to determine whether a request can match.
168+
List<QueryBuilder> aggregationQueries = new ArrayList<>();
169+
if (searchRequest.source() != null && searchRequest.source().aggregations() != null) {
170+
collectAggregationQueries(searchRequest.source().aggregations().getAggregatorFactories(), aggregationQueries);
171+
}
172+
if (aggregationQueries.isEmpty()) {
173+
return searchRequest.source();
174+
} else {
175+
List<SubSearchSourceBuilder> subSearches = new ArrayList<>(searchRequest.source().subSearches());
176+
for (QueryBuilder aggregationQuery : aggregationQueries) {
177+
subSearches.add(new SubSearchSourceBuilder(aggregationQuery));
178+
}
179+
return searchRequest.source().shallowCopy().subSearches(subSearches);
180+
}
181+
}
182+
149183
public CanMatchNodeRequest(StreamInput in) throws IOException {
150184
super(in);
151185
source = in.readOptionalWriteable(SearchSourceBuilder::new);

server/src/main/java/org/elasticsearch/search/aggregations/AggregationBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import org.elasticsearch.common.Strings;
1111
import org.elasticsearch.common.io.stream.VersionedNamedWriteable;
12+
import org.elasticsearch.index.query.QueryBuilder;
1213
import org.elasticsearch.index.query.QueryRewriteContext;
1314
import org.elasticsearch.index.query.Rewriteable;
1415
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
@@ -97,6 +98,11 @@ public Collection<AggregationBuilder> getSubAggregations() {
9798
return factoriesBuilder.getAggregatorFactories();
9899
}
99100

101+
/** Return the aggregation's query if it's different from the search query, or null otherwise. */
102+
public QueryBuilder getQuery() {
103+
return null;
104+
}
105+
100106
/** Return the configured set of pipeline aggregations **/
101107
public Collection<PipelineAggregationBuilder> getPipelineAggregations() {
102108
return factoriesBuilder.getPipelineAggregatorFactories();

server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTermsAggregationBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.common.io.stream.StreamInput;
1313
import org.elasticsearch.common.io.stream.StreamOutput;
1414
import org.elasticsearch.index.query.QueryBuilder;
15+
import org.elasticsearch.index.query.QueryBuilders;
1516
import org.elasticsearch.index.query.QueryRewriteContext;
1617
import org.elasticsearch.search.aggregations.AggregationBuilder;
1718
import org.elasticsearch.search.aggregations.AggregatorFactories;
@@ -257,6 +258,11 @@ public SignificantTermsAggregationBuilder backgroundFilter(QueryBuilder backgrou
257258
return this;
258259
}
259260

261+
@Override
262+
public QueryBuilder getQuery() {
263+
return backgroundFilter != null ? backgroundFilter : QueryBuilders.matchAllQuery();
264+
}
265+
260266
/**
261267
* Set terms to include and exclude from the aggregation results
262268
*/

server/src/test/java/org/elasticsearch/action/search/CanMatchPreFilterSearchPhaseTests.java

Lines changed: 115 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,21 +30,24 @@
3030
import org.elasticsearch.index.IndexSettings;
3131
import org.elasticsearch.index.IndexVersion;
3232
import org.elasticsearch.index.mapper.DateFieldMapper;
33-
import org.elasticsearch.index.query.AbstractQueryBuilder;
3433
import org.elasticsearch.index.query.BoolQueryBuilder;
3534
import org.elasticsearch.index.query.CoordinatorRewriteContextProvider;
35+
import org.elasticsearch.index.query.QueryBuilder;
3636
import org.elasticsearch.index.query.RangeQueryBuilder;
3737
import org.elasticsearch.index.query.TermQueryBuilder;
3838
import org.elasticsearch.index.shard.IndexLongFieldRange;
3939
import org.elasticsearch.index.shard.ShardId;
4040
import org.elasticsearch.index.shard.ShardLongFieldRange;
4141
import org.elasticsearch.search.CanMatchShardResponse;
42+
import org.elasticsearch.search.aggregations.AggregationBuilder;
43+
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTermsAggregationBuilder;
4244
import org.elasticsearch.search.builder.SearchSourceBuilder;
4345
import org.elasticsearch.search.internal.AliasFilter;
4446
import org.elasticsearch.search.internal.ShardSearchRequest;
4547
import org.elasticsearch.search.sort.MinAndMax;
4648
import org.elasticsearch.search.sort.SortBuilders;
4749
import org.elasticsearch.search.sort.SortOrder;
50+
import org.elasticsearch.search.suggest.SuggestBuilder;
4851
import org.elasticsearch.test.ESTestCase;
4952
import org.elasticsearch.threadpool.TestThreadPool;
5053
import org.elasticsearch.threadpool.ThreadPool;
@@ -497,14 +500,14 @@ public void testCanMatchFilteringOnCoordinatorThatCanBeSkipped() throws Exceptio
497500
regularIndices,
498501
contextProviderBuilder.build(),
499502
queryBuilder,
503+
List.of(),
504+
null,
500505
(updatedSearchShardIterators, requests) -> {
501506
List<SearchShardIterator> skippedShards = updatedSearchShardIterators.stream().filter(SearchShardIterator::skip).toList();
502-
;
503507

504508
List<SearchShardIterator> nonSkippedShards = updatedSearchShardIterators.stream()
505509
.filter(searchShardIterator -> searchShardIterator.skip() == false)
506510
.toList();
507-
;
508511

509512
int regularIndexShardCount = (int) updatedSearchShardIterators.stream()
510513
.filter(s -> regularIndices.contains(s.shardId().getIndex()))
@@ -568,6 +571,8 @@ public void testCanMatchFilteringOnCoordinatorParsingFails() throws Exception {
568571
regularIndices,
569572
contextProviderBuilder.build(),
570573
queryBuilder,
574+
List.of(),
575+
null,
571576
this::assertAllShardsAreQueried
572577
);
573578
}
@@ -624,6 +629,99 @@ public void testCanMatchFilteringOnCoordinatorThatCanNotBeSkipped() throws Excep
624629
regularIndices,
625630
contextProviderBuilder.build(),
626631
queryBuilder,
632+
List.of(),
633+
null,
634+
this::assertAllShardsAreQueried
635+
);
636+
}
637+
638+
public void testCanMatchFilteringOnCoordinator_withSignificantTermsAggregation_withDefaultBackgroundFilter() throws Exception {
639+
Index index1 = new Index("index1", UUIDs.base64UUID());
640+
Index index2 = new Index("index2", UUIDs.base64UUID());
641+
Index index3 = new Index("index3", UUIDs.base64UUID());
642+
643+
StaticCoordinatorRewriteContextProviderBuilder contextProviderBuilder = new StaticCoordinatorRewriteContextProviderBuilder();
644+
contextProviderBuilder.addIndexMinMaxTimestamps(index1, DataStream.TIMESTAMP_FIELD_NAME, 0, 999);
645+
contextProviderBuilder.addIndexMinMaxTimestamps(index2, DataStream.TIMESTAMP_FIELD_NAME, 1000, 1999);
646+
contextProviderBuilder.addIndexMinMaxTimestamps(index3, DataStream.TIMESTAMP_FIELD_NAME, 2000, 2999);
647+
648+
QueryBuilder query = new BoolQueryBuilder().filter(new RangeQueryBuilder(DataStream.TIMESTAMP_FIELD_NAME).from(2100).to(2200));
649+
AggregationBuilder aggregation = new SignificantTermsAggregationBuilder("significant_terms");
650+
651+
assignShardsAndExecuteCanMatchPhase(
652+
List.of(),
653+
List.of(index1, index2, index3),
654+
contextProviderBuilder.build(),
655+
query,
656+
List.of(aggregation),
657+
null,
658+
// The default background filter matches the whole index, so all shards must be queried.
659+
this::assertAllShardsAreQueried
660+
);
661+
}
662+
663+
public void testCanMatchFilteringOnCoordinator_withSignificantTermsAggregation_withBackgroundFilter() throws Exception {
664+
Index index1 = new Index("index1", UUIDs.base64UUID());
665+
Index index2 = new Index("index2", UUIDs.base64UUID());
666+
Index index3 = new Index("index3", UUIDs.base64UUID());
667+
Index index4 = new Index("index4", UUIDs.base64UUID());
668+
669+
StaticCoordinatorRewriteContextProviderBuilder contextProviderBuilder = new StaticCoordinatorRewriteContextProviderBuilder();
670+
contextProviderBuilder.addIndexMinMaxTimestamps(index1, DataStream.TIMESTAMP_FIELD_NAME, 0, 999);
671+
contextProviderBuilder.addIndexMinMaxTimestamps(index2, DataStream.TIMESTAMP_FIELD_NAME, 1000, 1999);
672+
contextProviderBuilder.addIndexMinMaxTimestamps(index3, DataStream.TIMESTAMP_FIELD_NAME, 2000, 2999);
673+
contextProviderBuilder.addIndexMinMaxTimestamps(index4, DataStream.TIMESTAMP_FIELD_NAME, 3000, 3999);
674+
675+
QueryBuilder query = new BoolQueryBuilder().filter(new RangeQueryBuilder(DataStream.TIMESTAMP_FIELD_NAME).from(3100).to(3200));
676+
AggregationBuilder aggregation = new SignificantTermsAggregationBuilder("significant_terms").backgroundFilter(
677+
new RangeQueryBuilder(DataStream.TIMESTAMP_FIELD_NAME).from(0).to(1999)
678+
);
679+
680+
assignShardsAndExecuteCanMatchPhase(
681+
List.of(),
682+
List.of(index1, index2, index3),
683+
contextProviderBuilder.build(),
684+
query,
685+
List.of(aggregation),
686+
null,
687+
(updatedSearchShardIterators, requests) -> {
688+
// The search query matches index4, the background query matches index1 and index2,
689+
// so index3 is the only one that must be skipped.
690+
for (SearchShardIterator shard : updatedSearchShardIterators) {
691+
if (shard.shardId().getIndex().getName().equals("index3")) {
692+
assertTrue(shard.skip());
693+
} else {
694+
assertFalse(shard.skip());
695+
}
696+
}
697+
}
698+
);
699+
}
700+
701+
public void testCanMatchFilteringOnCoordinator_withSignificantTermsAggregation_withSuggest() throws Exception {
702+
Index index1 = new Index("index1", UUIDs.base64UUID());
703+
Index index2 = new Index("index2", UUIDs.base64UUID());
704+
Index index3 = new Index("index3", UUIDs.base64UUID());
705+
706+
StaticCoordinatorRewriteContextProviderBuilder contextProviderBuilder = new StaticCoordinatorRewriteContextProviderBuilder();
707+
contextProviderBuilder.addIndexMinMaxTimestamps(index1, DataStream.TIMESTAMP_FIELD_NAME, 0, 999);
708+
contextProviderBuilder.addIndexMinMaxTimestamps(index2, DataStream.TIMESTAMP_FIELD_NAME, 1000, 1999);
709+
contextProviderBuilder.addIndexMinMaxTimestamps(index3, DataStream.TIMESTAMP_FIELD_NAME, 2000, 2999);
710+
711+
QueryBuilder query = new BoolQueryBuilder().filter(new RangeQueryBuilder(DataStream.TIMESTAMP_FIELD_NAME).from(2100).to(2200));
712+
AggregationBuilder aggregation = new SignificantTermsAggregationBuilder("significant_terms").backgroundFilter(
713+
new RangeQueryBuilder(DataStream.TIMESTAMP_FIELD_NAME).from(2000).to(2300)
714+
);
715+
SuggestBuilder suggest = new SuggestBuilder().setGlobalText("test");
716+
717+
assignShardsAndExecuteCanMatchPhase(
718+
List.of(),
719+
List.of(index1, index2, index3),
720+
contextProviderBuilder.build(),
721+
query,
722+
List.of(aggregation),
723+
suggest,
724+
// The query and aggregation and match only index3, but suggest should match everything.
627725
this::assertAllShardsAreQueried
628726
);
629727
}
@@ -669,6 +767,8 @@ public void testCanMatchFilteringOnCoordinatorThatCanBeSkippedTsdb() throws Exce
669767
List.of(),
670768
contextProviderBuilder.build(),
671769
queryBuilder,
770+
List.of(),
771+
null,
672772
(updatedSearchShardIterators, requests) -> {
673773
var skippedShards = updatedSearchShardIterators.stream().filter(SearchShardIterator::skip).toList();
674774
var nonSkippedShards = updatedSearchShardIterators.stream()
@@ -713,11 +813,13 @@ private void assertAllShardsAreQueried(List<SearchShardIterator> updatedSearchSh
713813
assertThat(requests.size(), equalTo(shardsWithPrimariesAssigned));
714814
}
715815

716-
private <QB extends AbstractQueryBuilder<QB>> void assignShardsAndExecuteCanMatchPhase(
816+
private void assignShardsAndExecuteCanMatchPhase(
717817
List<DataStream> dataStreams,
718818
List<Index> regularIndices,
719819
CoordinatorRewriteContextProvider contextProvider,
720-
AbstractQueryBuilder<QB> query,
820+
QueryBuilder query,
821+
List<AggregationBuilder> aggregations,
822+
SuggestBuilder suggest,
721823
BiConsumer<List<SearchShardIterator>, List<ShardSearchRequest>> canMatchResultsConsumer
722824
) throws Exception {
723825
Map<String, Transport.Connection> lookup = new ConcurrentHashMap<>();
@@ -764,14 +866,20 @@ private <QB extends AbstractQueryBuilder<QB>> void assignShardsAndExecuteCanMatc
764866
searchRequest.allowPartialSearchResults(true);
765867

766868
final AliasFilter aliasFilter;
767-
if (randomBoolean()) {
869+
if (aggregations.isEmpty() == false || randomBoolean()) {
768870
// Apply the query on the request body
769871
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.searchSource();
770872
searchSourceBuilder.query(query);
873+
for (AggregationBuilder aggregation : aggregations) {
874+
searchSourceBuilder.aggregation(aggregation);
875+
}
876+
if (suggest != null) {
877+
searchSourceBuilder.suggest(suggest);
878+
}
771879
searchRequest.source(searchSourceBuilder);
772880

773881
// Sometimes apply the same query in the alias filter too
774-
aliasFilter = AliasFilter.of(randomBoolean() ? query : null, Strings.EMPTY_ARRAY);
882+
aliasFilter = AliasFilter.of(aggregations.isEmpty() && randomBoolean() ? query : null, Strings.EMPTY_ARRAY);
775883
} else {
776884
// Apply the query as an alias filter
777885
aliasFilter = AliasFilter.of(query, Strings.EMPTY_ARRAY);

0 commit comments

Comments
 (0)