Move scroll + dfs reduce code from SearchPhaseController to actual users (#119726)

original-brownbear · web-flow · commit d7474e646637 · 2025-01-15T18:57:33.000+01:00
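There is no need for this logic to live in `SearchPhaseController` when each piece is only used
from a single place elsewhere: the DFS reduce code (`aggregateDfs`, `mergeKnnResults`) moves to
`SearchDfsQueryThenFetchAsyncAction`, and the scroll reduce code (`reducedScrollQueryPhase`)
moves to `SearchScrollAsyncAction`.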
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java
@@ -10,19 +10,30 @@
 package org.elasticsearch.action.search;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.client.internal.Client;
 import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.routing.GroupShardsIterator;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.search.SearchPhaseResult;
 import org.elasticsearch.search.SearchShardTarget;
+import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.search.dfs.AggregatedDfs;
 import org.elasticsearch.search.dfs.DfsKnnResults;
 import org.elasticsearch.search.dfs.DfsSearchResult;
 import org.elasticsearch.search.internal.AliasFilter;
 import org.elasticsearch.transport.Transport;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Executor;
@@ -93,12 +104,11 @@ protected void executePhaseOnShard(
     @Override
     protected SearchPhase getNextPhase() {
         final List<DfsSearchResult> dfsSearchResults = results.getAtomicArray().asList();
-        final AggregatedDfs aggregatedDfs = SearchPhaseController.aggregateDfs(dfsSearchResults);
-        final List<DfsKnnResults> mergedKnnResults = SearchPhaseController.mergeKnnResults(getRequest(), dfsSearchResults);
+        final AggregatedDfs aggregatedDfs = aggregateDfs(dfsSearchResults);
         return new DfsQueryPhase(
             dfsSearchResults,
             aggregatedDfs,
-            mergedKnnResults,
+            mergeKnnResults(getRequest(), dfsSearchResults),
             queryPhaseResultConsumer,
             (queryResults) -> SearchQueryThenFetchAsyncAction.nextPhase(client, this, queryResults, aggregatedDfs),
             this
@@ -109,4 +119,95 @@ protected SearchPhase getNextPhase() {
     protected void onShardGroupFailure(int shardIndex, SearchShardTarget shardTarget, Exception exc) {
         progressListener.notifyQueryFailure(shardIndex, shardTarget, exc);
     }
+
+    private static List<DfsKnnResults> mergeKnnResults(SearchRequest request, List<DfsSearchResult> dfsSearchResults) {
+        if (request.hasKnnSearch() == false) {
+            return null;
+        }
+        SearchSourceBuilder source = request.source();
+        List<List<TopDocs>> topDocsLists = new ArrayList<>(source.knnSearch().size());
+        List<SetOnce<String>> nestedPath = new ArrayList<>(source.knnSearch().size());
+        for (int i = 0; i < source.knnSearch().size(); i++) {
+            topDocsLists.add(new ArrayList<>());
+            nestedPath.add(new SetOnce<>());
+        }
+
+        for (DfsSearchResult dfsSearchResult : dfsSearchResults) {
+            if (dfsSearchResult.knnResults() != null) {
+                for (int i = 0; i < dfsSearchResult.knnResults().size(); i++) {
+                    DfsKnnResults knnResults = dfsSearchResult.knnResults().get(i);
+                    ScoreDoc[] scoreDocs = knnResults.scoreDocs();
+                    TotalHits totalHits = new TotalHits(scoreDocs.length, TotalHits.Relation.EQUAL_TO);
+                    TopDocs shardTopDocs = new TopDocs(totalHits, scoreDocs);
+                    SearchPhaseController.setShardIndex(shardTopDocs, dfsSearchResult.getShardIndex());
+                    topDocsLists.get(i).add(shardTopDocs);
+                    nestedPath.get(i).trySet(knnResults.getNestedPath());
+                }
+            }
+        }
+
+        List<DfsKnnResults> mergedResults = new ArrayList<>(source.knnSearch().size());
+        for (int i = 0; i < source.knnSearch().size(); i++) {
+            TopDocs mergedTopDocs = TopDocs.merge(source.knnSearch().get(i).k(), topDocsLists.get(i).toArray(new TopDocs[0]));
+            mergedResults.add(new DfsKnnResults(nestedPath.get(i).get(), mergedTopDocs.scoreDocs));
+        }
+        return mergedResults;
+    }
+
+    private static AggregatedDfs aggregateDfs(Collection<DfsSearchResult> results) {
+        Map<Term, TermStatistics> termStatistics = new HashMap<>();
+        Map<String, CollectionStatistics> fieldStatistics = new HashMap<>();
+        long aggMaxDoc = 0;
+        for (DfsSearchResult lEntry : results) {
+            final Term[] terms = lEntry.terms();
+            final TermStatistics[] stats = lEntry.termStatistics();
+            assert terms.length == stats.length;
+            for (int i = 0; i < terms.length; i++) {
+                assert terms[i] != null;
+                if (stats[i] == null) {
+                    continue;
+                }
+                TermStatistics existing = termStatistics.get(terms[i]);
+                if (existing != null) {
+                    assert terms[i].bytes().equals(existing.term());
+                    termStatistics.put(
+                        terms[i],
+                        new TermStatistics(
+                            existing.term(),
+                            existing.docFreq() + stats[i].docFreq(),
+                            existing.totalTermFreq() + stats[i].totalTermFreq()
+                        )
+                    );
+                } else {
+                    termStatistics.put(terms[i], stats[i]);
+                }
+
+            }
+
+            assert lEntry.fieldStatistics().containsKey(null) == false;
+            for (var entry : lEntry.fieldStatistics().entrySet()) {
+                String key = entry.getKey();
+                CollectionStatistics value = entry.getValue();
+                if (value == null) {
+                    continue;
+                }
+                assert key != null;
+                CollectionStatistics existing = fieldStatistics.get(key);
+                if (existing != null) {
+                    CollectionStatistics merged = new CollectionStatistics(
+                        key,
+                        existing.maxDoc() + value.maxDoc(),
+                        existing.docCount() + value.docCount(),
+                        existing.sumTotalTermFreq() + value.sumTotalTermFreq(),
+                        existing.sumDocFreq() + value.sumDocFreq()
+                    );
+                    fieldStatistics.put(key, merged);
+                } else {
+                    fieldStatistics.put(key, value);
+                }
+            }
+            aggMaxDoc += lEntry.maxDoc();
+        }
+        return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java
@@ -9,20 +9,16 @@
 
 package org.elasticsearch.action.search;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.FieldDoc;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedNumericSortField;
 import org.apache.lucene.search.SortedSetSortField;
-import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.search.TotalHits.Relation;
-import org.apache.lucene.util.SetOnce;
 import org.elasticsearch.common.breaker.CircuitBreaker;
 import org.elasticsearch.common.io.stream.DelayableWriteable;
 import org.elasticsearch.common.lucene.Lucene;
@@ -42,9 +38,6 @@
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.InternalAggregations;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
-import org.elasticsearch.search.dfs.AggregatedDfs;
-import org.elasticsearch.search.dfs.DfsKnnResults;
-import org.elasticsearch.search.dfs.DfsSearchResult;
 import org.elasticsearch.search.fetch.FetchSearchResult;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.profile.SearchProfileQueryPhaseResult;
@@ -84,97 +77,6 @@ public SearchPhaseController(
         this.requestToAggReduceContextBuilder = requestToAggReduceContextBuilder;
     }
 
-    public static AggregatedDfs aggregateDfs(Collection<DfsSearchResult> results) {
-        Map<Term, TermStatistics> termStatistics = new HashMap<>();
-        Map<String, CollectionStatistics> fieldStatistics = new HashMap<>();
-        long aggMaxDoc = 0;
-        for (DfsSearchResult lEntry : results) {
-            final Term[] terms = lEntry.terms();
-            final TermStatistics[] stats = lEntry.termStatistics();
-            assert terms.length == stats.length;
-            for (int i = 0; i < terms.length; i++) {
-                assert terms[i] != null;
-                if (stats[i] == null) {
-                    continue;
-                }
-                TermStatistics existing = termStatistics.get(terms[i]);
-                if (existing != null) {
-                    assert terms[i].bytes().equals(existing.term());
-                    termStatistics.put(
-                        terms[i],
-                        new TermStatistics(
-                            existing.term(),
-                            existing.docFreq() + stats[i].docFreq(),
-                            existing.totalTermFreq() + stats[i].totalTermFreq()
-                        )
-                    );
-                } else {
-                    termStatistics.put(terms[i], stats[i]);
-                }
-
-            }
-
-            assert lEntry.fieldStatistics().containsKey(null) == false;
-            for (var entry : lEntry.fieldStatistics().entrySet()) {
-                String key = entry.getKey();
-                CollectionStatistics value = entry.getValue();
-                if (value == null) {
-                    continue;
-                }
-                assert key != null;
-                CollectionStatistics existing = fieldStatistics.get(key);
-                if (existing != null) {
-                    CollectionStatistics merged = new CollectionStatistics(
-                        key,
-                        existing.maxDoc() + value.maxDoc(),
-                        existing.docCount() + value.docCount(),
-                        existing.sumTotalTermFreq() + value.sumTotalTermFreq(),
-                        existing.sumDocFreq() + value.sumDocFreq()
-                    );
-                    fieldStatistics.put(key, merged);
-                } else {
-                    fieldStatistics.put(key, value);
-                }
-            }
-            aggMaxDoc += lEntry.maxDoc();
-        }
-        return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc);
-    }
-
-    public static List<DfsKnnResults> mergeKnnResults(SearchRequest request, List<DfsSearchResult> dfsSearchResults) {
-        if (request.hasKnnSearch() == false) {
-            return null;
-        }
-        SearchSourceBuilder source = request.source();
-        List<List<TopDocs>> topDocsLists = new ArrayList<>(source.knnSearch().size());
-        List<SetOnce<String>> nestedPath = new ArrayList<>(source.knnSearch().size());
-        for (int i = 0; i < source.knnSearch().size(); i++) {
-            topDocsLists.add(new ArrayList<>());
-            nestedPath.add(new SetOnce<>());
-        }
-
-        for (DfsSearchResult dfsSearchResult : dfsSearchResults) {
-            if (dfsSearchResult.knnResults() != null) {
-                for (int i = 0; i < dfsSearchResult.knnResults().size(); i++) {
-                    DfsKnnResults knnResults = dfsSearchResult.knnResults().get(i);
-                    ScoreDoc[] scoreDocs = knnResults.scoreDocs();
-                    TotalHits totalHits = new TotalHits(scoreDocs.length, Relation.EQUAL_TO);
-                    TopDocs shardTopDocs = new TopDocs(totalHits, scoreDocs);
-                    setShardIndex(shardTopDocs, dfsSearchResult.getShardIndex());
-                    topDocsLists.get(i).add(shardTopDocs);
-                    nestedPath.get(i).trySet(knnResults.getNestedPath());
-                }
-            }
-        }
-
-        List<DfsKnnResults> mergedResults = new ArrayList<>(source.knnSearch().size());
-        for (int i = 0; i < source.knnSearch().size(); i++) {
-            TopDocs mergedTopDocs = TopDocs.merge(source.knnSearch().get(i).k(), topDocsLists.get(i).toArray(new TopDocs[0]));
-            mergedResults.add(new DfsKnnResults(nestedPath.get(i).get(), mergedTopDocs.scoreDocs));
-        }
-        return mergedResults;
-    }
-
     /**
      * Returns a score doc array of top N search docs across all shards, followed by top suggest docs for each
      * named completion suggestion across all shards. If more than one named completion suggestion is specified in the
@@ -496,38 +398,6 @@ private static SearchHits getHits(
         );
     }
 
-    /**
-     * Reduces the given query results and consumes all aggregations and profile results.
-     * @param queryResults a list of non-null query shard results
-     */
-    static ReducedQueryPhase reducedScrollQueryPhase(Collection<? extends SearchPhaseResult> queryResults) {
-        AggregationReduceContext.Builder aggReduceContextBuilder = new AggregationReduceContext.Builder() {
-            @Override
-            public AggregationReduceContext forPartialReduction() {
-                throw new UnsupportedOperationException("Scroll requests don't have aggs");
-            }
-
-            @Override
-            public AggregationReduceContext forFinalReduction() {
-                throw new UnsupportedOperationException("Scroll requests don't have aggs");
-            }
-        };
-        final TopDocsStats topDocsStats = new TopDocsStats(SearchContext.TRACK_TOTAL_HITS_ACCURATE);
-        final List<TopDocs> topDocs = new ArrayList<>();
-        for (SearchPhaseResult sortedResult : queryResults) {
-            QuerySearchResult queryResult = sortedResult.queryResult();
-            final TopDocsAndMaxScore td = queryResult.consumeTopDocs();
-            assert td != null;
-            topDocsStats.add(td, queryResult.searchTimedOut(), queryResult.terminatedEarly());
-            // make sure we set the shard index before we add it - the consumer didn't do that yet
-            if (td.topDocs.scoreDocs.length > 0) {
-                setShardIndex(td.topDocs, queryResult.getShardIndex());
-                topDocs.add(td.topDocs);
-            }
-        }
-        return reducedQueryPhase(queryResults, null, topDocs, topDocsStats, 0, true, aggReduceContextBuilder, null, true);
-    }
-
     /**
      * Reduces the given query results and consumes all aggregations and profile results.
      * @param queryResults a list of non-null query shard results
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchScrollAsyncAction.java
@@ -10,21 +10,27 @@
 package org.elasticsearch.action.search;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.search.TopDocs;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
 import org.elasticsearch.common.util.concurrent.AtomicArray;
 import org.elasticsearch.common.util.concurrent.CountDown;
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.search.SearchPhaseResult;
 import org.elasticsearch.search.SearchShardTarget;
+import org.elasticsearch.search.aggregations.AggregationReduceContext;
 import org.elasticsearch.search.internal.InternalScrollSearchRequest;
+import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.internal.ShardSearchContextId;
+import org.elasticsearch.search.query.QuerySearchResult;
 import org.elasticsearch.transport.RemoteClusterService;
 import org.elasticsearch.transport.Transport;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -301,4 +307,48 @@ protected void onShardFailure(
     protected Transport.Connection getConnection(String clusterAlias, DiscoveryNode node) {
         return searchTransportService.getConnection(clusterAlias, node);
     }
+
+    /**
+     * Reduces the given query results and consumes all aggregations and profile results.
+     * @param queryResults a list of non-null query shard results
+     */
+    protected static SearchPhaseController.ReducedQueryPhase reducedScrollQueryPhase(Collection<? extends SearchPhaseResult> queryResults) {
+        AggregationReduceContext.Builder aggReduceContextBuilder = new AggregationReduceContext.Builder() {
+            @Override
+            public AggregationReduceContext forPartialReduction() {
+                throw new UnsupportedOperationException("Scroll requests don't have aggs");
+            }
+
+            @Override
+            public AggregationReduceContext forFinalReduction() {
+                throw new UnsupportedOperationException("Scroll requests don't have aggs");
+            }
+        };
+        final SearchPhaseController.TopDocsStats topDocsStats = new SearchPhaseController.TopDocsStats(
+            SearchContext.TRACK_TOTAL_HITS_ACCURATE
+        );
+        final List<TopDocs> topDocs = new ArrayList<>();
+        for (SearchPhaseResult sortedResult : queryResults) {
+            QuerySearchResult queryResult = sortedResult.queryResult();
+            final TopDocsAndMaxScore td = queryResult.consumeTopDocs();
+            assert td != null;
+            topDocsStats.add(td, queryResult.searchTimedOut(), queryResult.terminatedEarly());
+            // make sure we set the shard index before we add it - the consumer didn't do that yet
+            if (td.topDocs.scoreDocs.length > 0) {
+                SearchPhaseController.setShardIndex(td.topDocs, queryResult.getShardIndex());
+                topDocs.add(td.topDocs);
+            }
+        }
+        return SearchPhaseController.reducedQueryPhase(
+            queryResults,
+            null,
+            topDocs,
+            topDocsStats,
+            0,
+            true,
+            aggReduceContextBuilder,
+            null,
+            true
+        );
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchScrollQueryAndFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchScrollQueryAndFetchAsyncAction.java
@@ -51,7 +51,7 @@ protected void executeInitialPhase(
 
     @Override
     protected SearchPhase moveToNextPhase(BiFunction<String, String, DiscoveryNode> clusterNodeLookup) {
-        return sendResponsePhase(SearchPhaseController.reducedScrollQueryPhase(queryFetchResults.asList()), queryFetchResults);
+        return sendResponsePhase(reducedScrollQueryPhase(queryFetchResults.asList()), queryFetchResults);
     }
 
     @Override
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchScrollQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchScrollQueryThenFetchAsyncAction.java

@@ -51,7 +51,7 @@ protected void executeInitialPhase(
 
     @Override
     protected SearchPhase moveToNextPhase(BiFunction<String, String, DiscoveryNode> clusterNodeLookup) {
-        return sendResponsePhase(SearchPhaseController.reducedScrollQueryPhase(queryFetchResults.asList()), queryFetchResults);
+        return sendResponsePhase(reducedScrollQueryPhase(queryFetchResults.asList()), queryFetchResults);
     }
 
     @Override