From 98fec4d83613aff081faf7bec972e3dc8dc7e6cd Mon Sep 17 00:00:00 2001
From: Armin Braun <me@obrown.io>
Date: Wed, 19 Feb 2025 16:38:00 +0100
Subject: [PATCH] Fix logic in TopHitsAggregator.getLeafCollector

We shouldn't be creating new collectors while also swtiching out the `leafCollectors`
structure. The safety of this is somewhat tricky to reason about and also it's needlessly
slow too to keep indirectly accessing the mutable field.
Also, lets extract the logic creating the leaf conllectors to a hopefully cold path
and save needless lookups from the top docs collectors in it.
---
 .../metrics/TopHitsAggregator.java            | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TopHitsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TopHitsAggregator.java
index 87d8f839dfca1..a1bf70dacf741 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TopHitsAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/TopHitsAggregator.java
@@ -105,11 +105,12 @@ public LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx,
         // when post collecting then we have already replaced the leaf readers on the aggregator level have already been
         // replaced with the next leaf readers and then post collection pushes docids of the previous segment, which
         // then causes assertions to trip or incorrect top docs to be computed.
+        var leafCollectors = this.leafCollectors;
         if (leafCollectors != null) {
+            this.leafCollectors = null; // set to null, just in case the new allocation below fails
             leafCollectors.close();
-            leafCollectors = null; // set to null, just in case the new allocation below fails
         }
-        leafCollectors = new LongObjectPagedHashMap<>(1, bigArrays);
+        final var currentLeafCollectors = this.leafCollectors = new LongObjectPagedHashMap<>(1, bigArrays);
         return new LeafBucketCollectorBase(sub, null) {
 
             Scorable scorer;
@@ -118,13 +119,21 @@ public LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx,
             public void setScorer(Scorable scorer) throws IOException {
                 this.scorer = scorer;
                 super.setScorer(scorer);
-                for (Cursor<LeafCollector> leafCollector : leafCollectors) {
+                for (Cursor<LeafCollector> leafCollector : currentLeafCollectors) {
                     leafCollector.value.setScorer(scorer);
                 }
             }
 
             @Override
             public void collect(int docId, long bucket) throws IOException {
+                LeafCollector leafCollector = currentLeafCollectors.get(bucket);
+                if (leafCollector == null) {
+                    leafCollector = initLeafCollector(bucket);
+                }
+                leafCollector.collect(docId);
+            }
+
+            private LeafCollector initLeafCollector(long bucket) throws IOException {
                 Collectors collectors = topDocsCollectors.get(bucket);
                 if (collectors == null) {
                     SortAndFormats sort = subSearchContext.sort();
@@ -138,29 +147,25 @@ public void collect(int docId, long bucket) throws IOException {
                     // but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size.
                     topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc());
                     if (sort == null) {
-                        TopScoreDocCollector topScoreDocCollector = new TopScoreDocCollectorManager(topN, null, Integer.MAX_VALUE, false)
+                        TopScoreDocCollector topScoreDocCollector = new TopScoreDocCollectorManager(topN, null, Integer.MAX_VALUE)
                             .newCollector();
                         collectors = new Collectors(topScoreDocCollector, null);
                     } else {
                         // TODO: can we pass trackTotalHits=subSearchContext.trackTotalHits(){
                         // Note that this would require to catch CollectionTerminatedException
                         collectors = new Collectors(
-                            new TopFieldCollectorManager(sort.sort, topN, null, Integer.MAX_VALUE, false).newCollector(),
+                            new TopFieldCollectorManager(sort.sort, topN, null, Integer.MAX_VALUE).newCollector(),
                             subSearchContext.trackScores() ? new MaxScoreCollector() : null
                         );
                     }
                     topDocsCollectors.put(bucket, collectors);
                 }
-
-                LeafCollector leafCollector = leafCollectors.get(bucket);
-                if (leafCollector == null) {
-                    leafCollector = collectors.collector.getLeafCollector(aggCtx.getLeafReaderContext());
-                    if (scorer != null) {
-                        leafCollector.setScorer(scorer);
-                    }
-                    leafCollectors.put(bucket, leafCollector);
+                LeafCollector leafCollector = collectors.collector.getLeafCollector(aggCtx.getLeafReaderContext());
+                if (scorer != null) {
+                    leafCollector.setScorer(scorer);
                 }
-                leafCollector.collect(docId);
+                currentLeafCollectors.put(bucket, leafCollector);
+                return leafCollector;
             }
         };
     }