Correct score mode in random sampler weight

jan-elastic · jan-elastic · commit 81cdffd7b758 · 2024-11-19T14:17:29.000+01:00
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java
@@ -40,7 +40,7 @@ public abstract class AggregatorBase extends Aggregator {
 
     protected final String name;
     protected final Aggregator parent;
-    private final AggregationContext context;
+    protected final AggregationContext context;
     private final Map<String, Object> metadata;
 
     protected final Aggregator[] subAggregators;
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java
@@ -9,12 +9,14 @@
 
 package org.elasticsearch.search.aggregations.bucket.sampler.random;
 
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.CollectionTerminatedException;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
-import org.elasticsearch.common.CheckedSupplier;
 import org.elasticsearch.common.util.LongArray;
 import org.elasticsearch.search.aggregations.AggregationExecutionContext;
 import org.elasticsearch.search.aggregations.Aggregator;
@@ -34,14 +36,13 @@ public class RandomSamplerAggregator extends BucketsAggregator implements Single
     private final int seed;
     private final Integer shardSeed;
     private final double probability;
-    private final CheckedSupplier<Weight, IOException> weightSupplier;
+    private Weight weight;
 
     RandomSamplerAggregator(
         String name,
         int seed,
         Integer shardSeed,
         double probability,
-        CheckedSupplier<Weight, IOException> weightSupplier,
         AggregatorFactories factories,
         AggregationContext context,
         Aggregator parent,
@@ -56,10 +57,33 @@ public class RandomSamplerAggregator extends BucketsAggregator implements Single
                 RandomSamplerAggregationBuilder.NAME + " aggregation [" + name + "] must have sub aggregations configured"
             );
         }
-        this.weightSupplier = weightSupplier;
         this.shardSeed = shardSeed;
     }
 
+    /**
+     * Creates, on first use, the query weight which will be used in the aggregator, using this aggregator's own {@code scoreMode()}.
+     *
+     * This weight is a boolean query that filters on {@link RandomSamplingQuery} and adds the top level query of the search (MUST when
+     * scores are needed, FILTER otherwise). The aggregation iterates documents directly, sampling in the background, not the foreground.
+     * @return weight to be used; built on the first call and cached for additional usages
+     * @throws IOException when building the weight or queries fails
+     */
+    private Weight getWeight() throws IOException {
+        if (weight == null) {
+            RandomSamplingQuery query = new RandomSamplingQuery(
+                probability,
+                seed,
+                shardSeed == null ? context.shardRandomSeed() : shardSeed
+            );
+            ScoreMode scoreMode = scoreMode();
+            BooleanQuery booleanQuery = new BooleanQuery.Builder().add(query, BooleanClause.Occur.FILTER)
+                .add(context.query(), scoreMode.needsScores() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER)
+                .build();
+            weight = context.searcher().createWeight(context.searcher().rewrite(booleanQuery), scoreMode, 1f);
+        }
+        return weight;
+    }
+
     @Override
     public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException {
         return buildAggregationsForSingleBucket(
@@ -112,7 +136,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
             };
         }
         // TODO know when sampling would be much slower and skip sampling: https://github.com/elastic/elasticsearch/issues/84353
-        Scorer scorer = weightSupplier.get().scorer(aggCtx.getLeafReaderContext());
+        Scorer scorer = getWeight().scorer(aggCtx.getLeafReaderContext());
         // This means there are no docs to iterate, possibly due to the fields not existing
         if (scorer == null) {
             return LeafBucketCollector.NO_OP_COLLECTOR;
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregatorFactory.java
@@ -9,10 +9,6 @@
 
 package org.elasticsearch.search.aggregations.bucket.sampler.random;
 
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.Weight;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorFactories;
 import org.elasticsearch.search.aggregations.AggregatorFactory;
@@ -30,7 +26,6 @@ public class RandomSamplerAggregatorFactory extends AggregatorFactory {
     private final Integer shardSeed;
     private final double probability;
     private final SamplingContext samplingContext;
-    private Weight weight;
 
     RandomSamplerAggregatorFactory(
         String name,
@@ -57,40 +52,6 @@ public Optional<SamplingContext> getSamplingContext() {
     @Override
     public Aggregator createInternal(Aggregator parent, CardinalityUpperBound cardinality, Map<String, Object> metadata)
         throws IOException {
-        return new RandomSamplerAggregator(
-            name,
-            seed,
-            shardSeed,
-            probability,
-            this::getWeight,
-            factories,
-            context,
-            parent,
-            cardinality,
-            metadata
-        );
-    }
-
-    /**
-     * This creates the query weight which will be used in the aggregator.
-     *
-     * This weight is a boolean query between {@link RandomSamplingQuery} and the configured top level query of the search. This allows
-     * the aggregation to iterate the documents directly, thus sampling in the background instead of the foreground.
-     * @return weight to be used, is cached for additional usages
-     * @throws IOException when building the weight or queries fails;
-     */
-    private Weight getWeight() throws IOException {
-        if (weight == null) {
-            RandomSamplingQuery query = new RandomSamplingQuery(
-                probability,
-                seed,
-                shardSeed == null ? context.shardRandomSeed() : shardSeed
-            );
-            BooleanQuery booleanQuery = new BooleanQuery.Builder().add(query, BooleanClause.Occur.FILTER)
-                .add(context.query(), BooleanClause.Occur.MUST)
-                .build();
-            weight = context.searcher().createWeight(context.searcher().rewrite(booleanQuery), ScoreMode.COMPLETE, 1f);
-        }
-        return weight;
+        return new RandomSamplerAggregator(name, seed, shardSeed, probability, factories, context, parent, cardinality, metadata);
     }
 }