Skip to content

Commit 0dbbb66

Browse files
committed
Improve confidence interval computation with NaNs + unit tests
1 parent ffea133 commit 0dbbb66

File tree

2 files changed

+224
-15
lines changed

2 files changed

+224
-15
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/approximate/ConfidenceInterval.java

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
3333

3434
import java.io.IOException;
35-
import java.util.Arrays;
3635
import java.util.List;
3736
import java.util.Objects;
3837

@@ -200,6 +199,10 @@ static void process(
200199
}
201200
int trialCount = trialCountBlock.getInt(trialCountBlock.getFirstValueIndex(position));
202201
int bucketCount = bucketCountBlock.getInt(bucketCountBlock.getFirstValueIndex(position));
202+
if (estimates.length != trialCount * bucketCount) {
203+
builder.appendNull();
204+
return;
205+
}
203206
double confidenceLevel = confidenceLevelBlock.getDouble(confidenceLevelBlock.getFirstValueIndex(position));
204207
double[] confidenceInterval = computeConfidenceInterval(bestEstimate, estimates, trialCount, bucketCount, confidenceLevel);
205208
if (confidenceInterval == null) {
@@ -213,50 +216,79 @@ static void process(
213216
}
214217
}
215218

216-
public static double[] computeConfidenceInterval(
219+
static double[] computeConfidenceInterval(
217220
double bestEstimate,
218221
double[] estimates,
219222
int trialCount,
220223
int bucketCount,
221224
double confidenceLevel
222225
) {
223-
Mean means = new Mean();
226+
Mean meansIgnoreNaN = new Mean();
227+
Mean meansZeroNaN = new Mean();
228+
for (int trial = 0; trial < trialCount; trial++) {
229+
Mean meanIgnoreNaN = new Mean();
230+
Mean meanZeroNaN = new Mean();
231+
for (int bucket = 0; bucket < bucketCount; bucket++) {
232+
double estimate = estimates[trial * bucketCount + bucket];
233+
if (Double.isNaN(estimate) == false) {
234+
meanIgnoreNaN.increment(estimate);
235+
meanZeroNaN.increment(estimate);
236+
} else {
237+
meanZeroNaN.increment(0.0);
238+
}
239+
}
240+
if (meanIgnoreNaN.getN() >= 3) {
241+
meansIgnoreNaN.increment(meanIgnoreNaN.getResult());
242+
}
243+
if (meanZeroNaN.getN() >= 3) {
244+
meansZeroNaN.increment(meanZeroNaN.getResult());
245+
}
246+
}
247+
248+
double meanIgnoreNan = meansIgnoreNaN.getResult();
249+
double meanZeroNan = meansZeroNaN.getResult();
250+
251+
boolean ignoreNaNs = Math.abs(meanIgnoreNan - bestEstimate) < Math.abs(meanZeroNan - bestEstimate);
252+
double mm = ignoreNaNs ? meanIgnoreNan : meanZeroNan;
253+
224254
Mean stddevs = new Mean();
225255
Mean skews = new Mean();
226256
for (int trial = 0; trial < trialCount; trial++) {
227-
Mean mean = new Mean();
228257
StandardDeviation stdDev = new StandardDeviation(false);
229258
Skewness skew = new Skewness();
230259
for (int bucket = 0; bucket < bucketCount; bucket++) {
231260
double estimate = estimates[trial * bucketCount + bucket];
232261
if (Double.isNaN(estimate)) {
233-
continue;
262+
if (ignoreNaNs) {
263+
continue;
264+
} else {
265+
estimate = 0.0;
266+
}
234267
}
235-
mean.increment(estimate);
236268
stdDev.increment(estimate);
237269
skew.increment(estimate);
238270
}
239271
if (skew.getN() >= 3) {
240-
means.increment(mean.getResult());
241272
stddevs.increment(stdDev.getResult());
242273
skews.increment(skew.getResult());
243274
}
244275
}
245-
if (means.getN() == 0) {
246-
return null;
247-
}
276+
248277
double sm = stddevs.getResult();
249278
if (sm == 0.0) {
250279
return new double[] { bestEstimate, bestEstimate };
251280
}
252-
double mm = means.getResult();
253-
double a = skews.getResult() / (6.0 * Math.sqrt(bucketCount));
281+
282+
// Scale the acceleration to account for the dependence of skewness on sample size.
283+
double scale = 1 / Math.sqrt(bucketCount);
284+
double a = scale * skews.getResult() / 6.0;
254285
double z0 = (bestEstimate - mm) / sm;
255286
double dz = normal.inverseCumulativeProbability((1.0 + confidenceLevel) / 2.0);
256287
double zl = z0 + (z0 - dz) / (1.0 - Math.min(a * (z0 - dz), 0.9));
257288
double zu = z0 + (z0 + dz) / (1.0 - Math.min(a * (z0 + dz), 0.9));
258-
double scale = Math.max(1.0 / Math.sqrt(bucketCount), z0 < 0.0 ? z0 / zl : z0 / zu);
259-
return new double[] { mm + scale * sm * zl, mm + sm * scale * zu };
289+
double lower = mm + scale * sm * zl;
290+
double upper = mm + scale * sm * zu;
291+
return lower <= bestEstimate && bestEstimate <= upper ? new double[] { lower, upper } : null;
260292
}
261293

262294
@Override
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,181 @@
1+
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.esql.expression.function.scalar.approximate;

import com.carrotsearch.randomizedtesting.annotations.Name;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase;
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.IntStream;

import static java.lang.Double.NaN;
import static org.hamcrest.Matchers.both;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.Matchers.nullValue;

/**
 * Tests for {@link ConfidenceInterval}: interval computation on random data,
 * fully-populated buckets, the two NaN-handling policies, and data that is
 * inconsistent with the best estimate (expected to evaluate to null).
 */
public class ConfidenceIntervalTests extends AbstractScalarFunctionTestCase {

    /** Expected evaluator toString, shared by every test case. */
    private static final String EVALUATOR =
        "ConfidenceIntervalEvaluator[bestEstimateBlock=Attribute[channel=0], estimatesBlock=Attribute[channel=1], trialCountBlock="
            + "Attribute[channel=2], bucketCountBlock=Attribute[channel=3], confidenceLevelBlock=Attribute[channel=4]]";

    /** Argument types: best estimate, estimates, trial count, bucket count, confidence level. */
    private static final List<DataType> ARG_TYPES = List.of(
        DataType.DOUBLE,
        DataType.DOUBLE,
        DataType.INTEGER,
        DataType.INTEGER,
        DataType.DOUBLE
    );

    @ParametersFactory
    public static Iterable<Object[]> parameters() {
        List<TestCaseSupplier> suppliers = new ArrayList<>();
        suppliers.add(randomBuckets());
        suppliers.add(allBucketsFilled());
        suppliers.add(nanBuckets_ignoreNan());
        suppliers.add(nanBuckets_zeroNan());
        // Was defined but never registered, so the null-result path went untested.
        suppliers.add(inconsistentData());
        return parameterSuppliersFromTypedDataWithDefaultChecks(false, suppliers);
    }

    public ConfidenceIntervalTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testCaseSupplier) {
        this.testCase = testCaseSupplier.get();
    }

    @Override
    protected Expression build(Source source, List<Expression> args) {
        return new ConfidenceInterval(source, args.get(0), args.get(1), args.get(2), args.get(3), args.get(4));
    }

    /** Random trials whose bucket {@code i} holds a value in {@code [i, i + 1)}, so the interval should straddle the midpoint. */
    private static TestCaseSupplier randomBuckets() {
        return new TestCaseSupplier("randomBuckets", ARG_TYPES, () -> {
            int trialCount = randomIntBetween(1, 10);
            int bucketCount = randomIntBetween(3, 10);
            double confidenceLevel = randomDoubleBetween(0.8, 0.95, true);
            double bestEstimate = bucketCount / 2.0;
            List<Double> estimates = IntStream.range(0, trialCount * bucketCount)
                .mapToDouble(i -> randomDoubleBetween((i % bucketCount), (i % bucketCount) + 1, true))
                .boxed()
                .toList();
            return new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(bestEstimate, DataType.DOUBLE, "bestEstimate"),
                    new TestCaseSupplier.TypedData(estimates, DataType.DOUBLE, "estimates"),
                    new TestCaseSupplier.TypedData(trialCount, DataType.INTEGER, "trialCount"),
                    new TestCaseSupplier.TypedData(bucketCount, DataType.INTEGER, "bucketCount"),
                    new TestCaseSupplier.TypedData(confidenceLevel, DataType.DOUBLE, "confidenceLevel")
                ),
                EVALUATOR,
                DataType.DOUBLE,
                contains(
                    both(greaterThan(0.0)).and(lessThan(bestEstimate)),
                    both(greaterThan(bestEstimate)).and(lessThan((double) bucketCount))
                )
            );
        });
    }

    /** Fixed data without NaNs: pins the exact interval endpoints. */
    private static TestCaseSupplier allBucketsFilled() {
        return new TestCaseSupplier(
            "allBucketsFilled",
            ARG_TYPES,
            () -> new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(2.0, DataType.DOUBLE, "bestEstimate"),
                    new TestCaseSupplier.TypedData(
                        List.of(2.15, 1.73, 2.1, 2.49, 2.41, 2.06, 2.29, 1.97, 1.54, 1.97, 2.41, 1.75, 1.55, 2.33, 1.64),
                        DataType.DOUBLE,
                        "estimates"
                    ),
                    new TestCaseSupplier.TypedData(3, DataType.INTEGER, "trialCount"),
                    new TestCaseSupplier.TypedData(5, DataType.INTEGER, "bucketCount"),
                    new TestCaseSupplier.TypedData(0.8, DataType.DOUBLE, "confidence_level")
                ),
                EVALUATOR,
                DataType.DOUBLE,
                contains(closeTo(1.8293144967855208, 1e-9), closeTo(2.164428203663303, 1e-9))
            )
        );
    }

    /** NaN-laden data where bestEstimate is closer to the NaN-ignoring mean, selecting the ignore policy. */
    private static TestCaseSupplier nanBuckets_ignoreNan() {
        return new TestCaseSupplier(
            "nanBuckets_ignoreNan",
            ARG_TYPES,
            () -> new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(2.0, DataType.DOUBLE, "bestEstimate"),
                    new TestCaseSupplier.TypedData(
                        List.of(2.15, NaN, NaN, 2.49, 2.41, NaN, 2.29, NaN, 1.54, 1.97, 2.41, NaN, 1.55, NaN, 1.64),
                        DataType.DOUBLE,
                        "estimates"
                    ),
                    new TestCaseSupplier.TypedData(3, DataType.INTEGER, "trialCount"),
                    new TestCaseSupplier.TypedData(5, DataType.INTEGER, "bucketCount"),
                    new TestCaseSupplier.TypedData(0.8, DataType.DOUBLE, "confidence_level")
                ),
                EVALUATOR,
                DataType.DOUBLE,
                contains(closeTo(1.8443260740876288, 1e-9), closeTo(2.164997868635109, 1e-9))
            )
        );
    }

    /** Same NaN-laden data with a lower bestEstimate, selecting the NaN-as-zero policy. */
    private static TestCaseSupplier nanBuckets_zeroNan() {
        return new TestCaseSupplier(
            "nanBuckets_zeroNan",
            ARG_TYPES,
            () -> new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(1.0, DataType.DOUBLE, "bestEstimate"),
                    new TestCaseSupplier.TypedData(
                        List.of(2.15, NaN, NaN, 2.49, 2.41, NaN, 2.29, NaN, 1.54, 1.97, 2.41, NaN, 1.55, NaN, 1.64),
                        DataType.DOUBLE,
                        "estimates"
                    ),
                    new TestCaseSupplier.TypedData(3, DataType.INTEGER, "trialCount"),
                    new TestCaseSupplier.TypedData(5, DataType.INTEGER, "bucketCount"),
                    new TestCaseSupplier.TypedData(0.8, DataType.DOUBLE, "confidence_level")
                ),
                EVALUATOR,
                DataType.DOUBLE,
                contains(closeTo(0.4041519539094244, 1e-9), closeTo(1.6023321533418913, 1e-9))
            )
        );
    }

    /** A best estimate far outside the bootstrap estimates: the interval cannot bracket it, so the result is null. */
    private static TestCaseSupplier inconsistentData() {
        return new TestCaseSupplier(
            "inconsistentData", // was mis-named "nanBuckets_zeroNan" by copy-paste
            ARG_TYPES,
            () -> new TestCaseSupplier.TestCase(
                List.of(
                    new TestCaseSupplier.TypedData(123.456, DataType.DOUBLE, "bestEstimate"),
                    new TestCaseSupplier.TypedData(
                        List.of(2.15, NaN, NaN, 2.49, 2.41, NaN, 2.29, NaN, 1.54, 1.97, 2.41, NaN, 1.55, NaN, 1.64),
                        DataType.DOUBLE,
                        "estimates"
                    ),
                    new TestCaseSupplier.TypedData(3, DataType.INTEGER, "trialCount"),
                    new TestCaseSupplier.TypedData(5, DataType.INTEGER, "bucketCount"),
                    new TestCaseSupplier.TypedData(0.8, DataType.DOUBLE, "confidence_level")
                ),
                EVALUATOR,
                DataType.DOUBLE,
                nullValue()
            )
        );
    }
}

0 commit comments

Comments
 (0)