From 140438c521db5f6d3f9d9637bc5284160bef1488 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Wed, 20 Aug 2025 12:52:33 +0200 Subject: [PATCH 01/10] Add sum to exponential histograms --- .../EmptyExponentialHistogram.java | 5 ++ .../ExponentialHistogram.java | 11 ++- .../ExponentialHistogramGenerator.java | 9 +++ .../ExponentialHistogramMerger.java | 1 + .../ExponentialHistogramUtils.java | 53 +++++++++++++ .../ExponentialHistogramXContent.java | 2 + .../FixedCapacityExponentialHistogram.java | 12 +++ .../ExponentialHistogramMergerTests.java | 16 ++++ .../ExponentialHistogramUtilsTests.java | 58 ++++++++++++++ .../ExponentialHistogramXContentTests.java | 16 ++-- .../CompressedExponentialHistogram.java | 13 +++- .../ExponentialHistogramFieldMapper.java | 39 +++++++++- .../exponentialhistogram/IndexWithCount.java | 37 ++++++++- .../ExponentialHistogramFieldMapperTests.java | 77 ++++++++++++------- .../test/10_synthetic_source.yml | 46 +++++++++++ 15 files changed, 355 insertions(+), 40 deletions(-) create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java index f2d21f0861065..eb0be8bc9262d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java @@ -77,6 +77,11 @@ public Buckets negativeBuckets() { return EmptyBuckets.INSTANCE; } + @Override + public double sum() { + return 0; + } + @Override public long ramBytesUsed() { return 0; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index f4603f3fe679c..3abfdddcf3f71 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -47,7 +47,7 @@ */ public interface ExponentialHistogram extends Accountable { - // TODO(b/128622): support min/max/sum/count storage and merging. + // TODO(b/128622): support min/max storage and merging. // TODO(b/128622): Add special positive and negative infinity buckets // to allow representation of explicit bucket histograms with open boundaries. @@ -93,6 +93,15 @@ public interface ExponentialHistogram extends Accountable { */ Buckets negativeBuckets(); + /** + * Returns the sum of all values represented by this histogram. + * Note that even if histograms are cumulative, the sum is not guaranteed to be monotonically increasing, + * because histograms support negative values. + * + * @return the sum, guaranteed to be zero for empty histograms + */ + double sum(); + /** * Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range. */ diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index ef4e7f57dba44..29a28db64175d 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -123,6 +123,7 @@ private void mergeValuesToHistogram() { } valueBuffer.reset(); + valueBuffer.setSum(rawValuesSum()); int scale = valueBuffer.scale(); // Buckets must be provided with their indices in ascending order. @@ -161,6 +162,14 @@ private void mergeValuesToHistogram() { valueCount = 0; } + private double rawValuesSum() { + double sum = 0; + for (int i = 0; i < valueCount; i++) { + sum += rawValueBuffer[i]; + } + return sum; + } + private static long estimateBaseSize(int numBuckets) { return SHALLOW_SIZE + RamEstimationUtil.estimateDoubleArray(numBuckets); }; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index a92fb376abc9a..ba71f90bf923e 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -150,6 +150,7 @@ public void add(ExponentialHistogram toAdd) { buffer = FixedCapacityExponentialHistogram.create(bucketLimit, circuitBreaker); } buffer.setZeroBucket(zeroBucket); + buffer.setSum(a.sum() + b.sum()); // We attempt to bring everything to the scale of A. // This might involve increasing the scale for B, which would increase its indices. diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java new file mode 100644 index 0000000000000..bebc1975bd359 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +public class ExponentialHistogramUtils { + + /** + * Estimates the sum of all values of a histogram just based on the populated buckets. + * + * @param negativeBuckets the negative buckets of the histogram + * @param positiveBuckets the positive buckets of the histogram + * @return the estimated sum of all values in the histogram, guaranteed to be zero if there are no buckets + */ + public static double estimateSum(BucketIterator negativeBuckets, BucketIterator positiveBuckets) { + double sum = 0.0; + while (negativeBuckets.hasNext()) { + double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError( + negativeBuckets.peekIndex(), + negativeBuckets.scale() + ); + sum += -bucketMidPoint * negativeBuckets.peekCount(); + negativeBuckets.advance(); + } + while (positiveBuckets.hasNext()) { + double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError( + positiveBuckets.peekIndex(), + positiveBuckets.scale() + ); + sum += bucketMidPoint * positiveBuckets.peekCount(); + positiveBuckets.advance(); + } + return sum; + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java index 7fb066bc325f5..8f0995dca6e9c 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java @@ -31,6 +31,7 @@ public class ExponentialHistogramXContent { public static final String SCALE_FIELD = "scale"; + public static final String SUM_FIELD = "sum"; public static final String ZERO_FIELD = "zero"; public static final String ZERO_COUNT_FIELD = "count"; public static final String ZERO_THRESHOLD_FIELD = "threshold"; @@ -49,6 +50,7 @@ public static void serialize(XContentBuilder builder, ExponentialHistogram histo builder.startObject(); builder.field(SCALE_FIELD, histogram.scale()); + builder.field(SUM_FIELD, histogram.sum()); double zeroThreshold = histogram.zeroBucket().zeroThreshold(); long zeroCount = histogram.zeroBucket().count(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java index 255206cad8e81..29fd8f01520a2 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -53,6 +53,8 @@ final class FixedCapacityExponentialHistogram implements ReleasableExponentialHi private final Buckets positiveBuckets = new Buckets(true); + private double sum; + private final ExponentialHistogramCircuitBreaker circuitBreaker; private boolean closed = false; @@ -78,6 +80,7 @@ private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogr * Resets this histogram to the same state as a newly constructed one with the same capacity. */ void reset() { + sum = 0; setZeroBucket(ZeroBucket.minimalEmpty()); resetBuckets(MAX_SCALE); } @@ -110,6 +113,15 @@ void setZeroBucket(ZeroBucket zeroBucket) { this.zeroBucket = zeroBucket; } + @Override + public double sum() { + return sum; + } + + void setSum(double sum) { + this.sum = sum; + } + /** * Attempts to add a bucket to the positive or negative range of this histogram. *
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 98eca3fe94100..7800270811054 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -106,6 +106,22 @@ public void testEmptyZeroBucketIgnored() { assertThat(posBuckets.hasNext(), equalTo(false)); } + public void testSumCorrectness() { + double[] firstValues = randomDoubles(100).map(val -> val * 2 - 1).toArray(); + double[] secondValues = randomDoubles(50).map(val -> val * 2 - 1).toArray(); + double correctSum = Arrays.stream(firstValues).sum() + Arrays.stream(secondValues).sum(); + try ( + ReleasableExponentialHistogram merged = ExponentialHistogram.merge( + 2, + breaker(), + createAutoReleasedHistogram(10, firstValues), + createAutoReleasedHistogram(20, secondValues) + ) + ) { + assertThat(merged.sum(), closeTo(correctSum, 0.000001)); + } + } + public void testUpscalingDoesNotExceedIndexLimits() { for (int i = 0; i < 4; i++) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java new file mode 100644 index 0000000000000..1b8e014258951 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -0,0 +1,58 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import static org.hamcrest.Matchers.closeTo; + +public class ExponentialHistogramUtilsTests extends ExponentialHistogramTestCase { + + public void testRandomDataSumEstimation() { + for (int i = 0; i < 100; i++) { + int valueCount = randomIntBetween(100, 10_000); + int bucketCount = randomIntBetween(2, 500); + + double correctSum = 0; + double sign = randomBoolean() ? 1 : -1; + double[] values = new double[valueCount]; + for (int j = 0; j < valueCount; j++) { + values[j] = sign * Math.pow(10, randomIntBetween(1, 9)) * randomDouble(); + correctSum += values[j]; + } + + ExponentialHistogram histo = createAutoReleasedHistogram(bucketCount, values); + + double estimatedSum = ExponentialHistogramUtils.estimateSum( + histo.negativeBuckets().iterator(), + histo.positiveBuckets().iterator() + ); + + double correctAverage = correctSum / valueCount; + double estimatedAverage = estimatedSum / valueCount; + + // If the histogram does not contain mixed sign values, we have a guaranteed relative error bound of 2^(2^-scale) - 1 + double histogramBase = Math.pow(2, Math.pow(2, -histo.scale())); + double allowedError = Math.abs(correctAverage * (histogramBase - 1)); + + assertThat(estimatedAverage, closeTo(correctAverage, allowedError)); + } + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java index f0ccfba098e20..11c226eac433a 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java @@ -33,13 +33,14 @@ public class ExponentialHistogramXContentTests extends ExponentialHistogramTestC public void testEmptyHistogram() { ExponentialHistogram emptyHistogram = ExponentialHistogram.empty(); - assertThat(toJson(emptyHistogram), equalTo("{\"scale\":" + emptyHistogram.scale() + "}")); + assertThat(toJson(emptyHistogram), equalTo("{\"scale\":" + emptyHistogram.scale() + ",\"sum\":0.0}")); } public void testFullHistogram() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(100); histo.setZeroBucket(new ZeroBucket(0.1234, 42)); histo.resetBuckets(7); + histo.setSum(1234.56); histo.tryAddBucket(-10, 15, false); histo.tryAddBucket(10, 5, false); histo.tryAddBucket(-11, 10, true); @@ -49,6 +50,7 @@ public void testFullHistogram() { equalTo( "{" + "\"scale\":7," + + "\"sum\":1234.56," + "\"zero\":{\"count\":42,\"threshold\":0.1234}," + "\"positive\":{\"indices\":[-11,11],\"counts\":[10,20]}," + "\"negative\":{\"indices\":[-10,10],\"counts\":[15,5]}" @@ -61,30 +63,34 @@ public void testOnlyZeroThreshold() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.setZeroBucket(new ZeroBucket(5.0, 0)); histo.resetBuckets(3); - assertThat(toJson(histo), equalTo("{\"scale\":3,\"zero\":{\"threshold\":5.0}}")); + histo.setSum(1.1); + assertThat(toJson(histo), equalTo("{\"scale\":3,\"sum\":1.1,\"zero\":{\"threshold\":5.0}}")); } public void testOnlyZeroCount() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.setZeroBucket(new ZeroBucket(0.0, 7)); histo.resetBuckets(2); - assertThat(toJson(histo), equalTo("{\"scale\":2,\"zero\":{\"count\":7}}")); + histo.setSum(1.1); + assertThat(toJson(histo), equalTo("{\"scale\":2,\"sum\":1.1,\"zero\":{\"count\":7}}")); } public void testOnlyPositiveBuckets() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.resetBuckets(4); + histo.setSum(1.1); histo.tryAddBucket(-1, 3, true); histo.tryAddBucket(2, 5, true); - assertThat(toJson(histo), equalTo("{\"scale\":4,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}")); + assertThat(toJson(histo), equalTo("{\"scale\":4,\"sum\":1.1,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}")); } public void testOnlyNegativeBuckets() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.resetBuckets(5); + histo.setSum(1.1); histo.tryAddBucket(-1, 4, false); histo.tryAddBucket(2, 6, false); - assertThat(toJson(histo), equalTo("{\"scale\":5,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}")); + assertThat(toJson(histo), equalTo("{\"scale\":5,\"sum\":1.1,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}")); } private static String toJson(ExponentialHistogram histo) { diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java index 8a2e348568bbc..78d3afaecf2e4 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java @@ -33,6 +33,7 @@ public class CompressedExponentialHistogram implements ExponentialHistogram { private double zeroThreshold; private long valueCount; + private double sum; private ZeroBucket lazyZeroBucket; private final EncodedHistogramData encodedData = new EncodedHistogramData(); @@ -53,6 +54,11 @@ public ZeroBucket zeroBucket() { return lazyZeroBucket; } + @Override + public double sum() { + return sum; + } + @Override public ExponentialHistogram.Buckets positiveBuckets() { return positiveBuckets; @@ -68,20 +74,23 @@ public ExponentialHistogram.Buckets negativeBuckets() { * * @param zeroThreshold the zeroThreshold for the histogram, which needs to be stored externally * @param valueCount the total number of values the histogram contains, needs to be stored externally + * @param sum the total sum of the values the histogram contains, needs to be stored externally * @param encodedHistogramData the encoded histogram bytes which previously where generated via * {@link #writeHistogramBytes(StreamOutput, int, List, List)}. */ - public void reset(double zeroThreshold, long valueCount, BytesRef encodedHistogramData) throws IOException { + public void reset(double zeroThreshold, long valueCount, double sum, BytesRef encodedHistogramData) throws IOException { lazyZeroBucket = null; this.zeroThreshold = zeroThreshold; this.valueCount = valueCount; + this.sum = sum; encodedData.decode(encodedHistogramData); negativeBuckets.resetCachedData(); positiveBuckets.resetCachedData(); } /** - * Serializes the given histogram, so that exactly the same data can be reconstructed via {@link #reset(double, long, BytesRef)}. + * Serializes the given histogram, so that exactly the same data can be reconstructed via + * {@link #reset(double, long, double, BytesRef)}. * * @param output the output to write the serialized bytes to * @param scale the scale of the histogram diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java index 9f137aa03ef73..6ab601566d5ab 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils; import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -93,6 +94,7 @@ public class ExponentialHistogramFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "exponential_histogram"; public static final ParseField SCALE_FIELD = new ParseField(ExponentialHistogramXContent.SCALE_FIELD); + public static final ParseField SUM_FIELD = new ParseField(ExponentialHistogramXContent.SUM_FIELD); public static final ParseField ZERO_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_FIELD); public static final ParseField ZERO_COUNT_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_COUNT_FIELD); public static final ParseField ZERO_THRESHOLD_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_THRESHOLD_FIELD); @@ -134,6 +136,10 @@ private static String valuesCountSubFieldName(String fullPath) { return fullPath + "._values_count"; } + private static String valuesSumSubFieldName(String fullPath) { + return fullPath + "._values_sum"; + } + static class Builder extends FieldMapper.Builder { private final FieldMapper.Parameter> meta = FieldMapper.Parameter.metaParam(); @@ -259,6 +265,7 @@ public void parse(DocumentParserContext context) throws IOException { return; } + Double sum = null; Integer scale = null; ParsedZeroBucket zeroBucket = ParsedZeroBucket.DEFAULT; List negativeBuckets = Collections.emptyList(); @@ -296,6 +303,10 @@ public void parse(DocumentParserContext context) throws IOException { + scale ); } + } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { + token = subParser.nextToken(); + ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser); + sum = subParser.doubleValue(); } else if (fieldName.equals(ZERO_FIELD.getPreferredName())) { zeroBucket = parseZeroBucket(subParser); } else if (fieldName.equals(POSITIVE_FIELD.getPreferredName())) { @@ -336,6 +347,20 @@ public void parse(DocumentParserContext context) throws IOException { ); } + if (sum == null) { + sum = ExponentialHistogramUtils.estimateSum( + IndexWithCount.asBucketIterator(scale, negativeBuckets), + IndexWithCount.asBucketIterator(scale, positiveBuckets) + ); + } else { + if (totalValueCount == 0 && sum != 0.0) { + throw new DocumentParsingException( + subParser.getTokenLocation(), + "error parsing field [" + fullPath() + "], sum field must be zero if the histogram is empty, but got " + sum + ); + } + } + BytesStreamOutput histogramBytesOutput = new BytesStreamOutput(); CompressedExponentialHistogram.writeHistogramBytes(histogramBytesOutput, scale, negativeBuckets, positiveBuckets); BytesRef histoBytes = histogramBytesOutput.bytes().toBytesRef(); @@ -344,10 +369,15 @@ public void parse(DocumentParserContext context) throws IOException { long thresholdAsLong = NumericUtils.doubleToSortableLong(zeroBucket.threshold()); NumericDocValuesField zeroThresholdField = new NumericDocValuesField(zeroThresholdSubFieldName(fullPath()), thresholdAsLong); NumericDocValuesField valuesCountField = new NumericDocValuesField(valuesCountSubFieldName(fullPath()), totalValueCount); + NumericDocValuesField sumField = new NumericDocValuesField( + valuesSumSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(sum) + ); context.doc().addWithKey(fieldType().name(), histoField); context.doc().add(zeroThresholdField); context.doc().add(valuesCountField); + context.doc().add(sumField); } catch (Exception ex) { if (ignoreMalformed.value() == false) { @@ -582,6 +612,7 @@ private class ExponentialHistogramSyntheticFieldLoader implements CompositeSynth private BytesRef binaryValue; private double zeroThreshold; private long valueCount; + private double valueSum; @Override public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) @@ -594,18 +625,22 @@ public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafRea } NumericDocValues zeroThresholds = leafReader.getNumericDocValues(zeroThresholdSubFieldName(fullPath())); NumericDocValues valueCounts = leafReader.getNumericDocValues(valuesCountSubFieldName(fullPath())); + NumericDocValues valueSums = leafReader.getNumericDocValues(valuesSumSubFieldName(fullPath())); assert zeroThresholds != null; assert valueCounts != null; + assert valueSums != null; return docId -> { if (histoDocValues.advanceExact(docId)) { boolean zeroThresholdPresent = zeroThresholds.advanceExact(docId); boolean valueCountsPresent = valueCounts.advanceExact(docId); - assert zeroThresholdPresent && valueCountsPresent; + boolean valueSumsPresent = valueSums.advanceExact(docId); + assert zeroThresholdPresent && valueCountsPresent && valueSumsPresent; binaryValue = histoDocValues.binaryValue(); zeroThreshold = NumericUtils.sortableLongToDouble(zeroThresholds.longValue()); valueCount = valueCounts.longValue(); + valueSum = NumericUtils.sortableLongToDouble(valueSums.longValue()); return true; } binaryValue = null; @@ -624,7 +659,7 @@ public void write(XContentBuilder b) throws IOException { return; } - histogram.reset(zeroThreshold, valueCount, binaryValue); + histogram.reset(zeroThreshold, valueCount, valueSum, binaryValue); ExponentialHistogramXContent.serialize(b, histogram); } diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java index 6a6d0254ea1bd..54c23508f220a 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java @@ -7,9 +7,44 @@ package org.elasticsearch.xpack.exponentialhistogram; +import org.elasticsearch.exponentialhistogram.BucketIterator; + +import java.util.List; + /** * An exponential histogram bucket represented by its index and associated bucket count. * @param index the index of the bucket * @param count the number of values in that bucket. */ -public record IndexWithCount(long index, long count) {} +public record IndexWithCount(long index, long count) { + public static BucketIterator asBucketIterator(int scale, List buckets) { + return new BucketIterator() { + int position = 0; + + @Override + public boolean hasNext() { + return position < buckets.size(); + } + + @Override + public long peekCount() { + return buckets.get(position).count; + } + + @Override + public long peekIndex() { + return buckets.get(position).index; + } + + @Override + public void advance() { + position++; + } + + @Override + public int scale() { + return scale; + } + }; + } +} diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index 6bf9ba8d0336d..20ec2afccf2ab 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.exponentialhistogram; import org.elasticsearch.core.Types; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.MappedFieldType; @@ -23,14 +24,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.NavigableMap; import java.util.Set; -import java.util.TreeMap; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; @@ -117,16 +117,22 @@ private static Map createRandomHistogramValue(int maxBucketCount fillBucketsRandomly(negativeIndices, negativeCounts, maxBucketCount / 2); } - return Map.of( - "scale", - scale, - "zero", - Map.of("count", zeroCount, "threshold", zeroThreshold), - "positive", - Map.of("indices", positiveIndices, "counts", positiveCounts), - "negative", - Map.of("indices", negativeIndices, "counts", negativeCounts) + Map result = new HashMap<>( + Map.of( + "scale", + scale, + "zero", + Map.of("count", zeroCount, "threshold", zeroThreshold), + "positive", + Map.of("indices", positiveIndices, "counts", positiveCounts), + "negative", + Map.of("indices", negativeIndices, "counts", negativeCounts) + ) ); + if (randomBoolean() && (positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { + result.put("sum", randomDoubleBetween(-1000, 1000, true)); + } + return result; } private static void fillBucketsRandomly(List indices, List counts, int maxBucketCount) { @@ -425,47 +431,60 @@ public SyntheticSourceExample example(int maxValues) { private Map convertHistogramToCanonicalForm(Map histogram) { Map result = new LinkedHashMap<>(); - result.put("scale", histogram.get("scale")); + int scale = (Integer) histogram.get("scale"); + result.put("scale", scale); + + List positive = parseBuckets(Types.forciblyCast(histogram.get("positive"))); + List negative = parseBuckets(Types.forciblyCast(histogram.get("negative"))); + + Object sum = histogram.get("sum"); + if (sum == null) { + sum = ExponentialHistogramUtils.estimateSum( + IndexWithCount.asBucketIterator(scale, negative), + IndexWithCount.asBucketIterator(scale, positive) + ); + } + result.put("sum", sum); Map zeroBucket = convertZeroBucketToCanonicalForm(Types.forciblyCast(histogram.get("zero"))); if (zeroBucket != null) { result.put("zero", zeroBucket); } - Map positive = convertBucketListToCanonicalForm(Types.forciblyCast(histogram.get("positive"))); - if (positive != null) { - result.put("positive", positive); + if (positive.isEmpty() == false) { + result.put("positive", writeBucketsInCanonicalForm(positive)); } - - Map negative = convertBucketListToCanonicalForm(Types.forciblyCast(histogram.get("negative"))); - if (negative != null) { - result.put("negative", negative); + if (negative.isEmpty() == false) { + result.put("negative", writeBucketsInCanonicalForm(negative)); } return result; } - private Map convertBucketListToCanonicalForm(Map buckets) { + private List parseBuckets(Map buckets) { if (buckets == null) { - return null; + return List.of(); } List indices = Types.forciblyCast(buckets.get("indices")); List counts = Types.forciblyCast(buckets.get("counts")); if (indices == null || indices.isEmpty()) { - return null; + return List.of(); } - NavigableMap indicesToCountsSorted = new TreeMap<>(); + List indexWithCounts = new ArrayList<>(); for (int i = 0; i < indices.size(); i++) { - indicesToCountsSorted.put(indices.get(i).longValue(), counts.get(i).longValue()); + indexWithCounts.add(new IndexWithCount(indices.get(i).longValue(), counts.get(i).longValue())); } + indexWithCounts.sort(Comparator.comparing(IndexWithCount::index)); + return indexWithCounts; + } + private Map writeBucketsInCanonicalForm(List buckets) { List resultIndices = new ArrayList<>(); List resultCounts = new ArrayList<>(); - indicesToCountsSorted.forEach((index, count) -> { - resultIndices.add(index); - resultCounts.add(count); - }); - + for (IndexWithCount indexWithCount : buckets) { + resultIndices.add(indexWithCount.index()); + resultCounts.add(indexWithCount.count()); + } LinkedHashMap result = new LinkedHashMap<>(); result.put("indices", resultIndices); result.put("counts", resultCounts); diff --git a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml index 461748b552421..d31d03e8a3d0a 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml +++ b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml @@ -28,6 +28,7 @@ setup: body: my_histo: scale: 12 + sum: 1234.56 zero: threshold: 0.123456 count: 42 @@ -45,6 +46,7 @@ setup: - match: _source.my_histo: scale: 12 + sum: 1234.56 zero: threshold: 0.123456 count: 42 @@ -65,6 +67,7 @@ setup: body: my_histo: scale: -10 + sum: 1234.56 positive: indices: [1,2,3,4,5] counts: [6,7,8,9,10] @@ -79,6 +82,7 @@ setup: - match: _source.my_histo: scale: -10 + sum: 1234.56 positive: indices: [1,2,3,4,5] counts: [6,7,8,9,10] @@ -96,6 +100,7 @@ setup: body: my_histo: scale: 0 + sum: 1234.56 positive: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -107,6 +112,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 1234.56 positive: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -121,6 +127,7 @@ setup: body: my_histo: scale: 0 + sum: 1234.56 negative: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -132,6 +139,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 1234.56 negative: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -145,12 +153,14 @@ setup: body: my_histo: scale: -7 + sum: 0.0 - do: get: index: test_exponential_histogram id: "1" - match: _source.my_histo: + sum: 0.0 scale: -7 --- @@ -172,6 +182,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 0.0 zero: threshold: 42.7 @@ -194,6 +205,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 0.0 zero: count: 101 @@ -208,6 +220,7 @@ setup: body: my_histo: scale: 38 + sum: 1E300 zero: count: 2305843009213693952 # 2^61 to not cause overflows for the total value count sum threshold: 1E-300 @@ -224,6 +237,7 @@ setup: - match: _source.my_histo: scale: 38 + sum: 1E300 zero: count: 2305843009213693952 threshold: 1E-300 @@ -233,3 +247,35 @@ setup: negative: indices: [-4611686018427387903, 4611686018427387903] counts: [2305843009213693952, 1] + +--- +"Sum estimation": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 1 + positive: + indices: [8] + counts: [1] + negative: + indices: [0, 1] + counts: [1, 5] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 1 + sum: 9.289321881345247 + positive: + indices: [8] + counts: [1] + negative: + indices: [0, 1] + counts: [1, 5] From 8c6c002a5794d1401daa5c77edcc4aa3a119918e Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 22 Aug 2025 12:18:10 +0200 Subject: [PATCH 02/10] Add missing test case --- .../ExponentialHistogramFieldMapperTests.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index 20ec2afccf2ab..bc1a3a752bf6c 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -389,7 +389,15 @@ protected List exampleMalformedValues() { .endArray() .endObject() .endObject() - ).errorMatches("has a total value count exceeding the allowed maximum value of " + Long.MAX_VALUE) + ).errorMatches("has a total value count exceeding the allowed maximum value of " + Long.MAX_VALUE), + + // Non-Zero sum for empty histogram + exampleMalformedValue( + b -> b.startObject() + .field("scale", 0) + .field("sum", 42.0) + .endObject() + ).errorMatches("sum field must be zero if the histogram is empty, but got 42.0") ); } From 3f8d3c33e32cb9d7467db3f9195834643da7a6de Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 22 Aug 2025 10:25:32 +0000 Subject: [PATCH 03/10] [CI] Auto commit changes from spotless --- .../ExponentialHistogramFieldMapperTests.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index bc1a3a752bf6c..583e420358c22 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -392,12 +392,9 @@ protected List exampleMalformedValues() { ).errorMatches("has a total value count exceeding the allowed maximum value of " + Long.MAX_VALUE), // Non-Zero sum for empty histogram - exampleMalformedValue( - b -> b.startObject() - .field("scale", 0) - .field("sum", 42.0) - .endObject() - ).errorMatches("sum field must be zero if the histogram is empty, but got 42.0") + exampleMalformedValue(b -> b.startObject().field("scale", 0).field("sum", 42.0).endObject()).errorMatches( + "sum field must be zero if the histogram is empty, but got 42.0" + ) ); } From cde5e65a9cb4ecf1d850d58b7870a2170bfd672a Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 22 Aug 2025 14:52:54 +0200 Subject: [PATCH 04/10] Fix and test infinity handling --- .../ExponentialHistogramFieldMapper.java | 22 ++++++-- .../ExponentialHistogramFieldMapperTests.java | 4 +- .../test/10_synthetic_source.yml | 52 +++++++++++++++++++ 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java index 6ab601566d5ab..30715b8411304 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java @@ -304,9 +304,7 @@ public void parse(DocumentParserContext context) throws IOException { ); } } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { - token = subParser.nextToken(); - ensureExpectedToken(XContentParser.Token.VALUE_NUMBER, token, subParser); - sum = subParser.doubleValue(); + sum = parseDoubleAllowingInfinity(subParser); } else if (fieldName.equals(ZERO_FIELD.getPreferredName())) { zeroBucket = parseZeroBucket(subParser); } else if (fieldName.equals(POSITIVE_FIELD.getPreferredName())) { @@ -407,6 +405,24 @@ public void parse(DocumentParserContext context) throws IOException { context.path().remove(); } + private double parseDoubleAllowingInfinity(XContentParser parser) throws IOException { + XContentParser.Token token = parser.nextToken(); + boolean isValidNumber = token == XContentParser.Token.VALUE_NUMBER; + if (token == XContentParser.Token.VALUE_STRING) { + String text = parser.text(); + if (text.equals("-Infinity") || text.equals("Infinity")) { + isValidNumber = true; + } + } + if (isValidNumber) { + return parser.doubleValue(); + } + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + fullPath() + "], expected a number but got " + token + ); + } + private static long getTotalValueCount( ParsedZeroBucket zeroBucket, List positiveBuckets, diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index 583e420358c22..2c24965c238da 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -129,7 +129,9 @@ private static Map createRandomHistogramValue(int maxBucketCount Map.of("indices", negativeIndices, "counts", negativeCounts) ) ); - if (randomBoolean() && (positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { + if ((positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { + // we always add the sum field to avoid numeric problems with the estimation due to random buckets + // sum generation is tested in the yaml tests result.put("sum", randomDoubleBetween(-1000, 1000, true)); } return result; diff --git a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml index d31d03e8a3d0a..c95db7bf8587d 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml +++ b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml @@ -279,3 +279,55 @@ setup: negative: indices: [0, 1] counts: [1, 5] + +--- +"Positive infinity sum": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 0 + positive: + indices: [2000] + counts: [1] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 0 + sum: Infinity + positive: + indices: [2000] + counts: [1] + +--- +"negative infinity sum": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 0 + negative: + indices: [2000] + counts: [1] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 0 + sum: -Infinity + negative: + indices: [2000] + counts: [1] From 5a9e788e7543c514b2b6b28bb2e3e2b7f26afc80 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 22 Aug 2025 15:30:18 +0200 Subject: [PATCH 05/10] Fix benchmark --- .../exponentialhistogram/ExponentialHistogramMergeBench.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java index 0f7ee6a2c763e..d1fcf0fb28464 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java @@ -130,7 +130,7 @@ private ExponentialHistogram asCompressedHistogram(ExponentialHistogram histogra CompressedExponentialHistogram.writeHistogramBytes(histoBytes, histogram.scale(), negativeBuckets, positiveBuckets); CompressedExponentialHistogram result = new CompressedExponentialHistogram(); BytesRef data = histoBytes.bytes().toBytesRef(); - result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, data); + result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), data); return result; } catch (IOException e) { throw new RuntimeException(e); From 91067c7bf1755f7973823aca49e8fe38723119e6 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Fri, 22 Aug 2025 15:59:49 +0200 Subject: [PATCH 06/10] Avoid NaN in sum computation --- .../ExponentialHistogramUtils.java | 44 +++++++++++++------ .../ExponentialHistogramUtilsTests.java | 28 ++++++++++++ .../ExponentialHistogramFieldMapperTests.java | 4 +- 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index bebc1975bd359..fee286d09e263 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -25,28 +25,46 @@ public class ExponentialHistogramUtils { /** * Estimates the sum of all values of a histogram just based on the populated buckets. + * Will never return NaN, but might return +/-Infinity if the histogram is too big. * * @param negativeBuckets the negative buckets of the histogram * @param positiveBuckets the positive buckets of the histogram - * @return the estimated sum of all values in the histogram, guaranteed to be zero if there are no buckets + * @return the estimated sum of all values in the histogram, guaranteed to be zero if there are no buckets. */ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator positiveBuckets) { + assert negativeBuckets.scale() == positiveBuckets.scale(); double sum = 0.0; - while (negativeBuckets.hasNext()) { - double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError( - negativeBuckets.peekIndex(), - negativeBuckets.scale() - ); - sum += -bucketMidPoint * negativeBuckets.peekCount(); - negativeBuckets.advance(); - } - while (positiveBuckets.hasNext()) { + while (negativeBuckets.hasNext() || positiveBuckets.hasNext()) { + long negativeIndex = negativeBuckets.hasNext() ? negativeBuckets.peekIndex() : Long.MAX_VALUE; + long positiveIndex = positiveBuckets.hasNext() ? positiveBuckets.peekIndex() : Long.MAX_VALUE; + double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError( - positiveBuckets.peekIndex(), + Math.min(negativeIndex, positiveIndex), positiveBuckets.scale() ); - sum += bucketMidPoint * positiveBuckets.peekCount(); - positiveBuckets.advance(); + + long countWithSign; + if (negativeIndex == positiveIndex) { + countWithSign = positiveBuckets.peekCount() - negativeBuckets.peekCount(); + positiveBuckets.advance(); + negativeBuckets.advance(); + } else if (negativeIndex < positiveIndex){ + countWithSign = -negativeBuckets.peekCount(); + negativeBuckets.advance(); + } else { // positiveIndex > negativeIndex + countWithSign = positiveBuckets.peekCount(); + positiveBuckets.advance(); + } + if (countWithSign != 0) { + double toAdd = bucketMidPoint * countWithSign; + if (Double.isFinite(toAdd)) { + sum += toAdd; + } else { + // Avoid NaN in case we end up with e.g. -Infinity+Infinity + // we consider the bucket with the bigger index the winner for the sign + sum = toAdd; + } + } } return sum; } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java index 1b8e014258951..09517a9d473dd 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -22,6 +22,7 @@ package org.elasticsearch.exponentialhistogram; import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; public class ExponentialHistogramUtilsTests extends ExponentialHistogramTestCase { @@ -55,4 +56,31 @@ public void testRandomDataSumEstimation() { assertThat(estimatedAverage, closeTo(correctAverage, allowedError)); } } + + public void testInfinityHandling() { + FixedCapacityExponentialHistogram morePositiveValues = createAutoReleasedHistogram(100); + morePositiveValues.resetBuckets(0); + morePositiveValues.tryAddBucket(1999, 1,false); + morePositiveValues.tryAddBucket(2000, 2, false); + morePositiveValues.tryAddBucket(1999, 2,true); + morePositiveValues.tryAddBucket(2000, 2,true); + + double sum = ExponentialHistogramUtils.estimateSum( + morePositiveValues.negativeBuckets().iterator(), + morePositiveValues.positiveBuckets().iterator() + ); + assertThat(sum, equalTo(Double.POSITIVE_INFINITY)); + FixedCapacityExponentialHistogram moreNegativeValues = createAutoReleasedHistogram(100); + moreNegativeValues.resetBuckets(0); + moreNegativeValues.tryAddBucket(1999, 2,false); + moreNegativeValues.tryAddBucket(2000, 2, false); + moreNegativeValues.tryAddBucket(1999, 1,true); + moreNegativeValues.tryAddBucket(2000, 2,true); + + sum = ExponentialHistogramUtils.estimateSum( + moreNegativeValues.negativeBuckets().iterator(), + moreNegativeValues.positiveBuckets().iterator() + ); + assertThat(sum, equalTo(Double.NEGATIVE_INFINITY)); + } } diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index 2c24965c238da..583e420358c22 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -129,9 +129,7 @@ private static Map createRandomHistogramValue(int maxBucketCount Map.of("indices", negativeIndices, "counts", negativeCounts) ) ); - if ((positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { - // we always add the sum field to avoid numeric problems with the estimation due to random buckets - // sum generation is tested in the yaml tests + if (randomBoolean() && (positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { result.put("sum", randomDoubleBetween(-1000, 1000, true)); } return result; From 3d1294f275966d54493e57115dfa5efad037af70 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 22 Aug 2025 14:14:36 +0000 Subject: [PATCH 07/10] [CI] Auto commit changes from spotless --- .../ExponentialHistogramUtils.java | 4 ++-- .../ExponentialHistogramUtilsTests.java | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index fee286d09e263..74be159eb8b6a 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -48,10 +48,10 @@ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator countWithSign = positiveBuckets.peekCount() - negativeBuckets.peekCount(); positiveBuckets.advance(); negativeBuckets.advance(); - } else if (negativeIndex < positiveIndex){ + } else if (negativeIndex < positiveIndex) { countWithSign = -negativeBuckets.peekCount(); negativeBuckets.advance(); - } else { // positiveIndex > negativeIndex + } else { // positiveIndex > negativeIndex countWithSign = positiveBuckets.peekCount(); positiveBuckets.advance(); } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java index 09517a9d473dd..96d9d40a37208 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -60,10 +60,10 @@ public void testRandomDataSumEstimation() { public void testInfinityHandling() { FixedCapacityExponentialHistogram morePositiveValues = createAutoReleasedHistogram(100); morePositiveValues.resetBuckets(0); - morePositiveValues.tryAddBucket(1999, 1,false); + morePositiveValues.tryAddBucket(1999, 1, false); morePositiveValues.tryAddBucket(2000, 2, false); - morePositiveValues.tryAddBucket(1999, 2,true); - morePositiveValues.tryAddBucket(2000, 2,true); + morePositiveValues.tryAddBucket(1999, 2, true); + morePositiveValues.tryAddBucket(2000, 2, true); double sum = ExponentialHistogramUtils.estimateSum( morePositiveValues.negativeBuckets().iterator(), @@ -72,10 +72,10 @@ public void testInfinityHandling() { assertThat(sum, equalTo(Double.POSITIVE_INFINITY)); FixedCapacityExponentialHistogram moreNegativeValues = createAutoReleasedHistogram(100); moreNegativeValues.resetBuckets(0); - moreNegativeValues.tryAddBucket(1999, 2,false); + moreNegativeValues.tryAddBucket(1999, 2, false); moreNegativeValues.tryAddBucket(2000, 2, false); - moreNegativeValues.tryAddBucket(1999, 1,true); - moreNegativeValues.tryAddBucket(2000, 2,true); + moreNegativeValues.tryAddBucket(1999, 1, true); + moreNegativeValues.tryAddBucket(2000, 2, true); sum = ExponentialHistogramUtils.estimateSum( moreNegativeValues.negativeBuckets().iterator(), From f92ec9e06b5fe83de68ed10be089db4133a4eabf Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 25 Aug 2025 09:56:10 +0200 Subject: [PATCH 08/10] Refactor sum computation to reuse MergingBucketIterator --- .../ExponentialHistogramUtils.java | 30 +++++-------------- .../MergingBucketIterator.java | 23 ++++++++++++-- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index 74be159eb8b6a..933dd56b6bb84 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -33,29 +33,14 @@ public class ExponentialHistogramUtils { */ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator positiveBuckets) { assert negativeBuckets.scale() == positiveBuckets.scale(); - double sum = 0.0; - while (negativeBuckets.hasNext() || positiveBuckets.hasNext()) { - long negativeIndex = negativeBuckets.hasNext() ? negativeBuckets.peekIndex() : Long.MAX_VALUE; - long positiveIndex = positiveBuckets.hasNext() ? positiveBuckets.peekIndex() : Long.MAX_VALUE; - - double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError( - Math.min(negativeIndex, positiveIndex), - positiveBuckets.scale() - ); - long countWithSign; - if (negativeIndex == positiveIndex) { - countWithSign = positiveBuckets.peekCount() - negativeBuckets.peekCount(); - positiveBuckets.advance(); - negativeBuckets.advance(); - } else if (negativeIndex < positiveIndex) { - countWithSign = -negativeBuckets.peekCount(); - negativeBuckets.advance(); - } else { // positiveIndex > negativeIndex - countWithSign = positiveBuckets.peekCount(); - positiveBuckets.advance(); - } - if (countWithSign != 0) { + // for each bucket index, sum up the counts, but account for the positive/negative + BucketIterator it = new MergingBucketIterator(negativeBuckets, -1, positiveBuckets, 1, positiveBuckets.scale()); + double sum = 0.0; + while (it.hasNext()) { + long countWithSign = it.peekCount(); + double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError(it.peekIndex(), it.scale()); + if (countWithSign != 0) { // avoid 0 * INFINITY = NaN double toAdd = bucketMidPoint * countWithSign; if (Double.isFinite(toAdd)) { sum += toAdd; @@ -65,6 +50,7 @@ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator sum = toAdd; } } + it.advance(); } return sum; } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index ca13901cd95e9..2d5d2c507d381 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -33,6 +33,9 @@ final class MergingBucketIterator implements BucketIterator { private long currentIndex; private long currentCount; + private final long factorA; + private final long factorB; + /** * Creates a new merging iterator. * @@ -41,8 +44,24 @@ final class MergingBucketIterator implements BucketIterator { * @param targetScale the histogram scale to which both iterators should be aligned */ MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) { + this(itA, 1, itB, 1, targetScale); + } + + /** + * Creates a new merging iterator, multiplying counts from each iterator with the provided factors. + * Note that the factors can be negative, in which case {@link #peekCount()} can return zero or negative values. + * + * @param itA the first iterator to merge + * @param factorA a factor to multiply counts from the first iterator with + * @param itB the second iterator to merge + * @param factorB a factor to multiply counts from the second iterator with + * @param targetScale the histogram scale to which both iterators should be aligned + */ + MergingBucketIterator(BucketIterator itA, long factorA, BucketIterator itB, long factorB, int targetScale) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); + this.factorA = factorA; + this.factorB = factorB; endReached = false; advance(); } @@ -69,12 +88,12 @@ public void advance() { boolean advanceB = hasNextB && (hasNextA == false || idxB <= idxA); if (advanceA) { currentIndex = idxA; - currentCount += itA.peekCount(); + currentCount += itA.peekCount() * factorA; itA.advance(); } if (advanceB) { currentIndex = idxB; - currentCount += itB.peekCount(); + currentCount += itB.peekCount() * factorB; itB.advance(); } } From 7899f0c5be33fbcc0014dba5daf92269595ff683 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 25 Aug 2025 12:06:01 +0200 Subject: [PATCH 09/10] wording fix --- .../exponentialhistogram/ExponentialHistogramUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index 933dd56b6bb84..be5161c600a7b 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -34,7 +34,7 @@ public class ExponentialHistogramUtils { public static double estimateSum(BucketIterator negativeBuckets, BucketIterator positiveBuckets) { assert negativeBuckets.scale() == positiveBuckets.scale(); - // for each bucket index, sum up the counts, but account for the positive/negative + // for each bucket index, sum up the counts, but account for the positive/negative sign BucketIterator it = new MergingBucketIterator(negativeBuckets, -1, positiveBuckets, 1, positiveBuckets.scale()); double sum = 0.0; while (it.hasNext()) { From 5f94454eba97597e5eaf7fc201fa4b3ce8eea2ad Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Mon, 25 Aug 2025 14:02:58 +0200 Subject: [PATCH 10/10] Replace factors with custom merge operator --- .../ExponentialHistogramUtils.java | 7 ++++- .../MergingBucketIterator.java | 28 ++++++++++--------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java index be5161c600a7b..5359b3c41da15 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java @@ -35,7 +35,12 @@ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator assert negativeBuckets.scale() == positiveBuckets.scale(); // for each bucket index, sum up the counts, but account for the positive/negative sign - BucketIterator it = new MergingBucketIterator(negativeBuckets, -1, positiveBuckets, 1, positiveBuckets.scale()); + BucketIterator it = new MergingBucketIterator( + positiveBuckets, + negativeBuckets, + positiveBuckets.scale(), + (positiveCount, negativeCount) -> positiveCount - negativeCount + ); double sum = 0.0; while (it.hasNext()) { long countWithSign = it.peekCount(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index 2d5d2c507d381..6ba293d2b11cb 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import java.util.function.LongBinaryOperator; + /** * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index. */ @@ -33,8 +35,7 @@ final class MergingBucketIterator implements BucketIterator { private long currentIndex; private long currentCount; - private final long factorA; - private final long factorB; + private final LongBinaryOperator countMergeOperator; /** * Creates a new merging iterator. @@ -44,24 +45,22 @@ final class MergingBucketIterator implements BucketIterator { * @param targetScale the histogram scale to which both iterators should be aligned */ MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) { - this(itA, 1, itB, 1, targetScale); + this(itA, itB, targetScale, Long::sum); } /** - * Creates a new merging iterator, multiplying counts from each iterator with the provided factors. - * Note that the factors can be negative, in which case {@link #peekCount()} can return zero or negative values. + * Creates a new merging iterator, using the provided operator to merge the counts. + * Note that the resulting count can be negative if the operator produces negative results. * * @param itA the first iterator to merge - * @param factorA a factor to multiply counts from the first iterator with * @param itB the second iterator to merge - * @param factorB a factor to multiply counts from the second iterator with + * @param countMergeOperator the operator to use to merge counts of buckets with the same index * @param targetScale the histogram scale to which both iterators should be aligned */ - MergingBucketIterator(BucketIterator itA, long factorA, BucketIterator itB, long factorB, int targetScale) { + MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale, LongBinaryOperator countMergeOperator) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); - this.factorA = factorA; - this.factorB = factorB; + this.countMergeOperator = countMergeOperator; endReached = false; advance(); } @@ -83,19 +82,21 @@ public void advance() { idxB = itB.peekIndex(); } - currentCount = 0; boolean advanceA = hasNextA && (hasNextB == false || idxA <= idxB); boolean advanceB = hasNextB && (hasNextA == false || idxB <= idxA); + long countA = 0; + long countB = 0; if (advanceA) { currentIndex = idxA; - currentCount += itA.peekCount() * factorA; + countA = itA.peekCount(); itA.advance(); } if (advanceB) { currentIndex = idxB; - currentCount += itB.peekCount() * factorB; + countB = itB.peekCount(); itB.advance(); } + currentCount = countMergeOperator.applyAsLong(countA, countB); } @Override @@ -125,4 +126,5 @@ private void assertEndNotReached() { throw new IllegalStateException("Iterator has no more buckets"); } } + }