histograms) {
+ ExponentialHistogramMerger merger = new ExponentialHistogramMerger(maxBucketCount);
+ while (histograms.hasNext()) {
+ merger.add(histograms.next());
+ }
+ return merger.get();
+ }
+
+ /**
+ * Merges the provided exponential histograms into a new, single histogram with at most the given number of buckets.
+ *
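+ * <p>A minimal usage sketch (illustrative only; {@code histoA} and {@code histoB} stand for two existing histogram instances):
+ * <pre>{@code
+ * ExponentialHistogram merged = merge(64, histoA, histoB);
+ * }</pre>
+ *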
+ * @param maxBucketCount the maximum number of buckets the result histogram is allowed to have
+ * @param histograms the histograms to merge
+ * @return the merged histogram
+ */
+ static ExponentialHistogram merge(int maxBucketCount, ExponentialHistogram... histograms) {
+ return merge(maxBucketCount, List.of(histograms).iterator());
+ }
+
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
new file mode 100644
index 0000000000000..13f2fa4215a36
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.Arrays;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+
+/**
+ * Only intended for use in tests currently.
+ * A class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum number of buckets.
+ *
+ * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed
+ * to represent the exact raw values with a relative error less than {@code 2^(2^-MAX_SCALE) - 1}.
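+ *
+ * <p>A minimal usage sketch (illustrative only):
+ * <pre>{@code
+ * ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(100);
+ * generator.add(0.5);
+ * generator.add(42.0);
+ * ExponentialHistogram histo = generator.get();
+ * }</pre>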
+ */
+public class ExponentialHistogramGenerator {
+
+ // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation.
+ // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted.
+ // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator.
+ // This yields an amortized runtime of O(log(c)) per inserted value.
+ private final double[] rawValueBuffer;
+ int valueCount;
+
+ private final ExponentialHistogramMerger resultMerger;
+ private final FixedCapacityExponentialHistogram valueBuffer;
+
+ private boolean isFinished = false;
+
+ /**
+ * Creates a new instance with the specified maximum number of buckets.
+ *
+ * @param maxBucketCount the maximum number of buckets for the generated histogram
+ */
+ public ExponentialHistogramGenerator(int maxBucketCount) {
+ rawValueBuffer = new double[maxBucketCount];
+ valueCount = 0;
+ valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount);
+ resultMerger = new ExponentialHistogramMerger(maxBucketCount);
+ }
+
+ /**
+ * Adds the given value to the histogram.
+ * Must not be called after {@link #get()} has been called.
+ *
+ * @param value the value to add
+ */
+ public void add(double value) {
+ if (isFinished) {
+ throw new IllegalStateException("get() has already been called");
+ }
+ if (valueCount == rawValueBuffer.length) {
+ mergeValuesToHistogram();
+ }
+ rawValueBuffer[valueCount] = value;
+ valueCount++;
+ }
+
+ /**
+ * Returns the histogram representing the distribution of all accumulated values.
+ *
+ * @return the histogram representing the distribution of all accumulated values
+ */
+ public ExponentialHistogram get() {
+ isFinished = true;
+ mergeValuesToHistogram();
+ return resultMerger.get();
+ }
+
+ private void mergeValuesToHistogram() {
+ if (valueCount == 0) {
+ return;
+ }
+ Arrays.sort(rawValueBuffer, 0, valueCount);
+ int negativeValuesCount = 0;
+ while (negativeValuesCount < valueCount && rawValueBuffer[negativeValuesCount] < 0) {
+ negativeValuesCount++;
+ }
+
+ valueBuffer.reset();
+ int scale = valueBuffer.scale();
+
+ // Buckets must be provided with their indices in ascending order.
+ // For the negative range, higher bucket indices correspond to bucket boundaries closer to -INF
+ // and smaller bucket indices correspond to bucket boundaries closer to zero.
+ // Therefore, we have to iterate over the negative values in the sorted rawValueBuffer in reverse order,
+ // from the value closest to -INF to the value closest to zero.
+ // Note that i here is the index of the value in the rawValueBuffer array
+ // and is unrelated to the histogram bucket index for the value.
+ for (int i = negativeValuesCount - 1; i >= 0; i--) {
+ long count = 1;
+ long index = computeIndex(rawValueBuffer[i], scale);
+ while ((i - 1) >= 0 && computeIndex(rawValueBuffer[i - 1], scale) == index) {
+ i--;
+ count++;
+ }
+ valueBuffer.tryAddBucket(index, count, false);
+ }
+
+ int zeroCount = 0;
+ while ((negativeValuesCount + zeroCount) < valueCount && rawValueBuffer[negativeValuesCount + zeroCount] == 0) {
+ zeroCount++;
+ }
+ valueBuffer.setZeroBucket(ZeroBucket.minimalWithCount(zeroCount));
+ for (int i = negativeValuesCount + zeroCount; i < valueCount; i++) {
+ long count = 1;
+ long index = computeIndex(rawValueBuffer[i], scale);
+ while ((i + 1) < valueCount && computeIndex(rawValueBuffer[i + 1], scale) == index) {
+ i++;
+ count++;
+ }
+ valueBuffer.tryAddBucket(index, count, true);
+ }
+
+ resultMerger.add(valueBuffer);
+ valueCount = 0;
+ }
+
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
new file mode 100644
index 0000000000000..b00ad053837d9
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
@@ -0,0 +1,186 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease;
+
+/**
+ * Allows accumulating multiple {@link ExponentialHistogram} into a single one
+ * while keeping the bucket count in the result below a given limit.
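+ *
+ * <p>A minimal usage sketch (illustrative only; {@code histoA} and {@code histoB} stand for two existing histogram instances):
+ * <pre>{@code
+ * ExponentialHistogramMerger merger = new ExponentialHistogramMerger(100);
+ * merger.add(histoA);
+ * merger.add(histoB);
+ * ExponentialHistogram result = merger.get();
+ * }</pre>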
+ */
+public class ExponentialHistogramMerger {
+
+ // Our algorithm is not in-place, therefore we use two histograms and ping-pong between them
+ private FixedCapacityExponentialHistogram result;
+ private FixedCapacityExponentialHistogram buffer;
+
+ private final DownscaleStats downscaleStats;
+
+ private boolean isFinished;
+
+ /**
+ * Creates a new instance with the specified bucket limit.
+ *
+ * @param bucketLimit the maximum number of buckets the result histogram is allowed to have
+ */
+ public ExponentialHistogramMerger(int bucketLimit) {
+ downscaleStats = new DownscaleStats();
+ result = new FixedCapacityExponentialHistogram(bucketLimit);
+ buffer = new FixedCapacityExponentialHistogram(bucketLimit);
+ }
+
+ // Only intended for testing; using this in production would mean an unnecessary reduction of precision
+ private ExponentialHistogramMerger(int bucketLimit, int minScale) {
+ this(bucketLimit);
+ result.resetBuckets(minScale);
+ buffer.resetBuckets(minScale);
+ }
+
+ static ExponentialHistogramMerger createForTesting(int bucketLimit, int minScale) {
+ return new ExponentialHistogramMerger(bucketLimit, minScale);
+ }
+
+ /**
+ * Merges the given histogram into the current result.
+ * Must not be called after {@link #get()} has been called.
+ *
+ * @param toAdd the histogram to merge
+ */
+ public void add(ExponentialHistogram toAdd) {
+ if (isFinished) {
+ throw new IllegalStateException("get() has already been called");
+ }
+ doMerge(toAdd);
+ }
+
+ /**
+ * Returns the merged histogram.
+ *
+ * @return the merged histogram
+ */
+ public ExponentialHistogram get() {
+ isFinished = true;
+ return result;
+ }
+
+ // TODO(b/128622): this algorithm is very efficient if b has roughly as many buckets as a.
+ // However, if b is much smaller, we still have to iterate over all buckets of a, which is very wasteful.
+ // This can be optimized by buffering multiple histograms to accumulate first,
+ // then in O(log(n)) turn them into a single, merged histogram.
+ // (n is the number of buffered buckets)
+
+ private void doMerge(ExponentialHistogram b) {
+
+ ExponentialHistogram a = result;
+
+ CopyableBucketIterator posBucketsA = a.positiveBuckets().iterator();
+ CopyableBucketIterator negBucketsA = a.negativeBuckets().iterator();
+ CopyableBucketIterator posBucketsB = b.positiveBuckets().iterator();
+ CopyableBucketIterator negBucketsB = b.negativeBuckets().iterator();
+
+ ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket());
+ zeroBucket = zeroBucket.collapseOverlappingBucketsForAll(posBucketsA, negBucketsA, posBucketsB, negBucketsB);
+
+ buffer.setZeroBucket(zeroBucket);
+
+ // We attempt to bring everything to the scale of A.
+ // This might involve increasing the scale for B, which would increase its indices.
+ // We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case.
+ int targetScale = a.scale();
+ if (targetScale > b.scale()) {
+ if (negBucketsB.hasNext()) {
+ long smallestIndex = negBucketsB.peekIndex();
+ OptionalLong maximumIndex = b.negativeBuckets().maxBucketIndex();
+ assert maximumIndex.isPresent()
+ : "We checked that the negative bucket range is not empty, therefore the maximum index should be present";
+ int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(maximumIndex.getAsLong()));
+ targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease);
+ }
+ if (posBucketsB.hasNext()) {
+ long smallestIndex = posBucketsB.peekIndex();
+ OptionalLong maximumIndex = b.positiveBuckets().maxBucketIndex();
+ assert maximumIndex.isPresent()
+ : "We checked that the positive bucket range is not empty, therefore the maximum index should be present";
+ int maxScaleIncrease = Math.min(getMaximumScaleIncrease(smallestIndex), getMaximumScaleIncrease(maximumIndex.getAsLong()));
+ targetScale = Math.min(targetScale, b.scale() + maxScaleIncrease);
+ }
+ }
+
+ // Now we are sure that everything fits numerically into targetScale.
+ // However, we might exceed our limit for the total number of buckets.
+ // Therefore, we try the merge optimistically. If we fail, we reduce the target scale to make everything fit.
+
+ MergingBucketIterator positiveMerged = new MergingBucketIterator(posBucketsA.copy(), posBucketsB.copy(), targetScale);
+ MergingBucketIterator negativeMerged = new MergingBucketIterator(negBucketsA.copy(), negBucketsB.copy(), targetScale);
+
+ buffer.resetBuckets(targetScale);
+ downscaleStats.reset();
+ int overflowCount = putBuckets(buffer, negativeMerged, false, downscaleStats);
+ overflowCount += putBuckets(buffer, positiveMerged, true, downscaleStats);
+
+ if (overflowCount > 0) {
+ // UDD-sketch approach: decrease the scale and retry.
+ int reduction = downscaleStats.getRequiredScaleReductionToReduceBucketCountBy(overflowCount);
+ targetScale -= reduction;
+ buffer.resetBuckets(targetScale);
+ positiveMerged = new MergingBucketIterator(posBucketsA, posBucketsB, targetScale);
+ negativeMerged = new MergingBucketIterator(negBucketsA, negBucketsB, targetScale);
+ overflowCount = putBuckets(buffer, negativeMerged, false, null);
+ overflowCount += putBuckets(buffer, positiveMerged, true, null);
+
+ assert overflowCount == 0 : "Should never happen, the histogram should have had enough space";
+ }
+ FixedCapacityExponentialHistogram temp = result;
+ result = buffer;
+ buffer = temp;
+ }
+
+ private static int putBuckets(
+ FixedCapacityExponentialHistogram output,
+ BucketIterator buckets,
+ boolean isPositive,
+ DownscaleStats downscaleStats
+ ) {
+ boolean collectDownScaleStatsOnNext = false;
+ long prevIndex = 0;
+ int overflowCount = 0;
+ while (buckets.hasNext()) {
+ long idx = buckets.peekIndex();
+ if (collectDownScaleStatsOnNext) {
+ downscaleStats.add(prevIndex, idx);
+ } else {
+ collectDownScaleStatsOnNext = downscaleStats != null;
+ }
+
+ if (output.tryAddBucket(idx, buckets.peekCount(), isPositive) == false) {
+ overflowCount++;
+ }
+
+ prevIndex = idx;
+ buckets.advance();
+ }
+ return overflowCount;
+ }
+
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
new file mode 100644
index 0000000000000..218873982b1b3
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java
@@ -0,0 +1,165 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+/**
+ * Provides quantile estimation for {@link ExponentialHistogram} instances.
+ */
+public class ExponentialHistogramQuantile {
+
+ /**
+ * Estimates a quantile for the distribution represented by the given histogram.
+ *
+ * It returns the value of the element at rank {@code quantile * (n - 1)}, where n is the total number of values and
+ * rank starts at 0. If the rank is fractional, the result is linearly interpolated from the values of the two
+ * neighboring ranks.
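+ *
+ * For example, for {@code n = 3} values, quantile 0.5 maps to rank 1 (the middle element), while quantile 0.75 maps
+ * to rank 1.5 and is interpolated halfway between the elements at ranks 1 and 2.
+ * A minimal usage sketch (illustrative only; {@code histo} stands for an existing histogram instance):
+ * <pre>{@code
+ * double median = ExponentialHistogramQuantile.getQuantile(histo, 0.5);
+ * double p95 = ExponentialHistogramQuantile.getQuantile(histo, 0.95);
+ * }</pre>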
+ *
+ * @param histo the histogram representing the distribution
+ * @param quantile the quantile to query, in the range [0, 1]
+ * @return the estimated quantile value, or {@link Double#NaN} if the histogram is empty
+ */
+ public static double getQuantile(ExponentialHistogram histo, double quantile) {
+ if (quantile < 0 || quantile > 1) {
+ throw new IllegalArgumentException("quantile must be in range [0, 1]");
+ }
+
+ long zeroCount = histo.zeroBucket().count();
+ long negCount = histo.negativeBuckets().valueCount();
+ long posCount = histo.positiveBuckets().valueCount();
+
+ long totalCount = zeroCount + negCount + posCount;
+ if (totalCount == 0) {
+ // Can't compute quantile on an empty histogram
+ return Double.NaN;
+ }
+
+ double exactRank = quantile * (totalCount - 1);
+ long lowerRank = (long) Math.floor(exactRank);
+ long upperRank = (long) Math.ceil(exactRank);
+ double upperFactor = exactRank - lowerRank;
+
+ ValueAndPreviousValue values = getElementAtRank(histo, upperRank);
+
+ double result;
+ if (lowerRank == upperRank) {
+ result = values.valueAtRank();
+ } else {
+ result = values.valueAtPreviousRank() * (1 - upperFactor) + values.valueAtRank() * upperFactor;
+ }
+ return removeNegativeZero(result);
+ }
+
+ private static double removeNegativeZero(double result) {
+ return result == 0.0 ? 0.0 : result;
+ }
+
+ private static ValueAndPreviousValue getElementAtRank(ExponentialHistogram histo, long rank) {
+ long negativeValuesCount = histo.negativeBuckets().valueCount();
+ long zeroCount = histo.zeroBucket().count();
+ if (rank < negativeValuesCount) {
+ if (rank == 0) {
+ return new ValueAndPreviousValue(Double.NaN, -getLastBucketMidpoint(histo.negativeBuckets()));
+ } else {
+ return getBucketMidpointForRank(histo.negativeBuckets().iterator(), negativeValuesCount - rank).negateAndSwap();
+ }
+ } else if (rank < (negativeValuesCount + zeroCount)) {
+ if (rank == negativeValuesCount) {
+ // the element at the previous rank falls into the negative bucket range
+ return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), 0.0);
+ } else {
+ return new ValueAndPreviousValue(0.0, 0.0);
+ }
+ } else {
+ ValueAndPreviousValue result = getBucketMidpointForRank(
+ histo.positiveBuckets().iterator(),
+ rank - negativeValuesCount - zeroCount
+ );
+ if ((rank - 1) < negativeValuesCount) {
+ // previous value falls into the negative bucket range or has rank -1 and therefore doesn't exist
+ return new ValueAndPreviousValue(-getFirstBucketMidpoint(histo.negativeBuckets()), result.valueAtRank);
+ } else if ((rank - 1) < (negativeValuesCount + zeroCount)) {
+ // previous value falls into the zero bucket
+ return new ValueAndPreviousValue(0.0, result.valueAtRank);
+ } else {
+ return result;
+ }
+ }
+ }
+
+ private static double getFirstBucketMidpoint(ExponentialHistogram.Buckets buckets) {
+ CopyableBucketIterator iterator = buckets.iterator();
+ if (iterator.hasNext()) {
+ return ExponentialScaleUtils.getPointOfLeastRelativeError(iterator.peekIndex(), iterator.scale());
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ private static double getLastBucketMidpoint(ExponentialHistogram.Buckets buckets) {
+ OptionalLong highestIndex = buckets.maxBucketIndex();
+ if (highestIndex.isPresent()) {
+ return ExponentialScaleUtils.getPointOfLeastRelativeError(highestIndex.getAsLong(), buckets.iterator().scale());
+ } else {
+ return Double.NaN;
+ }
+ }
+
+ private static ValueAndPreviousValue getBucketMidpointForRank(BucketIterator buckets, long rank) {
+ long prevIndex = Long.MIN_VALUE;
+ long seenCount = 0;
+ while (buckets.hasNext()) {
+ seenCount += buckets.peekCount();
+ if (rank < seenCount) {
+ double center = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale());
+ double prevCenter;
+ if (rank > 0) {
+ if ((rank - 1) >= (seenCount - buckets.peekCount())) {
+ // element at previous rank is in same bucket
+ prevCenter = center;
+ } else {
+ // element at previous rank is in the previous bucket
+ prevCenter = ExponentialScaleUtils.getPointOfLeastRelativeError(prevIndex, buckets.scale());
+ }
+ } else {
+ // there is no previous element
+ prevCenter = Double.NaN;
+ }
+ return new ValueAndPreviousValue(prevCenter, center);
+ }
+ prevIndex = buckets.peekIndex();
+ buckets.advance();
+ }
+ throw new IllegalStateException("The total number of elements in the buckets is less than the desired rank.");
+ }
+
+ /**
+ * @param valueAtPreviousRank the value at the rank before the desired rank, NaN if not applicable.
+ * @param valueAtRank the value at the desired rank
+ */
+ private record ValueAndPreviousValue(double valueAtPreviousRank, double valueAtRank) {
+ ValueAndPreviousValue negateAndSwap() {
+ return new ValueAndPreviousValue(-valueAtRank, -valueAtPreviousRank);
+ }
+ }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java
new file mode 100644
index 0000000000000..a9a93cf023369
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java
@@ -0,0 +1,267 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+
+/**
+ * A collection of utility methods for working with indices and scales of exponential bucket histograms.
+ */
+public class ExponentialScaleUtils {
+
+ private static final double LN_2 = Math.log(2);
+
+ /**
+ * This table is visible for testing to ensure it is up-to-date.
+ *
+ * For each scale from {@link ExponentialHistogram#MIN_SCALE} to {@link ExponentialHistogram#MAX_SCALE},
+ * the table contains a pre-computed constant for up-scaling bucket indices.
+ * The constant is computed using the following formula:
+ * {@code 2^63 * (1 + 2^scale * (1 - log2(1 + 2^(2^-scale))))}
+ */
+ static final long[] SCALE_UP_CONSTANT_TABLE = new long[] {
+ 4503599627370495L,
+ 9007199254740991L,
+ 18014398509481983L,
+ 36028797018963967L,
+ 72057594037927935L,
+ 144115188075855871L,
+ 288230376054894118L,
+ 576448062320457790L,
+ 1146436840887505800L,
+ 2104167428150631728L,
+ 3127054724296373505L,
+ 3828045265094622256L,
+ 4214097751025163417L,
+ 4412149414858430624L,
+ 4511824212543271281L,
+ 4561743405547877994L,
+ 4586713247558758689L,
+ 4599199449917992829L,
+ 4605442711287634239L,
+ 4608564361996858084L,
+ 4610125189854540715L,
+ 4610905604096266504L,
+ 4611295811256239977L,
+ 4611490914841115537L,
+ 4611588466634164420L,
+ 4611637242530765249L,
+ 4611661630479075212L,
+ 4611673824453231387L,
+ 4611679921440309624L,
+ 4611682969933848761L,
+ 4611684494180618332L,
+ 4611685256304003118L,
+ 4611685637365695511L,
+ 4611685827896541707L,
+ 4611685923161964805L,
+ 4611685970794676354L,
+ 4611685994611032129L,
+ 4611686006519210016L,
+ 4611686012473298960L,
+ 4611686015450343432L,
+ 4611686016938865668L,
+ 4611686017683126786L,
+ 4611686018055257345L,
+ 4611686018241322624L,
+ 4611686018334355264L,
+ 4611686018380871584L,
+ 4611686018404129744L,
+ 4611686018415758824L,
+ 4611686018421573364L,
+ 4611686018424480634L };
+
+ /**
+ * Computes the new index for a bucket when adjusting the scale of the histogram.
+ * This method supports both down-scaling (reducing the scale) and up-scaling.
+ * When up-scaling, it returns the bucket containing the point of least relative error of the original bucket.
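+ *
+ * For example, downscaling by one ({@code scaleAdjustment = -1}) maps bucket indices 4 and 5 both to
+ * index {@code 2} (an arithmetic right shift by one), merging the two adjacent buckets into one.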
+ *
+ * @param index the current bucket index to be adjusted
+ * @param currentScale the current scale
+ * @param scaleAdjustment the adjustment to make; the new scale will be {@code currentScale + scaleAdjustment}
+ * @return the index of the bucket in the new scale
+ */
+ static long adjustScale(long index, int currentScale, int scaleAdjustment) {
+ checkIndexAndScaleBounds(index, currentScale);
+
+ int newScale = currentScale + scaleAdjustment;
+ assert newScale >= MIN_SCALE && newScale <= MAX_SCALE
+ : "adjusted scale must be in the range [" + MIN_SCALE + ", " + MAX_SCALE + "]";
+
+ if (scaleAdjustment <= 0) {
+ return index >> -scaleAdjustment;
+ } else {
+ assert scaleAdjustment <= MAX_INDEX_BITS : "Scaling up more than " + MAX_INDEX_BITS + " does not make sense";
+ // When scaling up, we want to return the bucket containing the point of least relative error.
+ // This bucket index can be computed as (index << adjustment) + offset.
+ // The offset is a constant that depends only on the scale and adjustment, not the index.
+ // The mathematically correct formula for the offset is:
+ // 2^adjustment * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale))))
+ // This is hard to compute with double-precision floating-point numbers due to rounding errors and is also expensive.
+ // Therefore, we precompute 2^63 * (1 + 2^currentScale * (1 - log2(1 + 2^(2^-currentScale)))) and store it
+ // in SCALE_UP_CONSTANT_TABLE for each scale.
+ // This can then be converted to the correct offset by dividing with (2^(63-adjustment)),
+ // which is equivalent to a right shift with (63-adjustment)
+ long offset = SCALE_UP_CONSTANT_TABLE[currentScale - MIN_SCALE] >> (63 - scaleAdjustment);
+ return (index << scaleAdjustment) + offset;
+ }
+ }
+
+ /**
+ * Compares the lower boundaries of two buckets, which may have different scales.
+ * This is equivalent to a mathematically correct comparison of the lower bucket boundaries.
+ * Note that this method allows for scales and indices of the full numeric range of the types.
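+ *
+ * For example, index 2 at scale 0 and index 4 at scale 1 both denote the lower boundary
+ * {@code 2^2 = 4} and therefore compare as equal.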
+ *
+ * @param idxA the index of the first bucket
+ * @param scaleA the scale of the first bucket
+ * @param idxB the index of the second bucket
+ * @param scaleB the scale of the second bucket
+ * @return a negative integer, zero, or a positive integer as the first bucket's lower boundary is
+ * less than, equal to, or greater than the second bucket's lower boundary
+ */
+ public static int compareExponentiallyScaledValues(long idxA, int scaleA, long idxB, int scaleB) {
+ if (scaleA > scaleB) {
+ return -compareExponentiallyScaledValues(idxB, scaleB, idxA, scaleA);
+ }
+ // scaleA <= scaleB
+ int shifts = scaleB - scaleA;
+
+ long scaledDownB = idxB >> shifts;
+ int result = Long.compare(idxA, scaledDownB);
+ if (result == 0) {
+ // the scaled down values are equal
+ // this means that b is bigger if it has a "fractional" part, which corresponds to the bits that were removed on the right-shift
+ assert (1L << shifts) > 0;
+ long shiftedAway = idxB & ((1L << shifts) - 1);
+ if (shiftedAway > 0) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Returns the maximum permissible scale increase that does not cause the index to grow out
+ * of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+ *
+ * @param index the index to check
+ * @return the maximum permissible scale increase
+ */
+ public static int getMaximumScaleIncrease(long index) {
+ checkIndexBounds(index);
+ // Scale increase by one corresponds to a left shift, which in turn is the same as multiplying by two.
+ // Because we know that MIN_INDEX = -MAX_INDEX, we can just compute the maximum increase of the absolute index.
+ // This allows us to reason only about non-negative indices further below.
+ index = Math.abs(index);
+ // the maximum scale increase is defined by how many left-shifts we can do without growing beyond MAX_INDEX
+ // MAX_INDEX is defined as a number where the left MAX_INDEX_BITS are all ones.
+ // So in other words, we must ensure that the leftmost (64 - MAX_INDEX_BITS) remain zero,
+ // which is exactly what the formula below does.
+ return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS);
+ }
+
+ /**
+ * Returns the upper boundary of the bucket with the given index and scale.
+ *
+ * @param index the index of the bucket
+ * @param scale the scale of the bucket
+ * @return the upper boundary of the bucket
+ */
+ public static double getUpperBucketBoundary(long index, int scale) {
+ checkIndexAndScaleBounds(index, scale);
+ return exponentiallyScaledToDoubleValue(index + 1, scale);
+ }
+
+ /**
+ * Returns the lower boundary of the bucket with the given index and scale.
+ *
+ * @param index the index of the bucket in the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+ * @param scale the scale of the bucket
+ * @return the lower boundary of the bucket
+ */
+ public static double getLowerBucketBoundary(long index, int scale) {
+ checkIndexAndScaleBounds(index, scale);
+ return exponentiallyScaledToDoubleValue(index, scale);
+ }
+
+ /**
+ * Computes (2^(2^-scale))^index,
+ * allowing also indices outside of the [{@link ExponentialHistogram#MIN_INDEX}, {@link ExponentialHistogram#MAX_INDEX}] range.
+ */
+ static double exponentiallyScaledToDoubleValue(long index, int scale) {
+ // Math.exp is expected to be faster and more accurate than Math.pow
+ // For that reason we use (2^(2^-scale))^index = 2^( (2^-scale) * index) = (e^ln(2))^( (2^-scale) * index)
+ // = e^( ln(2) * (2^-scale) * index)
+ double inverseFactor = Math.scalb(LN_2, -scale);
+ return Math.exp(inverseFactor * index);
+ }
+
+ /**
+ * For a bucket with the given index, computes the point {@code x} in the bucket such that
+ * {@code (x - l) / l} equals {@code (u - x) / u}, where {@code l} is the lower bucket boundary and {@code u}
+ * is the upper bucket boundary.
+ *
+ * In other words, we select the point in the bucket that has the least relative error with respect to any other point in the bucket.
+ *
+ * @param bucketIndex the index of the bucket
+ * @param scale the scale of the bucket
+ * @return the point of least relative error
+ */
+ public static double getPointOfLeastRelativeError(long bucketIndex, int scale) {
+ checkIndexAndScaleBounds(bucketIndex, scale);
+ double upperBound = getUpperBucketBoundary(bucketIndex, scale);
+ double histogramBase = Math.pow(2, Math.scalb(1, -scale));
+ return 2 / (histogramBase + 1) * upperBound;
+ }
+
+ /**
+ * Provides the index of the bucket of the exponential histogram with the given scale that contains the provided value.
+ *
+ * @param value the value to find the bucket for
+ * @param scale the scale of the histogram
+ * @return the index of the bucket
+ */
+ public static long computeIndex(double value, int scale) {
+ checkScaleBounds(scale);
+ return Base2ExponentialHistogramIndexer.computeIndex(value, scale);
+ }
+
+ private static void checkIndexAndScaleBounds(long index, int scale) {
+ checkIndexBounds(index);
+ checkScaleBounds(scale);
+ }
+
+ private static void checkScaleBounds(int scale) {
+ assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
+ }
+
+ private static void checkIndexBounds(long index) {
+ assert index >= MIN_INDEX && index <= MAX_INDEX : "index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]";
+ }
+
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
new file mode 100644
index 0000000000000..5dde66d4a46ec
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
@@ -0,0 +1,261 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import java.util.OptionalLong;
+
+/**
+ * An implementation of a mutable {@link ExponentialHistogram} with a sparse, array-backed representation.
+ *
+ * Consumers must ensure that if the histogram is mutated, all previously acquired {@link BucketIterator}
+ * instances are no longer used.
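+ *
+ * <p>A minimal usage sketch (illustrative only):
+ * <pre>{@code
+ * FixedCapacityExponentialHistogram histo = new FixedCapacityExponentialHistogram(4);
+ * histo.resetBuckets(0); // at scale 0, bucket i covers (2^i, 2^(i+1)]
+ * histo.tryAddBucket(-1, 3, false); // negative range first, indices in ascending order
+ * histo.tryAddBucket(0, 5, true);
+ * histo.tryAddBucket(2, 1, true);
+ * }</pre>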
+ */
+final class FixedCapacityExponentialHistogram implements ExponentialHistogram {
+
+ // These arrays represent both the positive and the negative buckets.
+ // To avoid confusion, in this file we refer to positions within the arrays as "slots" instead of indices.
+ // When we use the term "index", we mean the exponential histogram bucket index.
+ // They store all buckets for the negative range first, with the bucket indices in ascending order,
+ // followed by all buckets for the positive range, also with their indices in ascending order.
+ // This means we store the buckets ordered by their boundaries in ascending order (from -INF to +INF).
+ private final long[] bucketIndices;
+ private final long[] bucketCounts;
+
+ private int bucketScale;
+
+ private final Buckets negativeBuckets = new Buckets(false);
+
+ private ZeroBucket zeroBucket;
+
+ private final Buckets positiveBuckets = new Buckets(true);
+
+ /**
+ * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket.
+ * The scale is initialized to the maximum possible precision ({@link #MAX_SCALE}).
+ *
+ * @param bucketCapacity the maximum total number of positive and negative buckets this histogram can hold.
+ */
+ FixedCapacityExponentialHistogram(int bucketCapacity) {
+ bucketIndices = new long[bucketCapacity];
+ bucketCounts = new long[bucketCapacity];
+ reset();
+ }
+
+ /**
+ * Resets this histogram to the same state as a newly constructed one with the same capacity.
+ */
+ void reset() {
+ setZeroBucket(ZeroBucket.minimalEmpty());
+ resetBuckets(MAX_SCALE);
+ }
+
+ /**
+ * Removes all positive and negative buckets from this histogram and sets the scale to the given value.
+ *
+ * @param scale the scale to set for this histogram
+ */
+ void resetBuckets(int scale) {
+ assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
+ negativeBuckets.reset();
+ positiveBuckets.reset();
+ bucketScale = scale;
+ }
+
+ @Override
+ public ZeroBucket zeroBucket() {
+ return zeroBucket;
+ }
+
+ /**
+ * Replaces the zero bucket of this histogram with the given one.
+ * Callers must ensure that the given {@link ZeroBucket} does not
+ * overlap with any of the positive or negative buckets of this histogram.
+ *
+ * @param zeroBucket the zero bucket to set
+ */
+ void setZeroBucket(ZeroBucket zeroBucket) {
+ this.zeroBucket = zeroBucket;
+ }
+
+ /**
+ * Attempts to add a bucket to the positive or negative range of this histogram.
+ *
+ * Callers must adhere to the following rules:
+ *
+ * - All buckets for the negative values range must be provided before the first one from the positive values range.
+ * - For both the negative and positive ranges, buckets must be provided with their indices in ascending order.
+ * - It is not allowed to provide the same bucket more than once.
+ * - It is not allowed to add empty buckets ({@code count <= 0}).
+ *
+ *
+ * If any of these rules are violated, this call will fail with an exception.
+ * If the bucket cannot be added because the maximum capacity has been reached, the call will not modify the state
+ * of this histogram and will return {@code false}.
+ *
+ * @param index the index of the bucket to add
+ * @param count the count to associate with the given bucket
+ * @param isPositive {@code true} if the bucket belongs to the positive range, {@code false} if it belongs to the negative range
+ * @return {@code true} if the bucket was added, {@code false} if it could not be added due to insufficient capacity
+ */
+ boolean tryAddBucket(long index, long count, boolean isPositive) {
+ assert index >= MIN_INDEX && index <= MAX_INDEX : "index must be in range [" + MIN_INDEX + ".." + MAX_INDEX + "]";
+ assert isPositive || positiveBuckets.numBuckets == 0 : "Cannot add negative buckets after a positive bucket has been added";
+ assert count > 0 : "Cannot add a bucket with empty or negative count";
+ if (isPositive) {
+ return positiveBuckets.tryAddBucket(index, count);
+ } else {
+ return negativeBuckets.tryAddBucket(index, count);
+ }
+ }
+
+ @Override
+ public int scale() {
+ return bucketScale;
+ }
+
+ @Override
+ public ExponentialHistogram.Buckets negativeBuckets() {
+ return negativeBuckets;
+ }
+
+ @Override
+ public ExponentialHistogram.Buckets positiveBuckets() {
+ return positiveBuckets;
+ }
+
+ private class Buckets implements ExponentialHistogram.Buckets {
+
+ private final boolean isPositive;
+ private int numBuckets;
+ private int cachedValueSumForNumBuckets;
+ private long cachedValueSum;
+
+ /**
+ * @param isPositive true, if this object should represent the positive bucket range, false for the negative range
+ */
+ Buckets(boolean isPositive) {
+ this.isPositive = isPositive;
+ reset();
+ }
+
+ /**
+ * @return the position of the first bucket of this set of buckets within {@link #bucketCounts} and {@link #bucketIndices}.
+ */
+ int startSlot() {
+ return isPositive ? negativeBuckets.numBuckets : 0;
+ }
+
+ final void reset() {
+ numBuckets = 0;
+ cachedValueSumForNumBuckets = 0;
+ cachedValueSum = 0;
+ }
+
+ boolean tryAddBucket(long index, long count) {
+ int slot = startSlot() + numBuckets;
+ assert numBuckets == 0 || bucketIndices[slot - 1] < index
+ : "Histogram buckets must be added with their indices in ascending order";
+ if (slot >= bucketCounts.length) {
+ return false; // no more space
+ }
+ bucketIndices[slot] = index;
+ bucketCounts[slot] = count;
+ numBuckets++;
+ return true;
+ }
+
+ @Override
+ public CopyableBucketIterator iterator() {
+ int start = startSlot();
+ return new BucketArrayIterator(start, start + numBuckets);
+ }
+
+ @Override
+ public OptionalLong maxBucketIndex() {
+ if (numBuckets == 0) {
+ return OptionalLong.empty();
+ } else {
+ return OptionalLong.of(bucketIndices[startSlot() + numBuckets - 1]);
+ }
+ }
+
+ @Override
+ public long valueCount() {
+ int startSlot = startSlot();
+ while (cachedValueSumForNumBuckets < numBuckets) {
+ cachedValueSum += bucketCounts[startSlot + cachedValueSumForNumBuckets];
+ cachedValueSumForNumBuckets++;
+ }
+ return cachedValueSum;
+ }
+ }
+
+ private class BucketArrayIterator implements CopyableBucketIterator {
+
+ int currentSlot;
+ final int limit;
+
+ private BucketArrayIterator(int startSlot, int limit) {
+ this.currentSlot = startSlot;
+ this.limit = limit;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return currentSlot < limit;
+ }
+
+ @Override
+ public long peekCount() {
+ ensureEndNotReached();
+ return bucketCounts[currentSlot];
+ }
+
+ @Override
+ public long peekIndex() {
+ ensureEndNotReached();
+ return bucketIndices[currentSlot];
+ }
+
+ @Override
+ public void advance() {
+ ensureEndNotReached();
+ currentSlot++;
+ }
+
+ @Override
+ public int scale() {
+ return FixedCapacityExponentialHistogram.this.scale();
+ }
+
+ @Override
+ public CopyableBucketIterator copy() {
+ return new BucketArrayIterator(currentSlot, limit);
+ }
+
+ private void ensureEndNotReached() {
+ if (hasNext() == false) {
+ throw new IllegalStateException("Iterator has no more buckets");
+ }
+ }
+ }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java
new file mode 100644
index 0000000000000..ca13901cd95e9
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+/**
+ * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index.
+ */
+final class MergingBucketIterator implements BucketIterator {
+
+ private final BucketIterator itA;
+ private final BucketIterator itB;
+
+ private boolean endReached;
+ private long currentIndex;
+ private long currentCount;
+
+ /**
+ * Creates a new merging iterator.
+ *
+ * @param itA the first iterator to merge
+ * @param itB the second iterator to merge
+ * @param targetScale the histogram scale to which both iterators should be aligned
+ */
+ MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) {
+ this.itA = new ScaleAdjustingBucketIterator(itA, targetScale);
+ this.itB = new ScaleAdjustingBucketIterator(itB, targetScale);
+ endReached = false;
+ advance();
+ }
+
+ @Override
+ public void advance() {
+ boolean hasNextA = itA.hasNext();
+ boolean hasNextB = itB.hasNext();
+ endReached = hasNextA == false && hasNextB == false;
+ if (endReached) {
+ return;
+ }
+ long idxA = 0;
+ long idxB = 0;
+ if (hasNextA) {
+ idxA = itA.peekIndex();
+ }
+ if (hasNextB) {
+ idxB = itB.peekIndex();
+ }
+
+ currentCount = 0;
+ boolean advanceA = hasNextA && (hasNextB == false || idxA <= idxB);
+ boolean advanceB = hasNextB && (hasNextA == false || idxB <= idxA);
+ if (advanceA) {
+ currentIndex = idxA;
+ currentCount += itA.peekCount();
+ itA.advance();
+ }
+ if (advanceB) {
+ currentIndex = idxB;
+ currentCount += itB.peekCount();
+ itB.advance();
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ return endReached == false;
+ }
+
+ @Override
+ public long peekCount() {
+ assertEndNotReached();
+ return currentCount;
+ }
+
+ @Override
+ public long peekIndex() {
+ assertEndNotReached();
+ return currentIndex;
+ }
+
+ @Override
+ public int scale() {
+ return itA.scale();
+ }
+
+ private void assertEndNotReached() {
+ if (endReached) {
+ throw new IllegalStateException("Iterator has no more buckets");
+ }
+ }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java
new file mode 100644
index 0000000000000..b28e782fc2366
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ScaleAdjustingBucketIterator.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+
+/**
+ * An iterator that wraps another bucket iterator and adjusts its scale.
+ * When scaling down, multiple buckets can collapse into a single one. This iterator ensures they are merged correctly.
+ */
+final class ScaleAdjustingBucketIterator implements BucketIterator {
+
+ private final BucketIterator delegate;
+ private final int scaleAdjustment;
+
+ private long currentIndex;
+ private long currentCount;
+ boolean hasNextValue;
+
+ /**
+ * Creates a new scale-adjusting iterator.
+ *
+ * @param delegate the iterator to wrap
+ * @param targetScale the target scale for the new iterator
+ */
+ ScaleAdjustingBucketIterator(BucketIterator delegate, int targetScale) {
+ this.delegate = delegate;
+ scaleAdjustment = targetScale - delegate.scale();
+ hasNextValue = true;
+ advance();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return hasNextValue;
+ }
+
+ @Override
+ public long peekCount() {
+ assertEndNotReached();
+ return currentCount;
+ }
+
+ @Override
+ public long peekIndex() {
+ assertEndNotReached();
+ return currentIndex;
+ }
+
+ @Override
+ public void advance() {
+ assertEndNotReached();
+ hasNextValue = delegate.hasNext();
+ if (hasNextValue == false) {
+ return;
+ }
+ currentIndex = adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment);
+ currentCount = delegate.peekCount();
+ delegate.advance();
+ while (delegate.hasNext() && adjustScale(delegate.peekIndex(), delegate.scale(), scaleAdjustment) == currentIndex) {
+ currentCount += delegate.peekCount();
+ delegate.advance();
+ }
+ }
+
+ private void assertEndNotReached() {
+ if (hasNextValue == false) {
+ throw new IllegalStateException("Iterator has no more buckets");
+ }
+ }
+
+ @Override
+ public int scale() {
+ return delegate.scale() + scaleAdjustment;
+ }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java
new file mode 100644
index 0000000000000..9ea75f3a82c27
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.exponentiallyScaledToDoubleValue;
+
+/**
+ * Represents the bucket for values around zero in an exponential histogram.
+ * The range of this bucket is {@code [-zeroThreshold, +zeroThreshold]}.
+ * To allow efficient comparison with bucket boundaries, this class internally
+ * represents the zero threshold as an exponential histogram bucket index with a scale,
+ * computed via {@link ExponentialScaleUtils#computeIndex(double, int)}.
+ *
+ * @param index The index used with the scale to determine the zero threshold.
+ * @param scale The scale used with the index to determine the zero threshold.
+ * @param count The number of values in the zero bucket.
+ */
+public record ZeroBucket(long index, int scale, long count) {
+
+ // A singleton for an empty zero bucket with the smallest possible threshold.
+ private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, MIN_SCALE, 0);
+
+ /**
+ * Creates a new zero bucket with a specific threshold and count.
+ *
+ * @param zeroThreshold The threshold defining the bucket's range [-zeroThreshold, +zeroThreshold].
+ * @param count The number of values in the bucket.
+ */
+ public ZeroBucket(double zeroThreshold, long count) {
+ this(computeIndex(zeroThreshold, MAX_SCALE) + 1, MAX_SCALE, count);
+ }
+
+ /**
+ * @return A singleton instance of an empty zero bucket with the smallest possible threshold.
+ */
+ public static ZeroBucket minimalEmpty() {
+ return MINIMAL_EMPTY;
+ }
+
+ /**
+ * Creates a zero bucket with the smallest possible threshold and a given count.
+ *
+ * @param count The number of values in the bucket.
+ * @return A new {@link ZeroBucket}.
+ */
+ public static ZeroBucket minimalWithCount(long count) {
+ if (count == 0) {
+ return MINIMAL_EMPTY;
+ } else {
+ return new ZeroBucket(MINIMAL_EMPTY.index, MINIMAL_EMPTY.scale(), count);
+ }
+ }
+
+ /**
+ * Merges this zero bucket with another one.
+ *
+ * - If the other zero bucket is empty (or both are), this instance is returned unchanged.
+ * - If this zero bucket is empty and the other one is populated, the other instance is returned unchanged.
+ * - Otherwise, the zero threshold is increased if necessary (by taking the maximum of the two), and the counts are summed.
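+ *
+ * For example, merging a zero bucket with threshold 0.1 and count 3 with one having threshold 0.2
+ * and count 2 results in a zero bucket with threshold 0.2 and count 5.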
+ *
+ *
+ * @param other The other zero bucket to merge with.
+ * @return A new {@link ZeroBucket} representing the merged result.
+ */
+ public ZeroBucket merge(ZeroBucket other) {
+ if (other.count == 0) {
+ return this;
+ } else if (count == 0) {
+ return other;
+ } else {
+ long totalCount = count + other.count;
+ // Both are populated, so we need to use the higher zero-threshold.
+ if (this.compareZeroThreshold(other) >= 0) {
+ return new ZeroBucket(index, scale, totalCount);
+ } else {
+ return new ZeroBucket(other.index, other.scale, totalCount);
+ }
+ }
+ }
+
+ /**
+ * Collapses all buckets from the given iterators whose lower boundaries are smaller than the zero threshold.
+ * The iterators are advanced to point at the first non-collapsed bucket.
+ *
+ * @param bucketIterators The iterators whose buckets may be collapsed.
+ * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold.
+ */
+ public ZeroBucket collapseOverlappingBucketsForAll(BucketIterator... bucketIterators) {
+ ZeroBucket current = this;
+ ZeroBucket previous;
+ do {
+ previous = current;
+ for (BucketIterator buckets : bucketIterators) {
+ current = current.collapseOverlappingBuckets(buckets);
+ }
+ } while (previous.compareZeroThreshold(current) != 0);
+ return current;
+ }
+
+ /**
+ * Compares the zero threshold of this bucket with another one.
+ *
+ * @param other The other zero bucket to compare against.
+ * @return A negative integer, zero, or a positive integer if this bucket's threshold is less than,
+ * equal to, or greater than the other's.
+ */
+ public int compareZeroThreshold(ZeroBucket other) {
+ return compareExponentiallyScaledValues(index, scale, other.index, other.scale);
+ }
+
+ /**
+ * @return The value of the zero threshold.
+ */
+ public double zeroThreshold() {
+ return exponentiallyScaledToDoubleValue(index, scale);
+ }
+
+ /**
+ * Collapses all buckets from the given iterator whose lower boundaries are smaller than the zero threshold.
+ * The iterator is advanced to point at the first non-collapsed bucket.
+ *
+ * @param buckets The iterator whose buckets may be collapsed.
+ * @return A potentially updated {@link ZeroBucket} with the collapsed buckets' counts and an adjusted threshold.
+ */
+ public ZeroBucket collapseOverlappingBuckets(BucketIterator buckets) {
+
+ long collapsedCount = 0;
+ long highestCollapsedIndex = 0;
+ while (buckets.hasNext() && compareExponentiallyScaledValues(buckets.peekIndex(), buckets.scale(), index, scale) < 0) {
+ highestCollapsedIndex = buckets.peekIndex();
+ collapsedCount += buckets.peekCount();
+ buckets.advance();
+ }
+ if (collapsedCount == 0) {
+ return this;
+ } else {
+ long newZeroCount = count + collapsedCount;
+ // +1 because we need to adjust the zero threshold to the upper boundary of the collapsed bucket
+ long collapsedUpperBoundIndex = highestCollapsedIndex + 1;
+ if (compareExponentiallyScaledValues(index, scale, collapsedUpperBoundIndex, buckets.scale()) >= 0) {
+ // Our current zero-threshold is larger than the upper boundary of the largest collapsed bucket, so we keep it.
+ return new ZeroBucket(index, scale, newZeroCount);
+ } else {
+ return new ZeroBucket(collapsedUpperBoundIndex, buckets.scale(), newZeroCount);
+ }
+ }
+ }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/package-info.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/package-info.java
new file mode 100644
index 0000000000000..5584f5030e887
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/package-info.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+/**
+ * This library provides an implementation of merging and analysis algorithms for exponential histograms based on the
+ * OpenTelemetry definition.
+ * It is designed as a complementary tool to the OpenTelemetry SDK, focusing specifically on efficient histogram merging and accurate
+ * percentile estimation.
+ *
+ * Overview
+ *
+ * The library implements base-2 exponential histograms with perfect subsetting. The most important properties are:
+ *
+ * - The histogram has a scale parameter, which defines the accuracy. A higher scale implies a higher accuracy.
+ * - The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}.
+ * - The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}.
+ * - Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}.
+ * - Histograms support perfect subsetting: when the scale is decreased by one, each pair of adjacent buckets is merged into a
+ *   single bucket without introducing error.
+ * - A special zero bucket with a zero threshold is used to handle zero and close-to-zero values.
+ *
+ * For more details, please refer to the OpenTelemetry definition.
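+ *
+ * As a concrete illustration of these definitions, the bucket boundaries for a given index and scale could be computed as in
+ * the following sketch. This is a simplification for clarity, not the library's actual implementation
+ * ({@code ExponentialScaleUtils} provides numerically robust versions of these computations):
+ *
+ *   double base = Math.pow(2.0, Math.scalb(1.0, -scale)); // base = 2^(2^-scale)
+ *   double lowerBound = Math.pow(base, index);            // exclusive lower boundary
+ *   double upperBound = Math.pow(base, index + 1);        // inclusive upper boundary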
+ *
+ * The library implements a sparse storage approach where only populated buckets consume memory and count towards the bucket limit.
+ * This differs from the OpenTelemetry implementation, which uses dense storage. While dense storage allows for O(1) time insertion of
+ * individual values, our sparse representation requires O(log m) time, where m is the bucket capacity. However, the sparse
+ * representation enables more efficient storage and provides a simple merging algorithm whose runtime is linear in the number of
+ * populated buckets. In addition, the provided sparse storage is array-backed, which ensures cache efficiency.
+ *
+ * The sparse storage approach offers significant advantages for distributions with fewer distinct values than the bucket count:
+ * such distributions can be represented with an error so small that it will not be noticed in practice.
+ * This makes the library suitable not only for exponential histograms, but also as a universal solution for handling explicit
+ * bucket histograms.
+ *
+ * Merging Algorithm
+ *
+ * The merging algorithm works similarly to the merge-step of merge sort. We simultaneously walk through the buckets of both
+ * histograms in order, merging them on the fly as needed. If the total number of buckets in the end would exceed the bucket limit,
+ * we scale down as needed.
+ *
+ * Before we merge the buckets, we need to take care of the special zero-bucket and bring both histograms to the same scale.
+ *
+ * For the zero bucket, we merge the zero thresholds of both histograms and collapse any overlapping buckets into the resulting
+ * new zero bucket.
+ *
+ * In order to bring both histograms to the same scale, we can make adjustments in both directions: we can increase or decrease the
+ * scale of histograms as needed.
+ *
+ * See the upscaling section below for details on how upscaling works. Upscaling helps prevent the precision of a result
+ * histogram merged from many inputs from being dragged down to the lowest scale of a potentially misconfigured input histogram.
+ * For example, if a histogram is recorded with too low a zero threshold, this can result in a degraded scale when using dense
+ * histogram storage, even if the histogram only contains two points.
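+ *
+ * Conceptually, the bucket-merging step looks like the following sketch, reusing the {@code BucketIterator} API
+ * ({@code output} is an illustrative accumulator, not an actual class of this library) and assuming both histograms have
+ * already been brought to the same scale:
+ *
+ *   while (itA.hasNext() && itB.hasNext()) {
+ *       if (itA.peekIndex() == itB.peekIndex()) {
+ *           output.add(itA.peekIndex(), itA.peekCount() + itB.peekCount());
+ *           itA.advance();
+ *           itB.advance();
+ *       } else if (itA.peekIndex() < itB.peekIndex()) {
+ *           output.add(itA.peekIndex(), itA.peekCount());
+ *           itA.advance();
+ *       } else {
+ *           output.add(itB.peekIndex(), itB.peekCount());
+ *           itB.advance();
+ *       }
+ *   }
+ *   // ... then drain whichever iterator still has remaining buckets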
+ *
+ * Upscaling
+ *
+ * In general, we assume that all values in a bucket lie on a single point: the point of least relative error. This is the point
+ * {@code x} in the bucket such that:
+ *
+ *
+ * (x - l) / l = (u - x) / u
+ *
+ *
+ * where {@code l} is the lower bucket boundary and {@code u} is the upper bucket boundary.
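+ *
+ * Solving this equation gives {@code x = 2*l*u / (l + u)}, the harmonic mean of the two bucket boundaries. As a minimal
+ * sketch (the library derives this from index and scale via {@code ExponentialScaleUtils.getPointOfLeastRelativeError}):
+ *
+ *   double pointOfLeastRelativeError = 2 * lowerBound * upperBound / (lowerBound + upperBound);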
+ *
+ * This assumption allows us to increase the scale of histograms without increasing the bucket count. Buckets are simply mapped to
+ * the ones in the new scale containing the point of least relative error of the original buckets.
+ *
+ * This can introduce a small error, as the original center might be moved slightly. Therefore, we ensure that upscaling happens
+ * at most once, to prevent errors from accumulating. The larger the scale increase, the smaller the error: a higher scale means
+ * smaller buckets, which in turn give a better fit around the original point of least relative error.
+ *
+ * Distributions with Few Distinct Values
+ *
+ * The sparse storage model only requires memory linear in the number of populated buckets, while dense storage needs to cover
+ * the entire index range between the smallest and largest populated bucket.
+ *
+ * This offers significant benefits for distributions with fewer distinct values:
+ * If we have at least as many buckets as distinct values to store, we can represent the distribution with a much smaller error
+ * than the dense representation.
+ * This is achieved by maintaining the scale at the maximum supported value (so that the buckets are as small as possible).
+ * At the time of writing, the maximum scale is 38, so the ratio between the upper and lower boundary of a bucket is only
+ * {@code 2^(2^-38)}.
+ *
+ * The impact of the error is best shown with a concrete example:
+ * If we store, for example, a duration value of {@code 10^15} nanoseconds (roughly 11.5 days), this value will be stored in a
+ * bucket that guarantees a relative error of at most {@code 2^(2^-38) - 1}, which is roughly 2.5 microseconds in this case.
+ * As long as the number of inserted values is lower than the bucket count, we are guaranteed that no down-scaling happens:
+ * in contrast to dense storage, the scale does not depend on the spread between the smallest and largest bucket index.
+ *
+ * To clarify the difference between dense and sparse storage, let's assume an empty histogram with a maximum scale of zero
+ * and a maximum bucket count of four.
+ * The same logic applies to higher scales and bucket counts; we just use these values to keep the numbers in this example simple.
+ * The scale of zero means that our bucket boundaries are {@code 1, 2, 4, 8, 16, 32, 64, 128, 256, ...}.
+ * We now want to insert the value {@code 6} into the histogram. The dense storage works by storing an array for the bucket counts
+ * plus an initial offset.
+ * This means that the first slot in the bucket counts array corresponds to the bucket with index {@code offset} and the last one to
+ * {@code offset + bucketCounts.length - 1}.
+ * So if we add the value {@code 6} to the histogram, it falls into the {@code (4,8]} bucket, which has the index {@code 2}.
+ *
+ * So our dense histogram looks like this:
+ *
+ *
+ * offset = 2
+ * bucketCounts = [1, 0, 0, 0] // represents bucket counts for bucket indices 2 to 5
+ *
+ *
+ * If we now insert the value {@code 20} ({@code (16,32]}, bucket index 4), everything is still fine:
+ *
+ *
+ * offset = 2
+ * bucketCounts = [1, 0, 1, 0] // represents bucket counts for bucket indices 2 to 5
+ *
+ *
+ * However, we run into trouble if we insert the value {@code 100}, which corresponds to index 6: that index is outside the
+ * bounds of our array.
+ * We can't simply increase the {@code offset}, because the first bucket in our array is populated too.
+ * We have no option other than decreasing the scale of the histogram, so that the values {@code 6} and {@code 100} fall into
+ * a range of four consecutive buckets, as required by the bucket count limit of the dense storage.
+ *
+ * In contrast, a sparse histogram has no trouble storing this data while keeping the scale of zero:
+ *
+ *
+ * bucketIndicesToCounts: {
+ * "2" : 1,
+ * "4" : 1,
+ * "6" : 1
+ * }
+ *
+ *
+ * Downscaling on the sparse representation only happens if either:
+ *
+ * - The number of populated buckets would exceed our maximum bucket count. We then have to downscale, combining neighboring
+ *   populated buckets into a single bucket, until we are below the limit again.
+ * - The highest or lowest indices require more bits to store than we allow. This does not happen in our implementation for
+ *   normal inputs, because we allow up to 62 bits for index storage, which covers the entire numeric range of IEEE 754 double
+ *   precision floats at our maximum scale.
+ *
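+ * Downscaling itself is cheap on the sparse representation: reducing the scale by {@code n} maps each bucket index via an
+ * arithmetic right shift by {@code n} bits, and buckets whose shifted indices collide are merged by adding their counts.
+ * A rough sketch over a sorted index-to-count map (the map is only illustrative, the library uses flat arrays):
+ *
+ *   for (Map.Entry<Long, Long> bucket : buckets.entrySet()) {
+ *       downscaled.merge(bucket.getKey() >> n, bucket.getValue(), Long::sum);
+ *   }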
+ *
+ * Handling Explicit Bucket Histograms
+ *
+ * We can make use of this property to convert explicit bucket histograms
+ * (OpenTelemetry Histogram) to exponential
+ * ones by again assuming that all values in a bucket lie on a single point:
+ *
+ * - For each explicit bucket, we take its point of least relative error and add it to the corresponding exponential histogram
+ *   bucket with the corresponding count.
+ * - The open upper and lower buckets, which extend to infinity, need special treatment, but these are not useful for percentile
+ *   estimates anyway.
+ *
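+ * A hypothetical sketch of this conversion, assuming {@code boundaries} holds the explicit bucket boundaries with
+ * {@code counts} between them, and {@code generator} stands for any accumulator that accepts a value together with a count:
+ *
+ *   for (int i = 0; i < counts.length; i++) {
+ *       double l = boundaries[i];
+ *       double u = boundaries[i + 1];
+ *       generator.add(2 * l * u / (l + u), counts[i]); // point of least relative error, weighted by the bucket count
+ *   }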
+ *
+ * This gives us a great solution for universally dealing with histograms:
+ * When merging exponential histograms generated from explicit ones, the scale is not decreased (and therefore the error is not
+ * increased) as long as the number of distinct buckets from the original explicit bucket histograms does not exceed the
+ * exponential histogram bucket count. As a result, the computed percentiles will be precise up to the relative error of the
+ * initial conversion.
+ * In addition, this allows us to compute percentiles on mixed explicit bucket histograms, or even mix them with exponential
+ * ones, by just using the exponential histogram algorithms.
+ */
+package org.elasticsearch.exponentialhistogram;
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java
new file mode 100644
index 0000000000000..63f0b8a301cfe
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.LongStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+public class DownscaleStatsTests extends ESTestCase {
+
+ public void testExponential() {
+ long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(MAX_INDEX, Math.pow(1.1, i))).distinct().toArray();
+ verifyFor(values);
+ }
+
+ public void testNumericalLimits() {
+ verifyFor(MIN_INDEX, MAX_INDEX);
+ }
+
+ public void testRandom() {
+ for (int i = 0; i < 100; i++) {
+ List<Long> values = IntStream.range(0, 1000).mapToObj(j -> randomLongBetween(MIN_INDEX, MAX_INDEX)).distinct().toList();
+ verifyFor(values);
+ }
+ }
+
+ void verifyFor(long... indices) {
+ verifyFor(LongStream.of(indices).boxed().toList());
+ }
+
+ void verifyFor(Collection<Long> indices) {
+ // sanity check, we require unique indices
+ assertThat(indices.size(), equalTo(new HashSet<>(indices).size()));
+
+ List<Long> sorted = new ArrayList<>(indices);
+ sorted.sort(Long::compareTo);
+
+ DownscaleStats stats = new DownscaleStats();
+ for (int i = 1; i < sorted.size(); i++) {
+ long prev = sorted.get(i - 1);
+ long curr = sorted.get(i);
+ stats.add(prev, curr);
+ }
+
+ for (int i = 0; i <= MAX_INDEX_BITS; i++) {
+ int scaleReduction = i;
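+ // a scale reduction of i merges each group of 2^i neighboring buckets into one,
+ // which corresponds to an arithmetic right shift of the bucket indices by i bits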
+ long remainingCount = indices.stream().mapToLong(Long::longValue).map(index -> index >> scaleReduction).distinct().count();
+ long reduction = sorted.size() - remainingCount;
+
+ assertThat(
+ "Expected size after reduction of " + i + " to match",
+ stats.getCollapsedBucketCountAfterScaleReduction(scaleReduction),
+ equalTo((int) reduction)
+ );
+ }
+
+ }
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java
new file mode 100644
index 0000000000000..337cfbd69033e
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+
+public class ExponentialHistogramGeneratorTests extends ESTestCase {
+
+ public void testVeryLargeValue() {
+ double value = Double.MAX_VALUE / 10;
+ ExponentialHistogram histo = ExponentialHistogram.create(1, value);
+
+ long index = histo.positiveBuckets().iterator().peekIndex();
+ int scale = histo.scale();
+
+ double lowerBound = ExponentialScaleUtils.getLowerBucketBoundary(index, scale);
+ double upperBound = ExponentialScaleUtils.getUpperBucketBoundary(index, scale);
+
+ assertThat("Lower bucket boundary should be smaller than value", lowerBound, lessThanOrEqualTo(value));
+ assertThat("Upper bucket boundary should be greater than value", upperBound, greaterThanOrEqualTo(value));
+ }
+
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java
new file mode 100644
index 0000000000000..9d46798d1a627
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+
+public class ExponentialHistogramMergerTests extends ESTestCase {
+
+ public void testZeroThresholdCollapsesOverlappingBuckets() {
+ FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100);
+ first.setZeroBucket(new ZeroBucket(2.0001, 10));
+
+ FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100);
+ second.resetBuckets(0); // scale 0 means base 2
+ second.tryAddBucket(0, 1, false); // bucket (-2, -1]
+ second.tryAddBucket(1, 1, false); // bucket (-4, -2]
+ second.tryAddBucket(2, 7, false); // bucket (-8, -4]
+ second.tryAddBucket(0, 1, true); // bucket (1, 2]
+ second.tryAddBucket(1, 1, true); // bucket (2, 4]
+ second.tryAddBucket(2, 42, true); // bucket (4, 8]
+
+ ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second);
+
+ assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(4.0));
+ assertThat(mergeResult.zeroBucket().count(), equalTo(14L));
+
+ // only the (4, 8] bucket should be left
+ assertThat(mergeResult.scale(), equalTo(0));
+
+ BucketIterator negBuckets = mergeResult.negativeBuckets().iterator();
+ assertThat(negBuckets.peekIndex(), equalTo(2L));
+ assertThat(negBuckets.peekCount(), equalTo(7L));
+ negBuckets.advance();
+ assertThat(negBuckets.hasNext(), equalTo(false));
+
+ BucketIterator posBuckets = mergeResult.positiveBuckets().iterator();
+ assertThat(posBuckets.peekIndex(), equalTo(2L));
+ assertThat(posBuckets.peekCount(), equalTo(42L));
+ posBuckets.advance();
+ assertThat(posBuckets.hasNext(), equalTo(false));
+
+ // ensure buckets of the accumulated histogram are collapsed too if needed
+ FixedCapacityExponentialHistogram third = new FixedCapacityExponentialHistogram(100);
+ third.setZeroBucket(new ZeroBucket(45.0, 1));
+
+ mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third);
+ assertThat(mergeResult.zeroBucket().zeroThreshold(), closeTo(45.0, 0.000001));
+ assertThat(mergeResult.zeroBucket().count(), equalTo(1L + 14L + 42L + 7L));
+ assertThat(mergeResult.positiveBuckets().iterator().hasNext(), equalTo(false));
+ assertThat(mergeResult.negativeBuckets().iterator().hasNext(), equalTo(false));
+ }
+
+ public void testEmptyZeroBucketIgnored() {
+ FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100);
+ first.setZeroBucket(new ZeroBucket(2.0, 10));
+ first.resetBuckets(0); // scale 0 means base 2
+ first.tryAddBucket(2, 42L, true); // bucket (4, 8]
+
+ FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100);
+ second.setZeroBucket(new ZeroBucket(100.0, 0));
+
+ ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second);
+
+ assertThat(mergeResult.zeroBucket().zeroThreshold(), equalTo(2.0));
+ assertThat(mergeResult.zeroBucket().count(), equalTo(10L));
+
+ BucketIterator posBuckets = mergeResult.positiveBuckets().iterator();
+ assertThat(posBuckets.peekIndex(), equalTo(2L));
+ assertThat(posBuckets.peekCount(), equalTo(42L));
+ posBuckets.advance();
+ assertThat(posBuckets.hasNext(), equalTo(false));
+ }
+
+ public void testUpscalingDoesNotExceedIndexLimits() {
+ for (int i = 0; i < 4; i++) {
+
+ boolean isPositive = i % 2 == 0;
+ boolean useMinIndex = i > 1;
+
+ FixedCapacityExponentialHistogram histo = new FixedCapacityExponentialHistogram(2);
+ histo.resetBuckets(20);
+
+ long index = useMinIndex ? MIN_INDEX / 2 : MAX_INDEX / 2;
+
+ histo.tryAddBucket(index, 1, isPositive);
+
+ ExponentialHistogramMerger merger = new ExponentialHistogramMerger(100);
+ merger.add(histo);
+ ExponentialHistogram result = merger.get();
+
+ assertThat(result.scale(), equalTo(21));
+ if (isPositive) {
+ assertThat(result.positiveBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1)));
+ } else {
+ assertThat(result.negativeBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1)));
+ }
+ }
+ }
+
+ /**
+ * Verify that the resulting histogram is independent of the insertion order of the elements, and therefore of the order in which merges are performed.
+ */
+ public void testMergeOrderIndependence() {
+ List<Double> values = IntStream.range(0, 10_000)
+ .mapToDouble(i -> i < 17 ? 0 : (-1 + 2 * randomDouble()) * Math.pow(10, randomIntBetween(-4, 4)))
+ .boxed()
+ .collect(Collectors.toCollection(ArrayList::new));
+
+ ExponentialHistogram reference = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray());
+
+ for (int i = 0; i < 100; i++) {
+ Collections.shuffle(values, random());
+ ExponentialHistogram shuffled = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray());
+
+ assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale()));
+ assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket()));
+ assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets());
+ assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets());
+ }
+ }
+
+ private void assertBucketsEqual(ExponentialHistogram.Buckets bucketsA, ExponentialHistogram.Buckets bucketsB) {
+ BucketIterator itA = bucketsA.iterator();
+ BucketIterator itB = bucketsB.iterator();
+ assertThat("Expecting both sets of buckets to be empty or non-empty", itA.hasNext(), equalTo(itB.hasNext()));
+ while (itA.hasNext() && itB.hasNext()) {
+ assertThat(itA.peekIndex(), equalTo(itB.peekIndex()));
+ assertThat(itA.peekCount(), equalTo(itB.peekCount()));
+ itA.advance();
+ itB.advance();
+ assertThat("The number of buckets is different", itA.hasNext(), equalTo(itB.hasNext()));
+ }
+ }
+
+ private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... histograms) {
+ ExponentialHistogramMerger merger = ExponentialHistogramMerger.createForTesting(bucketCount, scale);
+ Arrays.stream(histograms).forEach(merger::add);
+ return merger.get();
+ }
+
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java
new file mode 100644
index 0000000000000..a75aedbf35231
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtilsTests.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import ch.obermuhlner.math.big.BigDecimalMath;
+
+import org.elasticsearch.test.ESTestCase;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.math.RoundingMode;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.SCALE_UP_CONSTANT_TABLE;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getLowerBucketBoundary;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getPointOfLeastRelativeError;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getUpperBucketBoundary;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.lessThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+
+public class ExponentialScaleUtilsTests extends ESTestCase {
+
+ public void testMaxIndex() {
+ assertThat(getMaximumScaleIncrease(MAX_INDEX), equalTo(0));
+ assertThat(getMaximumScaleIncrease(MAX_INDEX - 1), equalTo(0));
+ assertThat(getMaximumScaleIncrease(MAX_INDEX >> 1), equalTo(1));
+ assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MAX_INDEX, 4));
+ }
+
+ public void testMinIndex() {
+ assertThat(getMaximumScaleIncrease(MIN_INDEX), equalTo(0));
+ assertThat(getMaximumScaleIncrease(MIN_INDEX + 1), equalTo(0));
+ assertThat(getMaximumScaleIncrease(MIN_INDEX >> 1), equalTo(0));
+ assertThat(getMaximumScaleIncrease((MIN_INDEX + 1) >> 1), equalTo(1));
+ assertThrows(ArithmeticException.class, () -> Math.multiplyExact(MIN_INDEX, 4));
+ }
+
+ public void testExtremeValueIndexing() {
+ double leeway = Math.pow(10.0, 20);
+
+ for (double testValue : new double[] { Double.MAX_VALUE / leeway, Double.MIN_VALUE * leeway }) {
+ long idx = computeIndex(testValue, MAX_SCALE);
+ double lowerBound = getLowerBucketBoundary(idx, MAX_SCALE);
+ double upperBound = getUpperBucketBoundary(idx, MAX_SCALE);
+ assertThat(lowerBound, lessThanOrEqualTo(testValue));
+ assertThat(upperBound, greaterThanOrEqualTo(testValue));
+ assertThat(lowerBound, lessThan(upperBound));
+ }
+ }
+
+ public void testRandomValueIndexing() {
+ for (int i = 0; i < 100_000; i++) {
+ // generate values in the range 10^-100 to 10^100
+ double exponent = randomDouble() * 200 - 100;
+ double testValue = Math.pow(10, exponent);
+ int scale = randomIntBetween(MIN_SCALE / 2, MAX_SCALE / 2);
+ long index = computeIndex(testValue, scale);
+
+ double lowerBound = getLowerBucketBoundary(index, scale);
+ double upperBound = getUpperBucketBoundary(index, scale);
+ double pointOfLeastError = getPointOfLeastRelativeError(index, scale);
+
+ String baseMsg = " for input value " + testValue + " and scale " + scale;
+
+ assertThat("Expected lower bound to be less than input value" + baseMsg, lowerBound, lessThanOrEqualTo(testValue));
+ assertThat("Expected upper bound to be greater than input value" + baseMsg, upperBound, greaterThanOrEqualTo(testValue));
+ assertThat("Expected lower bound to be less than upper bound" + baseMsg, lowerBound, lessThan(upperBound));
+
+ // only do this check for ranges where we have enough numeric stability
+ if (lowerBound > Math.pow(10, -250) && upperBound < Math.pow(10, 250)) {
+
+ assertThat(
+ "Expected point of least error to be greater than lower bound" + baseMsg,
+ pointOfLeastError,
+ greaterThan(lowerBound)
+ );
+ assertThat("Expected point of least error to be less than upper bound" + baseMsg, pointOfLeastError, lessThan(upperBound));
+
+ double errorLower = (pointOfLeastError - lowerBound) / lowerBound;
+ double errorUpper = (upperBound - pointOfLeastError) / upperBound;
+ assertThat(errorLower / errorUpper, closeTo(1, 0.1));
+ }
+
+ }
+ }
+
+ public void testRandomIndicesScaleAdjustment() {
+
+ for (int i = 0; i < 100_000; i++) {
+ long index = randomLongBetween(MIN_INDEX, MAX_INDEX);
+ int currentScale = randomIntBetween(MIN_SCALE, MAX_SCALE);
+ int maxAdjustment = Math.min(MAX_SCALE - currentScale, getMaximumScaleIncrease(index));
+
+ assertThat(
+ adjustScale(adjustScale(index, currentScale, maxAdjustment), currentScale + maxAdjustment, -maxAdjustment),
+ equalTo(index)
+ );
+ if (currentScale + maxAdjustment < MAX_SCALE) {
+ if (index > 0) {
+ assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, greaterThan(MAX_INDEX));
+ } else if (index < 0) {
+ assertThat(adjustScale(index, currentScale, maxAdjustment) * 2, lessThan(MIN_INDEX));
+ }
+ }
+ }
+
+ }
+
+ public void testRandomBucketBoundaryComparison() {
+
+ for (int i = 0; i < 100_000; i++) {
+ long indexA = randomLongBetween(MIN_INDEX, MAX_INDEX);
+ long indexB = randomLongBetween(MIN_INDEX, MAX_INDEX);
+ int scaleA = randomIntBetween(MIN_SCALE, MAX_SCALE);
+ int scaleB = randomIntBetween(MIN_SCALE, MAX_SCALE);
+
+ double lowerBoundA = getLowerBucketBoundary(indexA, scaleA);
+ while (Double.isInfinite(lowerBoundA)) {
+ indexA = indexA >> 1;
+ lowerBoundA = getLowerBucketBoundary(indexA, scaleA);
+ }
+ double lowerBoundB = getLowerBucketBoundary(indexB, scaleB);
+ while (Double.isInfinite(lowerBoundB)) {
+ indexB = indexB >> 1;
+ lowerBoundB = getLowerBucketBoundary(indexB, scaleB);
+ }
+
+ if (lowerBoundA != lowerBoundB) {
+ assertThat(
+ Double.compare(lowerBoundA, lowerBoundB),
+ equalTo(compareExponentiallyScaledValues(indexA, scaleA, indexB, scaleB))
+ );
+ }
+ }
+ }
+
+ public void testUpscalingAccuracy() {
+ // Use slightly adjusted scales to not run into numeric trouble, because we don't use exact maths here
+ int minScale = MIN_SCALE + 7;
+ int maxScale = MAX_SCALE - 15;
+
+ for (int i = 0; i < 10_000; i++) {
+
+ int startScale = randomIntBetween(minScale, maxScale - 1);
+ int scaleIncrease = randomIntBetween(1, maxScale - startScale);
+
+ long index = MAX_INDEX >> scaleIncrease >> (int) (randomDouble() * (MAX_INDEX_BITS - scaleIncrease));
+ index = Math.max(1, index);
+ index = (long) ((2 * randomDouble() - 1) * index);
+
+ double midPoint = getPointOfLeastRelativeError(index, startScale);
+ // limit the numeric range, otherwise we get rounding errors causing the test to fail
+ while (midPoint > Math.pow(10, 10) || midPoint < Math.pow(10, -10)) {
+ index /= 2;
+ midPoint = getPointOfLeastRelativeError(index, startScale);
+ }
+
+ long scaledUpIndex = adjustScale(index, startScale, scaleIncrease);
+ long correctIdx = computeIndex(midPoint, startScale + scaleIncrease);
+ // the slightly reduced scale range and the limited numeric range above avoid rounding problems, so the result must be exact
+ assertThat(scaledUpIndex, equalTo(correctIdx));
+ }
+ }
+
+ public void testScaleUpTableUpToDate() {
+
+ MathContext mc = new MathContext(1000);
+ BigDecimal one = new BigDecimal(1, mc);
+ BigDecimal two = new BigDecimal(2, mc);
+
+ for (int scale = MIN_SCALE; scale <= MAX_SCALE; scale++) {
+ BigDecimal base = BigDecimalMath.pow(two, two.pow(-scale, mc), mc);
+ BigDecimal factor = one.add(two.pow(scale, mc).multiply(one.subtract(BigDecimalMath.log2(one.add(base), mc))));
+
+ BigDecimal scaledFactor = factor.multiply(two.pow(63, mc)).setScale(0, RoundingMode.FLOOR);
+ assertThat(SCALE_UP_CONSTANT_TABLE[scale - MIN_SCALE], equalTo(scaledFactor.longValue()));
+ }
+ }
+
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java
new file mode 100644
index 0000000000000..6701bee418299
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class FixedCapacityExponentialHistogramTests extends ESTestCase {
+
+ public void testValueCountUpdatedCorrectly() {
+
+ FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(100);
+
+ assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L));
+ assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+
+ histogram.tryAddBucket(1, 10, false);
+
+ assertThat(histogram.negativeBuckets().valueCount(), equalTo(10L));
+ assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+
+ histogram.tryAddBucket(2, 3, false);
+ histogram.tryAddBucket(3, 4, false);
+ histogram.tryAddBucket(1, 5, true);
+
+ assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L));
+ assertThat(histogram.positiveBuckets().valueCount(), equalTo(5L));
+
+ histogram.tryAddBucket(2, 3, true);
+ histogram.tryAddBucket(3, 4, true);
+
+ assertThat(histogram.negativeBuckets().valueCount(), equalTo(17L));
+ assertThat(histogram.positiveBuckets().valueCount(), equalTo(12L));
+
+ histogram.resetBuckets(0);
+
+ assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L));
+ assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L));
+ }
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java
new file mode 100644
index 0000000000000..3f1ffaffda30b
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java
@@ -0,0 +1,310 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.apache.commons.math3.distribution.BetaDistribution;
+import org.apache.commons.math3.distribution.ExponentialDistribution;
+import org.apache.commons.math3.distribution.GammaDistribution;
+import org.apache.commons.math3.distribution.LogNormalDistribution;
+import org.apache.commons.math3.distribution.NormalDistribution;
+import org.apache.commons.math3.distribution.RealDistribution;
+import org.apache.commons.math3.distribution.UniformRealDistribution;
+import org.apache.commons.math3.distribution.WeibullDistribution;
+import org.apache.commons.math3.random.Well19937c;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.stream.DoubleStream;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE;
+import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.lessThan;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+import static org.hamcrest.Matchers.notANumber;
+
+public class QuantileAccuracyTests extends ESTestCase {
+
+ public static final double[] QUANTILES_TO_TEST = { 0, 0.0000001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999999, 1.0 };
+
+ private static int randomBucketCount() {
+ // exponentially distribute the bucket count to test more for smaller sizes
+ return (int) Math.round(5 + Math.pow(1995, randomDouble()));
+ }
+
+ public void testNoNegativeZeroReturned() {
+ FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(2);
+ histogram.resetBuckets(MAX_SCALE);
+ // add a single, negative bucket close to zero
+ histogram.tryAddBucket(MIN_INDEX, 3, false);
+ double median = ExponentialHistogramQuantile.getQuantile(histogram, 0.5);
+ assertThat(median, equalTo(0.0));
+ }
+
+ public void testUniformDistribution() {
+ testDistributionQuantileAccuracy(new UniformRealDistribution(new Well19937c(randomInt()), 0, 100));
+ }
+
+ public void testNormalDistribution() {
+ testDistributionQuantileAccuracy(new NormalDistribution(new Well19937c(randomInt()), 100, 15));
+ }
+
+ public void testExponentialDistribution() {
+ testDistributionQuantileAccuracy(new ExponentialDistribution(new Well19937c(randomInt()), 10));
+ }
+
+ public void testLogNormalDistribution() {
+ testDistributionQuantileAccuracy(new LogNormalDistribution(new Well19937c(randomInt()), 0, 1));
+ }
+
+ public void testGammaDistribution() {
+ testDistributionQuantileAccuracy(new GammaDistribution(new Well19937c(randomInt()), 2, 5));
+ }
+
+ public void testBetaDistribution() {
+ testDistributionQuantileAccuracy(new BetaDistribution(new Well19937c(randomInt()), 2, 5));
+ }
+
+ public void testWeibullDistribution() {
+ testDistributionQuantileAccuracy(new WeibullDistribution(new Well19937c(randomInt()), 2, 5));
+ }
+
+ public void testBasicSmall() {
+ DoubleStream values = IntStream.range(1, 10).mapToDouble(Double::valueOf);
+ double maxError = testQuantileAccuracy(values.toArray(), 100);
+ assertThat(maxError, lessThan(0.000001));
+ }
+
+ public void testPercentileOverlapsZeroBucket() {
+ ExponentialHistogram histo = ExponentialHistogram.create(9, -3.0, -2, -1, 0, 0, 0, 1, 2, 3);
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, 8.0 / 16.0), equalTo(0.0));
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, 7.0 / 16.0), equalTo(0.0));
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, 9.0 / 16.0), equalTo(0.0));
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, 5.0 / 16.0), closeTo(-0.5, 0.000001));
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, 11.0 / 16.0), closeTo(0.5, 0.000001));
+ }
+
+ public void testBigJump() {
+ double[] values = DoubleStream.concat(IntStream.range(0, 18).mapToDouble(Double::valueOf), DoubleStream.of(1_000_000.0)).toArray();
+
+ double maxError = testQuantileAccuracy(values, 500);
+ assertThat(maxError, lessThan(0.000001));
+ }
+
+ public void testExplicitSkewedData() {
+ double[] data = new double[] {
+ 245,
+ 246,
+ 247.249,
+ 240,
+ 243,
+ 248,
+ 250,
+ 241,
+ 244,
+ 245,
+ 245,
+ 247,
+ 243,
+ 242,
+ 241,
+ 50100,
+ 51246,
+ 52247,
+ 52249,
+ 51240,
+ 53243,
+ 59248,
+ 59250,
+ 57241,
+ 56244,
+ 55245,
+ 56245,
+ 575247,
+ 58243,
+ 51242,
+ 54241 };
+
+ double maxError = testQuantileAccuracy(data, data.length / 2);
+ assertThat(maxError, lessThan(0.007));
+ }
+
+ public void testEmptyHistogram() {
+ ExponentialHistogram histo = ExponentialHistogram.create(1);
+ for (double q : QUANTILES_TO_TEST) {
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), notANumber());
+ }
+ }
+
+ public void testSingleValueHistogram() {
+ ExponentialHistogram histo = ExponentialHistogram.create(1, 42.0);
+ for (double q : QUANTILES_TO_TEST) {
+ assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), closeTo(42, 0.0000001));
+ }
+ }
+
+ public void testBucketCountImpact() {
+ RealDistribution distribution = new LogNormalDistribution(new Well19937c(randomInt()), 0, 1);
+ int sampleSize = between(100, 50_000);
+ double[] values = generateSamples(distribution, sampleSize);
+
+ // Verify that more buckets generally means better accuracy
+ double errorWithFewBuckets = testQuantileAccuracy(values, 20);
+ double errorWithManyBuckets = testQuantileAccuracy(values, 200);
+ assertThat("More buckets should improve accuracy", errorWithManyBuckets, lessThanOrEqualTo(errorWithFewBuckets));
+ }
+
+ public void testMixedSignValues() {
+ double[] values = new double[between(100, 10_000)];
+ for (int i = 0; i < values.length; i++) {
+ values[i] = (randomDouble() * 200) - 100; // Range from -100 to 100
+ }
+
+ testQuantileAccuracy(values, 100);
+ }
+
+ public void testSkewedData() {
+ // Create a highly skewed dataset
+ double[] values = new double[10000];
+ for (int i = 0; i < values.length; i++) {
+ if (randomDouble() < 0.9) {
+ // 90% of values are small
+ values[i] = randomDouble() * 10;
+ } else {
+ // 10% are very large
+ values[i] = randomDouble() * 10000 + 100;
+ }
+ }
+
+ testQuantileAccuracy(values, 100);
+ }
+
+ public void testDataWithZeros() {
+ double[] values = new double[10000];
+ for (int i = 0; i < values.length; i++) {
+ if (randomDouble() < 0.2) {
+ // 20% zeros
+ values[i] = 0;
+ } else {
+ values[i] = randomDouble() * 100;
+ }
+ }
+
+ testQuantileAccuracy(values, 100);
+ }
+
+ private void testDistributionQuantileAccuracy(RealDistribution distribution) {
+ double[] values = generateSamples(distribution, between(100, 50_000));
+ int bucketCount = randomBucketCount();
+ testQuantileAccuracy(values, bucketCount);
+ }
+
+ private static double[] generateSamples(RealDistribution distribution, int sampleSize) {
+ double[] values = new double[sampleSize];
+ for (int i = 0; i < sampleSize; i++) {
+ values[i] = distribution.sample();
+ }
+ return values;
+ }
+
+ private double testQuantileAccuracy(double[] values, int bucketCount) {
+ // Create histogram
+ ExponentialHistogram histogram = ExponentialHistogram.create(bucketCount, values);
+ Arrays.sort(values);
+
+ double allowedError = getMaximumRelativeError(values, bucketCount);
+ double maxError = 0;
+
+ // Compare histogram quantiles with exact quantiles
+ for (double q : QUANTILES_TO_TEST) {
+ double percentileRank = q * (values.length - 1);
+ int lowerRank = (int) Math.floor(percentileRank);
+ int upperRank = (int) Math.ceil(percentileRank);
+ double upperFactor = percentileRank - lowerRank;
+
+ if (values[lowerRank] < 0 && values[upperRank] > 0) {
+ // the percentile lies directly at a sign change, and we would interpolate linearly between a negative
+ // and a positive value; in this case the relative error bound does not hold
+ continue;
+ }
+ double exactValue = values[lowerRank] * (1 - upperFactor) + values[upperRank] * upperFactor;
+
+ double histoValue = ExponentialHistogramQuantile.getQuantile(histogram, q);
+
+ // Skip comparison if exact value is close to zero to avoid false-positives due to numerical imprecision
+ if (Math.abs(exactValue) < 1e-100) {
+ continue;
+ }
+
+ double relativeError = Math.abs(histoValue - exactValue) / Math.abs(exactValue);
+ maxError = Math.max(maxError, relativeError);
+
+ assertThat(
+ String.format(Locale.ENGLISH, "Quantile %.2f should be accurate within %.6f%% relative error", q, allowedError * 100),
+ histoValue,
+ closeTo(exactValue, Math.abs(exactValue * allowedError))
+ );
+
+ }
+ return maxError;
+ }
+
+ /**
+ * Provides the upper bound of the relative error for any percentile estimate performed with the exponential histogram.
+ * The error depends on the raw values put into the histogram and the number of buckets allowed.
+ * This is an implementation of the error bound computation proven by Theorem 3 of the UDDSketch paper.
+ */
+ private static double getMaximumRelativeError(double[] values, int bucketCount) {
+ HashSet<Long> usedPositiveIndices = new HashSet<>();
+ HashSet<Long> usedNegativeIndices = new HashSet<>();
+ int bestPossibleScale = MAX_SCALE;
+ for (double value : values) {
+ if (value < 0) {
+ usedNegativeIndices.add(computeIndex(value, bestPossibleScale));
+ } else if (value > 0) {
+ usedPositiveIndices.add(computeIndex(value, bestPossibleScale));
+ }
+ while ((usedNegativeIndices.size() + usedPositiveIndices.size()) > bucketCount) {
+ usedNegativeIndices = rightShiftAll(usedNegativeIndices);
+ usedPositiveIndices = rightShiftAll(usedPositiveIndices);
+ bestPossibleScale--;
+ }
+ }
+ // for the best possible scale, compute the worst-case error
+ double base = Math.pow(2.0, Math.scalb(1.0, -bestPossibleScale));
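+ // with u = base * l, the point of least relative error is x = 2*l*u/(l+u) = 2*base*l/(1+base),
+ // giving a worst-case relative error of (x - l) / l = 2*base/(1+base) - 1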
+ return 2 * base / (1 + base) - 1;
+ }
+
+ private static HashSet<Long> rightShiftAll(HashSet<Long> indices) {
+ HashSet<Long> result = new HashSet<>();
+ for (long index : indices) {
+ result.add(index >> 1);
+ }
+ return result;
+ }
+
+}
diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java
new file mode 100644
index 0000000000000..43873fba53ec7
--- /dev/null
+++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class ZeroBucketTests extends ESTestCase {
+
+ public void testMinimalBucketHasZeroThreshold() {
+ assertThat(ZeroBucket.minimalWithCount(42).zeroThreshold(), equalTo(0.0));
+ }
+}