
Commit cab3fdf

Clean up, bug fixes and javadoc
1 parent e6924e9 commit cab3fdf

21 files changed: +887 -576 lines

gradle/verification-metadata.xml

Lines changed: 5 additions & 0 deletions
@@ -66,6 +66,11 @@
             <sha256 value="3366d2c88fb576e486d830f521184e8f1839f8c15dcd2151a3f6e1f62b0b37a0" origin="Generated by Gradle"/>
          </artifact>
       </component>
+      <component group="ch.obermuhlner" name="big-math" version="2.3.2">
+         <artifact name="big-math-2.3.2.jar">
+            <sha256 value="693e1bb7c7f5184b448f03c2a2c0c45d07d8e89e4641fdc31ab0a8057027f43d" origin="Generated by Gradle"/>
+         </artifact>
+      </component>
       <component group="ch.randelshofer" name="fastdoubleparser" version="0.8.0">
          <artifact name="fastdoubleparser-0.8.0.jar">
             <sha256 value="10fe288fd7a2cdaf5175332b73529f9abf8fd54dcfff317d6967c0c35ffb133b" origin="Generated by Gradle"/>

libs/exponential-histogram/build.gradle

Lines changed: 1 addition & 0 deletions
@@ -13,5 +13,6 @@ apply plugin: 'elasticsearch.build'
 
 dependencies {
     testImplementation(project(":test:framework"))
+    testImplementation('ch.obermuhlner:big-math:2.3.2')
     testImplementation('org.apache.commons:commons-math3:3.6.1')
 }
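Editorial note: big-math provides arbitrary-precision math via BigDecimalMath. A plausible use in these tests is computing high-precision reference values for bucket boundaries, i.e. base^index with base = 2^(2^-scale). The snippet below is a minimal sketch under that assumption, not code from this commit; the class and method names are made up for illustration.

import ch.obermuhlner.math.big.BigDecimalMath;

import java.math.BigDecimal;
import java.math.MathContext;

// Hypothetical test helper: compute a high-precision reference lower bucket boundary,
// base^index = 2^(index * 2^-scale), to compare against the library's double-based math.
public class ReferenceBoundarySketch {

    static BigDecimal referenceLowerBoundary(long index, int scale, MathContext mc) {
        // 2^-scale as an exact BigDecimal, then exponent = index * 2^-scale
        BigDecimal exponent = BigDecimal.valueOf(index).multiply(BigDecimal.valueOf(2).pow(-scale, mc), mc);
        return BigDecimalMath.pow(BigDecimal.valueOf(2), exponent, mc);
    }

    public static void main(String[] args) {
        MathContext mc = new MathContext(50);
        // lower boundary of bucket 5 at scale 2: 2^(5/4) ≈ 2.3784...
        System.out.println(referenceLowerBoundary(5, 2, mc));
    }
}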

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java

Lines changed: 23 additions & 13 deletions
@@ -9,18 +9,28 @@
 
 package org.elasticsearch.exponentialhistogram;
 
-public class DownscaleStats {
+import java.util.Arrays;
 
-    // collapsedCount[i] represents the number of assitional
-    // collapsed buckets when increasing the scale by (i+1) instead of (i)
-    int[] collapsedCount = new int[63];
+/**
+ * Data structure for effectively computing by how much the scale of a histogram needs to be reduced to reach a target bucket count.
+ * This works by looking at each pair of neighboring buckets and checking at which scale reduction they would collapse to a single bucket.
+ */
+class DownscaleStats {
+
+    // collapsedBucketCount[i] represents the number of additional
+    // collapsed buckets when increasing the scale by (i+1) instead of just by (i)
+    int[] collapsedBucketCount = new int[63];
+
+    void reset() {
+        Arrays.fill(collapsedBucketCount, 0);
+    }
 
     void add(long previousBucketIndex, long currentBucketIndex) {
         if (currentBucketIndex <= previousBucketIndex) {
             throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex");
         }
         /* Below is an efficient variant of the following algorithm:
-        for (int i=0; i<64; i++) {
+        for (int i=0; i<63; i++) {
             if (prevIndex>>(i+1) == currIndex>>(i+1)) {
                 collapsedBucketCount[i]++;
                 break;
@@ -35,28 +45,28 @@ void add(long previousBucketIndex, long currentBucketIndex) {
             return;
         }
         int requiredScaleChange = 64 - numEqualLeadingBits;
-        collapsedCount[requiredScaleChange - 1]++;
+        collapsedBucketCount[requiredScaleChange - 1]++;
     }
 
     int getCollapsedBucketCountAfterScaleReduction(int reduction) {
         int totalCollapsed = 0;
         for (int i = 0; i < reduction; i++) {
-            totalCollapsed += collapsedCount[i];
+            totalCollapsed += collapsedBucketCount[i];
         }
         return totalCollapsed;
     }
 
-    public int getRequiredScaleReductionToReduceBucketCountBy(int desiredReduction) {
-        if (desiredReduction == 0) {
+    int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) {
+        if (desiredCollapsedBucketCount == 0) {
             return 0;
         }
         int totalCollapsed = 0;
-        for (int i = 0; i < collapsedCount.length; i++) {
-            totalCollapsed += collapsedCount[i];
-            if (totalCollapsed >= desiredReduction) {
+        for (int i = 0; i < collapsedBucketCount.length; i++) {
+            totalCollapsed += collapsedBucketCount[i];
+            if (totalCollapsed >= desiredCollapsedBucketCount) {
                 return i + 1;
             }
         }
-        throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredReduction);
+        throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredCollapsedBucketCount);
     }
 }
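Editorial note: the naive loop quoted in the comment above relies on the fact that two bucket indices land in the same bucket after reducing the scale by r exactly when index >> r agrees for both, so the smallest merging reduction can be read off the XOR of the two indices. The standalone sketch below (not part of the commit; class and method names are made up for illustration) demonstrates that identity for non-negative indices; the real code additionally handles sign bits and the "indices already equal" case via the early return shown in the hunk.

// Minimal sketch of the identity behind DownscaleStats.add():
// the smallest scale reduction r with (prev >> r) == (curr >> r) is
// 64 minus the number of equal leading bits of the two indices.
public class DownscaleSketch {

    static int smallestMergingReduction(long previousIndex, long currentIndex) {
        int numEqualLeadingBits = Long.numberOfLeadingZeros(previousIndex ^ currentIndex);
        return 64 - numEqualLeadingBits;
    }

    public static void main(String[] args) {
        // buckets 5 and 6 first collapse when the scale is reduced by 2: 5 >> 2 == 6 >> 2 == 1
        System.out.println(smallestMergingReduction(5, 6)); // prints 2
        // neighbouring buckets 4 and 5 already collapse after a single scale reduction
        System.out.println(smallestMergingReduction(4, 5)); // prints 1
    }
}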

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

Lines changed: 93 additions & 9 deletions
@@ -9,49 +9,133 @@
 
 package org.elasticsearch.exponentialhistogram;
 
+import java.util.OptionalLong;
+
+/**
+ * Interface for implementations of exponential histograms adhering to the <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
+ * This interface explicitly allows for sparse implementations: it does not offer direct access to buckets by index; instead it
+ * is only possible to iterate over the buckets.<br>
+ * The most important properties are:
+ * <ul>
+ *     <li>The histogram has a scale parameter, which defines the accuracy. The <code>base</code> for the buckets is defined as <code>base = 2^(2^-scale)</code></li>
+ *     <li>The histogram bucket at index <code>i</code> has the range <code>(base^i, base^(i+1)]</code></li>
+ *     <li>Negative values are represented by a separate negative range of buckets with the boundaries <code>(-base^(i+1), -base^i]</code></li>
+ *     <li>Histograms are perfectly subsetting: increasing the scale by one exactly merges each pair of neighbouring buckets</li>
+ *     <li>A special {@link ZeroBucket} is used to handle zero and close-to-zero values</li>
+ * </ul>
+ *
+ * <br>
+ * In addition, all algorithms make a central assumption about the distribution of samples within each bucket:
+ * we assume they all lie on the single point of least relative error with respect to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}).
+ */
 public interface ExponentialHistogram {
 
-    // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision
-    // theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values
-    // if we want to use something larger, we'll have to rework the math of converting from double to indices and back
+    // TODO: support min/max/sum/count storage and merging
+    // TODO: add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries
+
+    // A scale of 38 is the largest scale where we don't run into problems at the borders due to floating point precision when computing
+    // indices for double values.
+    // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values.
+    // For that to work, we'd have to rework the math of converting from double to indices and back.
     // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple
     int MAX_SCALE = 38;
 
+    // At this scale all double values already fall into a single bucket
+    int MIN_SCALE = -11;
+
     // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow
     // Also the extra bit gives us room for some tricks for compact storage
     int MAX_INDEX_BITS = 62;
     long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1;
     long MIN_INDEX = -MAX_INDEX;
 
+    /**
+     * The scale of the histogram. Higher scales result in higher accuracy, but potentially a higher bucket count.
+     * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}.
+     */
     int scale();
 
+    /**
+     * @return the {@link ZeroBucket} representing the number of zero (or close-to-zero) values and its threshold
+     */
     ZeroBucket zeroBucket();
 
-    BucketIterator positiveBuckets();
+    /**
+     * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the returned iterator must return the same value as {@link #scale()}.
+     */
+    CopyableBucketIterator positiveBuckets();
 
-    BucketIterator negativeBuckets();
+    /**
+     * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the returned iterator must return the same value as {@link #scale()}.
+     */
+    CopyableBucketIterator negativeBuckets();
 
     /**
-     * Returns the highest populated bucket index, taking both negative and positive buckets into account;
-     * If there are no buckets populated, Long.MIN_VALUE shall be returned.
+     * Returns the highest populated bucket index, taking both negative and positive buckets into account.
+     * If neither positive nor negative buckets are populated, an empty optional is returned.
      */
-    long maximumBucketIndex();
+    OptionalLong maximumBucketIndex();
 
     /**
-     * Iterator over the non-empty buckets.
+     * Iterator over non-empty buckets of the histogram. Can represent either the positive or the negative histogram range.
+     * <ul>
+     *     <li>The iterator always iterates from the lowest bucket index to the highest</li>
+     *     <li>The iterator never returns duplicate buckets (buckets with the same index)</li>
+     *     <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero)</li>
+     * </ul>
     */
    interface BucketIterator {
+        /**
+         * Checks if there are any buckets remaining to be visited by this iterator.
+         * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}.
+         *
+         * @return <code>false</code> if the end has been reached, <code>true</code> otherwise
+         */
         boolean hasNext();
 
+        /**
+         * The number of items in the bucket this iterator currently points at. Does not advance the iterator and therefore can be called repeatedly, returning the same value.
+         * Must not be called if {@link #hasNext()} returns <code>false</code>.
+         *
+         * @return the number of items in the bucket, always greater than zero
+         */
         long peekCount();
 
+        /**
+         * The index of the bucket this iterator currently points at. Does not advance the iterator and therefore can be called repeatedly, returning the same value.
+         * Must not be called if {@link #hasNext()} returns <code>false</code>.
+         *
+         * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}]
+         */
         long peekIndex();
 
+        /**
+         * Moves the iterator to the next non-empty bucket.
+         * If {@link #hasNext()} is <code>true</code> after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a bigger value than prior to the {@link #advance()} call.
+         */
         void advance();
 
+        /**
+         * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries,
+         * e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}.
+         *
+         * @return the scale, which is guaranteed to be constant over the lifetime of this iterator
+         */
         int scale();
 
         BucketIterator copy();
     }
 
+    /**
+     * A {@link BucketIterator} which can be copied.
+     */
+    interface CopyableBucketIterator extends BucketIterator {
+
+        /**
+         * Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator.
+         * Calling {@link #advance()} on the copied iterator does not affect <code>this</code> and vice-versa.
+         */
+        CopyableBucketIterator copy();
+    }
+
 }
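Editorial note: the new javadoc defines the bucket geometry purely through base = 2^(2^-scale). The standalone sketch below (not from this commit; names are illustrative, the library's own helper is the referenced ExponentialScaleUtils.getLowerBucketBoundary) spells that math out and also shows the perfect-subsetting property mentioned above.

// Minimal sketch of the bucket-boundary math: bucket i at a given scale covers (base^i, base^(i+1)].
public class BucketBoundarySketch {

    // lower boundary of bucket `index`: base^index = 2^(index * 2^-scale)
    static double lowerBoundary(long index, int scale) {
        return Math.pow(2.0, Math.scalb((double) index, -scale));
    }

    public static void main(String[] args) {
        int scale = 2; // base = 2^(2^-2) = 2^0.25 ≈ 1.1892
        // bucket 4 at scale 2 covers (2.0, 2.3784]
        System.out.println(lowerBoundary(4, scale)); // 2.0
        System.out.println(lowerBoundary(5, scale)); // ≈ 2.3784
        // perfect subsetting: buckets 8 and 9 at scale 3 together cover exactly bucket 4 at scale 2
        System.out.println(lowerBoundary(8, scale + 1)); // 2.0, same lower bound as bucket 4 at scale 2
    }
}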

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java

Lines changed: 31 additions & 11 deletions
@@ -12,28 +12,39 @@
 import java.util.Arrays;
 import java.util.stream.DoubleStream;
 
-import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
 
 /**
- * Class for generating a histogram from raw values.
+ * Class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum bucket count.
+ *
+ * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed
+ * to represent the exact raw values with a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>.
 */
 public class ExponentialHistogramGenerator {
 
+    // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation.
+    // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted.
+    // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator.
+    // This yields an amortized runtime of O( log(c) ).
     private final double[] rawValueBuffer;
     int valueCount;
 
     private final ExponentialHistogramMerger resultMerger;
-    private final FixedSizeExponentialHistogram valueBuffer;
+    private final FixedCapacityExponentialHistogram valueBuffer;
 
     private boolean isFinished = false;
 
-    public ExponentialHistogramGenerator(int numBuckets) {
-        rawValueBuffer = new double[numBuckets];
+    public ExponentialHistogramGenerator(int maxBucketCount) {
+        rawValueBuffer = new double[maxBucketCount];
         valueCount = 0;
-        valueBuffer = new FixedSizeExponentialHistogram(numBuckets);
-        resultMerger = new ExponentialHistogramMerger(numBuckets);
+        valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount);
+        resultMerger = new ExponentialHistogramMerger(maxBucketCount);
     }
 
+    /**
+     * Add the given value to the histogram.
+     * Must not be called after {@link #get()} has been called.
+     */
     public void add(double value) {
         if (isFinished) {
             throw new IllegalStateException("get() has already been called");
@@ -45,19 +56,28 @@ public void add(double value) {
         valueCount++;
     }
 
+    /**
+     * @return the histogram representing the distribution of all accumulated values.
+     */
     public ExponentialHistogram get() {
-        if (isFinished) {
-            throw new IllegalStateException("get() has already been called");
-        }
         isFinished = true;
         mergeValuesToHistogram();
         return resultMerger.get();
     }
 
+    /**
+     * Create a histogram representing the distribution of the given values.
+     * The histogram will have a bucket count of at most the length of the provided array
+     * and will have a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>.
+     */
     public static ExponentialHistogram createFor(double... values) {
         return createFor(values.length, Arrays.stream(values));
     }
-
+    /**
+     * Create a histogram representing the distribution of the given values with at most the given number of buckets.
+     * If the given bucketCount is greater than or equal to the number of values, the resulting histogram will have a
+     * relative error of less than <code>2^(2^-MAX_SCALE) - 1</code>.
+     */
    public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) {
         ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount);
         values.forEach(generator::add);
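Editorial note: to see the buffer-then-merge flow from the caller's perspective, here is a hedged usage sketch based only on the API visible in this commit (ExponentialHistogramGenerator.createFor plus the ExponentialHistogram and BucketIterator methods documented above). The package names come from the file paths; the sample values are made up.

import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;

public class GeneratorUsageSketch {
    public static void main(String[] args) {
        // build a histogram from raw values; with 5 values and 5 buckets the representation is "exact"
        ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(0.5, 1.0, 2.0, 2.1, 8.0);

        // walk the populated positive buckets from the lowest index to the highest
        ExponentialHistogram.BucketIterator buckets = histo.positiveBuckets();
        while (buckets.hasNext()) {
            System.out.println("bucket index " + buckets.peekIndex()
                + " at scale " + buckets.scale()
                + " holds " + buckets.peekCount() + " value(s)");
            buckets.advance();
        }
    }
}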
