|
9 | 9 |
|
10 | 10 | package org.elasticsearch.exponentialhistogram; |
11 | 11 |
|
| 12 | +import java.util.OptionalLong; |
| 13 | + |
| 14 | +/** |
| 15 | + * Interface for implementations of exponential histograms adhering to the <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">opentelemetry definition</a>. |
| 16 | + * This interface explicitly allows for sparse implementation: It does not offer to directly access buckets by index, instead it |
| 17 | + * is only possible to iterate over the buckets.<br> |
| 18 | + * The most important properties are: |
| 19 | + * <ul> |
| 20 | + * <li>The histogram has a scale parameter, which defines the accuracy. The <code>base</code> for the buckets is defined as <code>base = 2^(2^-scale)</code></li> |
| 21 | + * <li>The histogram bucket at index <code>i</code> has the range <code>(base^i, base^(i+1)]</code> </li> |
| 22 | + * <li>Negative values are represented by a separate negative range of buckets with the boundaries <code>[-base^(i+1), -base^i)</code></li> |
| 23 | + * <li>histograms are perfectly subsetting: Decreasing the scale by one exactly merges each pair of neighbouring buckets</li> |
| 24 | + * <li>a special {@link ZeroBucket} is used to handle zero and close to zero values</li> |
| 25 | + * </ul> |
| 26 | + * |
| 27 | + * <br> |
| 28 | + * In addition, in all algorithms we make a central assumption about the distribution of samples within each bucket: |
| 29 | + * We assume they all lie on the single point of least error relative to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}). |
| 30 | + */ |
12 | 31 | public interface ExponentialHistogram { |
13 | 32 |
|
14 | | - // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision |
15 | | - // theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values |
16 | | - // if we want to use something larger, we'll have to rework the math of converting from double to indices and back |
| 33 | + //TODO: support min/max/sum/count storage and merging |
| 34 | + //TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries |
| 35 | + |
| 36 | + // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision when computing |
| 37 | + // indices for double values |
| 38 | + // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values |
| 39 | + // For that to work, we'll have to rework the math of converting from double to indices and back |
17 | 40 | // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple |
18 | 41 | int MAX_SCALE = 38; |
19 | 42 |
|
| 43 | + // At this scale all double values already fall into a single bucket |
| 44 | + int MIN_SCALE = -11; |
| 45 | + |
20 | 46 | // Only use 62 bits at max to allow to compute the difference between the smallest and largest index without causing overflow |
21 | 47 | // Also the extra bit gives us room for some tricks for compact storage |
22 | 48 | int MAX_INDEX_BITS = 62; |
23 | 49 | long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1; |
24 | 50 | long MIN_INDEX = -MAX_INDEX; |
25 | 51 |
|
| 52 | + /** |
| 53 | + * The scale of the histogram. Higher scales result in higher accuracy, but potentially higher bucket count. |
| 54 | + * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}. |
| 55 | + */ |
26 | 56 | int scale(); |
27 | 57 |
|
| 58 | + /** |
| 59 | + * @return the {@link ZeroBucket} representing the number of zero (or close to zero) values and its threshold |
| 60 | + */ |
28 | 61 | ZeroBucket zeroBucket(); |
29 | 62 |
|
30 | | - BucketIterator positiveBuckets(); |
| 63 | + /** |
| 64 | + * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. |
| 65 | + */ |
| 66 | + CopyableBucketIterator positiveBuckets(); |
31 | 67 |
|
32 | | - BucketIterator negativeBuckets(); |
| 68 | + /** |
| 69 | + * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}. |
| 70 | + */ |
| 71 | + CopyableBucketIterator negativeBuckets(); |
33 | 72 |
|
34 | 73 | /** |
35 | | - * Returns the highest populated bucket index, taking both negative and positive buckets into account; |
36 | | - * If there are no buckets populated, Long.MIN_VALUE shall be returned. |
| 74 | + * Returns the highest populated bucket index, taking both negative and positive buckets into account. |
| 75 | + * If there are neither positive nor negative buckets populated, an empty optional is returned. |
37 | 76 | */ |
38 | | - long maximumBucketIndex(); |
| 77 | + OptionalLong maximumBucketIndex(); |
39 | 78 |
|
40 | 79 | /** |
41 | | - * Iterator over the non-empty buckets. |
| 80 | + * Iterator over non-empty buckets of the histogram. Can represent either the positive or negative histogram range. |
| 81 | + * <ul> |
| 82 | + * <li>The iterator always iterates from the lowest bucket index to the highest</li> |
| 83 | + * <li>The iterator never returns duplicate buckets (buckets with the same index) </li> |
| 84 | + * <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero)</li> |
| 85 | + * </ul> |
42 | 86 | */ |
43 | 87 | interface BucketIterator { |
| 88 | + /** |
| 89 | + * Checks if there are any buckets remaining to be visited by this iterator. |
| 90 | + * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}. |
| 91 | + * |
| 92 | + * @return <code>false</code>, if the end has been reached, <code>true</code> otherwise. |
| 93 | + */ |
44 | 94 | boolean hasNext(); |
45 | 95 |
|
| 96 | + /** |
| 97 | + * The number of items in the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. |
| 98 | + * Must not be called if {@link #hasNext()} returns <code>false</code>. |
| 99 | + * |
| 100 | + * @return the number of items in the bucket, always greater than zero |
| 101 | + */ |
46 | 102 | long peekCount(); |
47 | 103 |
|
| 104 | + /** |
| 105 | + * The index of the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value. |
| 106 | + * Must not be called if {@link #hasNext()} returns <code>false</code>. |
| 107 | + * |
| 108 | + * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}] |
| 109 | + */ |
48 | 110 | long peekIndex(); |
49 | 111 |
|
| 112 | + /** |
| 113 | + * Moves the iterator to the next, non-empty bucket. |
| 114 | + * If {@link #hasNext()} is <code>true</code> after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value bigger than prior to the {@link #advance()} call. |
| 115 | + */ |
50 | 116 | void advance(); |
51 | 117 |
|
| 118 | + /** |
| 119 | + * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries, |
| 120 | + * e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}. |
| 121 | + * |
| 122 | + * @return the scale, which is guaranteed to be constant over the lifetime of this iterator. |
| 123 | + */ |
52 | 124 | int scale(); |
53 | 125 |
|
54 | 126 | BucketIterator copy(); |
55 | 127 | } |
56 | 128 |
|
| 129 | + /** |
| 130 | + * A {@link BucketIterator} which can be copied. |
| 131 | + */ |
| 132 | + interface CopyableBucketIterator extends BucketIterator { |
| 133 | + |
| 134 | + /** |
| 135 | + * Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator. |
| 136 | + * Calling {@link #advance()} on the copied iterator does not affect <code>this</code> and vice-versa. |
| 137 | + */ |
| 138 | + CopyableBucketIterator copy(); |
| 139 | + } |
| 140 | + |
57 | 141 | } |
0 commit comments