
Commit cab3fdf

Clean up, bug fixes and javadoc
1 parent e6924e9 commit cab3fdf

21 files changed: +887 -576 lines

gradle/verification-metadata.xml

Lines changed: 5 additions & 0 deletions
@@ -66,6 +66,11 @@
             <sha256 value="3366d2c88fb576e486d830f521184e8f1839f8c15dcd2151a3f6e1f62b0b37a0" origin="Generated by Gradle"/>
          </artifact>
       </component>
+      <component group="ch.obermuhlner" name="big-math" version="2.3.2">
+         <artifact name="big-math-2.3.2.jar">
+            <sha256 value="693e1bb7c7f5184b448f03c2a2c0c45d07d8e89e4641fdc31ab0a8057027f43d" origin="Generated by Gradle"/>
+         </artifact>
+      </component>
       <component group="ch.randelshofer" name="fastdoubleparser" version="0.8.0">
          <artifact name="fastdoubleparser-0.8.0.jar">
             <sha256 value="10fe288fd7a2cdaf5175332b73529f9abf8fd54dcfff317d6967c0c35ffb133b" origin="Generated by Gradle"/>

libs/exponential-histogram/build.gradle

Lines changed: 1 addition & 0 deletions
@@ -13,5 +13,6 @@ apply plugin: 'elasticsearch.build'
 
 dependencies {
     testImplementation(project(":test:framework"))
+    testImplementation('ch.obermuhlner:big-math:2.3.2')
     testImplementation('org.apache.commons:commons-math3:3.6.1')
 }
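Editorial note: big-math provides arbitrary-precision math via BigDecimalMath. A plausible use in these tests is computing high-precision reference values for bucket boundaries, i.e. base^index with base = 2^(2^-scale). The snippet below is a minimal sketch under that assumption, not code from this commit; the class and method names are made up for illustration.

import ch.obermuhlner.math.big.BigDecimalMath;

import java.math.BigDecimal;
import java.math.MathContext;

// Hypothetical test helper: compute a high-precision reference lower bucket boundary,
// base^index = 2^(index * 2^-scale), to compare against the library's double-based math.
public class ReferenceBoundarySketch {

    static BigDecimal referenceLowerBoundary(long index, int scale, MathContext mc) {
        // 2^-scale as an exact BigDecimal, then exponent = index * 2^-scale
        BigDecimal exponent = BigDecimal.valueOf(index).multiply(BigDecimal.valueOf(2).pow(-scale, mc), mc);
        return BigDecimalMath.pow(BigDecimal.valueOf(2), exponent, mc);
    }

    public static void main(String[] args) {
        MathContext mc = new MathContext(50);
        // lower boundary of bucket 5 at scale 2: 2^(5/4) ≈ 2.3784...
        System.out.println(referenceLowerBoundary(5, 2, mc));
    }
}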

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java

Lines changed: 23 additions & 13 deletions
@@ -9,18 +9,28 @@
 
 package org.elasticsearch.exponentialhistogram;
 
-public class DownscaleStats {
+import java.util.Arrays;
 
-    // collapsedCount[i] represents the number of assitional
-    // collapsed buckets when increasing the scale by (i+1) instead of (i)
-    int[] collapsedCount = new int[63];
+/**
+ * Data structure for effectively computing by how much the scale of a histogram needs to be reduced to reach a target bucket count.
+ * This works by looking at each pair of neighboring buckets and checking at which scale reduction they would collapse to a single bucket.
+ */
+class DownscaleStats {
+
+    // collapsedBucketCount[i] represents the number of additional
+    // collapsed buckets when increasing the scale by (i+1) instead of just by (i)
+    int[] collapsedBucketCount = new int[63];
+
+    void reset() {
+        Arrays.fill(collapsedBucketCount, 0);
+    }
 
     void add(long previousBucketIndex, long currentBucketIndex) {
         if (currentBucketIndex <= previousBucketIndex) {
             throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex");
         }
         /* Below is an efficient variant of the following algorithm:
-        for (int i=0; i<64; i++) {
+        for (int i=0; i<63; i++) {
             if (prevIndex>>(i+1) == currIndex>>(i+1)) {
                 collapsedBucketCount[i]++;
                 break;
@@ -35,28 +45,28 @@ void add(long previousBucketIndex, long currentBucketIndex) {
             return;
         }
         int requiredScaleChange = 64 - numEqualLeadingBits;
-        collapsedCount[requiredScaleChange - 1]++;
+        collapsedBucketCount[requiredScaleChange - 1]++;
     }
 
     int getCollapsedBucketCountAfterScaleReduction(int reduction) {
         int totalCollapsed = 0;
         for (int i = 0; i < reduction; i++) {
-            totalCollapsed += collapsedCount[i];
+            totalCollapsed += collapsedBucketCount[i];
         }
         return totalCollapsed;
     }
 
-    public int getRequiredScaleReductionToReduceBucketCountBy(int desiredReduction) {
-        if (desiredReduction == 0) {
+    int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) {
+        if (desiredCollapsedBucketCount == 0) {
             return 0;
         }
         int totalCollapsed = 0;
-        for (int i = 0; i < collapsedCount.length; i++) {
-            totalCollapsed += collapsedCount[i];
-            if (totalCollapsed >= desiredReduction) {
+        for (int i = 0; i < collapsedBucketCount.length; i++) {
+            totalCollapsed += collapsedBucketCount[i];
+            if (totalCollapsed >= desiredCollapsedBucketCount) {
                 return i + 1;
             }
         }
-        throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredReduction);
+        throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredCollapsedBucketCount);
     }
 }
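Editorial note: the naive loop quoted in the comment above relies on the fact that two bucket indices land in the same bucket after reducing the scale by r exactly when index >> r agrees for both, so the smallest merging reduction can be read off the XOR of the two indices. The standalone sketch below (not part of the commit; class and method names are made up for illustration) demonstrates that identity for non-negative indices; the real code additionally handles sign bits and the "indices already equal" case via the early return shown in the hunk.

// Minimal sketch of the identity behind DownscaleStats.add():
// the smallest scale reduction r with (prev >> r) == (curr >> r) is
// 64 minus the number of equal leading bits of the two indices.
public class DownscaleSketch {

    static int smallestMergingReduction(long previousIndex, long currentIndex) {
        int numEqualLeadingBits = Long.numberOfLeadingZeros(previousIndex ^ currentIndex);
        return 64 - numEqualLeadingBits;
    }

    public static void main(String[] args) {
        // buckets 5 and 6 first collapse when the scale is reduced by 2: 5 >> 2 == 6 >> 2 == 1
        System.out.println(smallestMergingReduction(5, 6)); // prints 2
        // neighbouring buckets 4 and 5 already collapse after a single scale reduction
        System.out.println(smallestMergingReduction(4, 5)); // prints 1
    }
}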

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

Lines changed: 93 additions & 9 deletions
@@ -9,49 +9,133 @@
 
 package org.elasticsearch.exponentialhistogram;
 
+import java.util.OptionalLong;
+
+/**
+ * Interface for implementations of exponential histograms adhering to the <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
+ * This interface explicitly allows for sparse implementations: it does not offer direct access to buckets by index; instead it
+ * is only possible to iterate over the buckets.<br>
+ * The most important properties are:
+ * <ul>
+ *     <li>The histogram has a scale parameter, which defines the accuracy. The <code>base</code> for the buckets is defined as <code>base = 2^(2^-scale)</code></li>
+ *     <li>The histogram bucket at index <code>i</code> has the range <code>(base^i, base^(i+1)]</code></li>
+ *     <li>Negative values are represented by a separate negative range of buckets with the boundaries <code>(-base^(i+1), -base^i]</code></li>
+ *     <li>Histograms are perfectly subsetting: increasing the scale by one exactly merges each pair of neighbouring buckets</li>
+ *     <li>A special {@link ZeroBucket} is used to handle zero and close-to-zero values</li>
+ * </ul>
+ *
+ * <br>
+ * In addition, all algorithms make a central assumption about the distribution of samples within each bucket:
+ * we assume they all lie on the single point of least relative error with respect to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}).
+ */
 public interface ExponentialHistogram {
 
-    // scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision
-    // theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values
-    // if we want to use something larger, we'll have to rework the math of converting from double to indices and back
+    // TODO: support min/max/sum/count storage and merging
+    // TODO: add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries
+
+    // A scale of 38 is the largest scale where we don't run into problems at the borders due to floating point precision when computing
+    // indices for double values.
+    // Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values.
+    // For that to work, we'd have to rework the math of converting from double to indices and back.
     // One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple
     int MAX_SCALE = 38;
 
+    // At this scale all double values already fall into a single bucket
+    int MIN_SCALE = -11;
+
     // Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow
     // Also the extra bit gives us room for some tricks for compact storage
     int MAX_INDEX_BITS = 62;
     long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1;
     long MIN_INDEX = -MAX_INDEX;
 
+    /**
+     * The scale of the histogram. Higher scales result in higher accuracy, but potentially a higher bucket count.
+     * Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}.
+     */
     int scale();
 
+    /**
+     * @return the {@link ZeroBucket} representing the number of zero (or close-to-zero) values and its threshold
+     */
     ZeroBucket zeroBucket();
 
-    BucketIterator positiveBuckets();
+    /**
+     * @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the returned iterator must return the same value as {@link #scale()}.
+     */
+    CopyableBucketIterator positiveBuckets();
 
-    BucketIterator negativeBuckets();
+    /**
+     * @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the returned iterator must return the same value as {@link #scale()}.
+     */
+    CopyableBucketIterator negativeBuckets();
 
     /**
-     * Returns the highest populated bucket index, taking both negative and positive buckets into account;
-     * If there are no buckets populated, Long.MIN_VALUE shall be returned.
+     * Returns the highest populated bucket index, taking both negative and positive buckets into account.
+     * If neither positive nor negative buckets are populated, an empty optional is returned.
      */
-    long maximumBucketIndex();
+    OptionalLong maximumBucketIndex();
 
     /**
-     * Iterator over the non-empty buckets.
+     * Iterator over non-empty buckets of the histogram. Can represent either the positive or the negative histogram range.
+     * <ul>
+     *     <li>The iterator always iterates from the lowest bucket index to the highest</li>
+     *     <li>The iterator never returns duplicate buckets (buckets with the same index)</li>
+     *     <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero)</li>
+     * </ul>
     */
    interface BucketIterator {
+        /**
+         * Checks if there are any buckets remaining to be visited by this iterator.
+         * If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}.
+         *
+         * @return <code>false</code> if the end has been reached, <code>true</code> otherwise
+         */
         boolean hasNext();
 
+        /**
+         * The number of items in the bucket this iterator currently points at. Does not advance the iterator and therefore can be called repeatedly, returning the same value.
+         * Must not be called if {@link #hasNext()} returns <code>false</code>.
+         *
+         * @return the number of items in the bucket, always greater than zero
+         */
         long peekCount();
 
+        /**
+         * The index of the bucket this iterator currently points at. Does not advance the iterator and therefore can be called repeatedly, returning the same value.
+         * Must not be called if {@link #hasNext()} returns <code>false</code>.
+         *
+         * @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}]
+         */
         long peekIndex();
 
+        /**
+         * Moves the iterator to the next non-empty bucket.
+         * If {@link #hasNext()} is <code>true</code> after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a bigger value than prior to the {@link #advance()} call.
+         */
         void advance();
 
+        /**
+         * Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries,
+         * e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}.
+         *
+         * @return the scale, which is guaranteed to be constant over the lifetime of this iterator
+         */
         int scale();
 
         BucketIterator copy();
     }
 
+    /**
+     * A {@link BucketIterator} which can be copied.
+     */
+    interface CopyableBucketIterator extends BucketIterator {
+
+        /**
+         * Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator.
+         * Calling {@link #advance()} on the copied iterator does not affect <code>this</code> and vice-versa.
+         */
+        CopyableBucketIterator copy();
+    }
+
 }
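Editorial note: the new javadoc defines the bucket geometry purely through base = 2^(2^-scale). The standalone sketch below (not from this commit; names are illustrative, the library's own helper is the referenced ExponentialScaleUtils.getLowerBucketBoundary) spells that math out and also shows the perfect-subsetting property mentioned above.

// Minimal sketch of the bucket-boundary math: bucket i at a given scale covers (base^i, base^(i+1)].
public class BucketBoundarySketch {

    // lower boundary of bucket `index`: base^index = 2^(index * 2^-scale)
    static double lowerBoundary(long index, int scale) {
        return Math.pow(2.0, Math.scalb((double) index, -scale));
    }

    public static void main(String[] args) {
        int scale = 2; // base = 2^(2^-2) = 2^0.25 ≈ 1.1892
        // bucket 4 at scale 2 covers (2.0, 2.3784]
        System.out.println(lowerBoundary(4, scale)); // 2.0
        System.out.println(lowerBoundary(5, scale)); // ≈ 2.3784
        // perfect subsetting: buckets 8 and 9 at scale 3 together cover exactly bucket 4 at scale 2
        System.out.println(lowerBoundary(8, scale + 1)); // 2.0, same lower bound as bucket 4 at scale 2
    }
}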

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java

Lines changed: 31 additions & 11 deletions
@@ -12,28 +12,39 @@
 import java.util.Arrays;
 import java.util.stream.DoubleStream;
 
-import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.computeIndex;
+import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
 
 /**
- * Class for generating a histogram from raw values.
+ * Class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum bucket count.
+ *
+ * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed
+ * to represent the exact raw values with a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>.
 */
 public class ExponentialHistogramGenerator {
 
+    // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation.
+    // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted.
+    // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator.
+    // This yields an amortized runtime of O( log(c) ).
     private final double[] rawValueBuffer;
     int valueCount;
 
     private final ExponentialHistogramMerger resultMerger;
-    private final FixedSizeExponentialHistogram valueBuffer;
+    private final FixedCapacityExponentialHistogram valueBuffer;
 
     private boolean isFinished = false;
 
-    public ExponentialHistogramGenerator(int numBuckets) {
-        rawValueBuffer = new double[numBuckets];
+    public ExponentialHistogramGenerator(int maxBucketCount) {
+        rawValueBuffer = new double[maxBucketCount];
         valueCount = 0;
-        valueBuffer = new FixedSizeExponentialHistogram(numBuckets);
-        resultMerger = new ExponentialHistogramMerger(numBuckets);
+        valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount);
+        resultMerger = new ExponentialHistogramMerger(maxBucketCount);
     }
 
+    /**
+     * Add the given value to the histogram.
+     * Must not be called after {@link #get()} has been called.
+     */
     public void add(double value) {
         if (isFinished) {
             throw new IllegalStateException("get() has already been called");
@@ -45,19 +56,28 @@ public void add(double value) {
         valueCount++;
     }
 
+    /**
+     * @return the histogram representing the distribution of all accumulated values.
+     */
     public ExponentialHistogram get() {
-        if (isFinished) {
-            throw new IllegalStateException("get() has already been called");
-        }
         isFinished = true;
         mergeValuesToHistogram();
         return resultMerger.get();
     }
 
+    /**
+     * Create a histogram representing the distribution of the given values.
+     * The histogram will have a bucket count of at most the length of the provided array
+     * and will have a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>.
+     */
     public static ExponentialHistogram createFor(double... values) {
         return createFor(values.length, Arrays.stream(values));
     }
-
+    /**
+     * Create a histogram representing the distribution of the given values with at most the given number of buckets.
+     * If the given bucketCount is greater than or equal to the number of values, the resulting histogram will have a
+     * relative error of less than <code>2^(2^-MAX_SCALE) - 1</code>.
+     */
    public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) {
         ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount);
         values.forEach(generator::add);
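Editorial note: to see the buffer-then-merge flow from the caller's perspective, here is a hedged usage sketch based only on the API visible in this commit (ExponentialHistogramGenerator.createFor plus the ExponentialHistogram and BucketIterator methods documented above). The package names come from the file paths; the sample values are made up.

import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;

public class GeneratorUsageSketch {
    public static void main(String[] args) {
        // build a histogram from raw values; with 5 values and 5 buckets the representation is "exact"
        ExponentialHistogram histo = ExponentialHistogramGenerator.createFor(0.5, 1.0, 2.0, 2.1, 8.0);

        // walk the populated positive buckets from the lowest index to the highest
        ExponentialHistogram.BucketIterator buckets = histo.positiveBuckets();
        while (buckets.hasNext()) {
            System.out.println("bucket index " + buckets.peekIndex()
                + " at scale " + buckets.scale()
                + " holds " + buckets.peekCount() + " value(s)");
            buckets.advance();
        }
    }
}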
