Skip to content

Commit 7c6655b

Browse files
committed
AI-assisted javadoc and spotless
1 parent 486a8bd commit 7c6655b

File tree

13 files changed

+376
-236
lines changed

13 files changed

+376
-236
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
@State(Scope.Thread)
4444
public class ExponentialHistogramGenerationBench {
4545

46-
@Param({ "100", "500", "1000", "5000" , "10000", "20000"})
46+
@Param({ "100", "500", "1000", "5000", "10000", "20000" })
4747
int bucketCount;
4848

4949
@Param({ "NORMAL", "GAUSSIAN" })

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,52 @@
1212
import java.util.Arrays;
1313

1414
/**
15-
* Data structure for effectively computing by how much the scale of a histogram needs to be reduced to reach a target bucket count.
16-
* This works by looking at each pair of neighboring buckets and checking at which scale reduction they would collapse to a single bucket.
15+
* A data structure for efficiently computing the required scale reduction for a histogram to reach a target number of buckets.
16+
* This works by examining pairs of neighboring buckets and determining at which scale reduction they would merge into a single bucket.
1717
*/
1818
class DownscaleStats {
1919

20-
// collapsedBucketCount[i] represents the number of additional
20+
// collapsedBucketCount[i] stores the number of additional
2121
// collapsed buckets when reducing the scale by (i+1) instead of just by (i)
2222
int[] collapsedBucketCount = new int[63];
2323

24+
/**
25+
* Resets the data structure to its initial state.
26+
*/
2427
void reset() {
2528
Arrays.fill(collapsedBucketCount, 0);
2629
}
2730

2831
void add(long previousBucketIndex, long currentBucketIndex) {
2932
if (currentBucketIndex <= previousBucketIndex) {
30-
throw new IllegalArgumentException("currentBucketIndex must be bigger than previousBucketIndex");
33+
throw new IllegalArgumentException("currentBucketIndex must be greater than previousBucketIndex");
3134
}
32-
/* Below is an efficient variant of the following algorithm:
33-
for (int i=0; i<63; i++) {
34-
if (prevIndex>>(i+1) == currIndex>>(i+1)) {
35-
collapsedBucketCount[i]++;
36-
break;
37-
}
38-
}
39-
So we find the smallest scale reduction required to make the two buckets collapse into one
40-
*/
35+
/*
36+
* Below is an efficient variant of the following algorithm:
37+
* for (int i=0; i<63; i++) {
38+
* if (prevIndex>>(i+1) == currIndex>>(i+1)) {
39+
* collapsedBucketCount[i]++;
40+
* break;
41+
* }
42+
* }
43+
* So we find the smallest scale reduction required to make the two buckets collapse into one.
44+
*/
4145
long bitXor = previousBucketIndex ^ currentBucketIndex;
4246
int numEqualLeadingBits = Long.numberOfLeadingZeros(bitXor);
4347
if (numEqualLeadingBits == 0) {
44-
// right-shifting will never make the buckets combine, because one is positive and the other negative
48+
// right-shifting will never make the buckets combine, because one is positive and the other is negative
4549
return;
4650
}
4751
int requiredScaleChange = 64 - numEqualLeadingBits;
4852
collapsedBucketCount[requiredScaleChange - 1]++;
4953
}
5054

55+
/**
56+
* Returns the number of buckets that will be merged after applying the given scale reduction.
57+
*
58+
* @param reduction the number of steps by which the scale is reduced
59+
* @return the number of buckets that will be merged
60+
*/
5161
int getCollapsedBucketCountAfterScaleReduction(int reduction) {
5262
int totalCollapsed = 0;
5363
for (int i = 0; i < reduction; i++) {
@@ -56,6 +66,12 @@ int getCollapsedBucketCountAfterScaleReduction(int reduction) {
5666
return totalCollapsed;
5767
}
5868

69+
/**
70+
* Returns the required scale reduction to reduce the number of buckets by at least the given amount.
71+
*
72+
* @param desiredCollapsedBucketCount the target number of buckets to collapse
73+
* @return the required scale reduction
74+
*/
5975
int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCount) {
6076
if (desiredCollapsedBucketCount == 0) {
6177
return 0;
@@ -67,6 +83,6 @@ int getRequiredScaleReductionToReduceBucketCountBy(int desiredCollapsedBucketCou
6783
return i + 1;
6884
}
6985
}
70-
throw new IllegalArgumentException("it is not possible to reduce the bucket count by " + desiredCollapsedBucketCount);
86+
throw new IllegalArgumentException("Cannot reduce the bucket count by " + desiredCollapsedBucketCount);
7187
}
7288
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

Lines changed: 49 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,128 +12,136 @@
1212
import java.util.OptionalLong;
1313

1414
/**
15-
* Interface for implementations of exponential histograms adhering to the <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">opentelemetry definition</a>.
16-
* This interface explicitly allows for sparse implementation: It does not offer to directly access buckets by index, instead it
17-
* is only possible to iterate over the buckets.<br>
15+
* Interface for implementations of exponential histograms adhering to the
16+
* <a href="https://opentelemetry.io/docs/specs/otel/metrics/data-model/#exponentialhistogram">OpenTelemetry definition</a>.
17+
* This interface supports sparse implementations, allowing iteration over buckets without requiring direct index access.<br>
1818
* The most important properties are:
1919
* <ul>
20-
* <li>The histogram has a scale parameter, which defines the accuracy. The <code>base</code> for the buckets is defined as <code>base = 2^(2^-scale)</code></li>
21-
* <li>The histogram bucket at index <code>i</code> has the range <code>(base^i, base^(i+1)]</code> </li>
22-
* <li>Negative values are represented by a separate negative range of buckets with the boundaries <code>(-base^(i+1), -base^i]</code></li>
23-
* <li>histograms are perfectly subsetting: Increasing the scale by one exactly merges each pair of neighbouring buckets</li>
24-
* <li>a special {@link ZeroBucket} is used to handle zero and close to zero values</li>
20+
* <li>The histogram has a scale parameter, which defines the accuracy.
21+
* The {@code base} for the buckets is defined as {@code base = 2^(2^-scale)}</li>
22+
* <li>The histogram bucket at index {@code i} has the range {@code (base^i, base^(i+1)]}</li>
23+
* <li>Negative values are represented by a separate negative range of buckets with the boundaries {@code (-base^(i+1), -base^i]}</li>
24+
* <li>Histograms are perfectly subsetting: decreasing the scale by one merges each pair of neighboring buckets</li>
25+
* <li>A special {@link ZeroBucket} is used to handle zero and close-to-zero values</li>
2526
* </ul>
2627
*
2728
* <br>
28-
* In addition, in all algorithms we make a central assumption about the distribution of samples within each bucket:
29-
* We assume they all lie on the single point of least error relative to the bucket boundaries (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}).
29+
* Additionally, all algorithms assume that samples within a bucket are located at a single point: the point of least relative error
30+
* (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}).
3031
*/
3132
public interface ExponentialHistogram {
3233

33-
//TODO: support min/max/sum/count storage and merging
34-
//TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries
34+
// TODO: support min/max/sum/count storage and merging
35+
// TODO: Add special positive and negative infinity buckets to allow representation of explicit bucket histograms with open boundaries
3536

36-
// scale of 38 is the largest scale where at the borders we don't run into problems due to floating point precision when computing
37-
// indices for double values
38-
// Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values
39-
// For that to work, we'll have to rework the math of converting from double to indices and back
37+
// A scale of 38 is the largest scale where we don't run into problems at the borders due to floating-point precision when computing
38+
// indices for double values.
39+
// Theoretically, a MAX_SCALE of 51 would work and would still cover the entire range of double values.
40+
// For that to work, the math for converting from double to indices and back would need to be reworked.
4041
// One option would be to use "Quadruple": https://github.com/m-vokhm/Quadruple
4142
int MAX_SCALE = 38;
4243

43-
// At this scale all double values already fall into a single bucket
44+
// At this scale, all double values fall into a single bucket.
4445
int MIN_SCALE = -11;
4546

46-
// Only use 62 bit at max to allow to compute the difference between the smallest and largest index without causing overflow
47-
// Also the extra bit gives us room for some tricks for compact storage
47+
// Only use 62 bits at max to allow computing the difference between the smallest and largest index without causing an overflow.
48+
// The extra bit also provides room for compact storage tricks.
4849
int MAX_INDEX_BITS = 62;
4950
long MAX_INDEX = (1L << MAX_INDEX_BITS) - 1;
5051
long MIN_INDEX = -MAX_INDEX;
5152

5253
/**
53-
* The scale of the histogram. Higher scales result in higher accuracy, but potentially higher bucket count.
54+
* The scale of the histogram. Higher scales result in higher accuracy but potentially more buckets.
5455
* Must be less than or equal to {@link #MAX_SCALE} and greater than or equal to {@link #MIN_SCALE}.
56+
*
57+
* @return the scale of the histogram
5558
*/
5659
int scale();
5760

5861
/**
59-
* @return the {@link ZeroBucket} representing the number of zero (or close to zero) values and its threshold
62+
* @return the {@link ZeroBucket} representing the number of zero (or close-to-zero) values and its threshold
6063
*/
6164
ZeroBucket zeroBucket();
6265

6366
/**
64-
* @return a {@link BucketIterator} for the populated, positive buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}.
67+
* @return a {@link BucketIterator} for the populated, positive buckets of this histogram.
68+
* The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}.
6569
*/
6670
CopyableBucketIterator positiveBuckets();
6771

6872
/**
69-
* @return a {@link BucketIterator} for the populated, negative buckets of this histogram. {@link BucketIterator#scale()} of the return value must return the same value as {@link #scale()}.
73+
* @return a {@link BucketIterator} for the populated, negative buckets of this histogram.
74+
* The {@link BucketIterator#scale()} of the returned iterator must be the same as {@link #scale()}.
7075
*/
7176
CopyableBucketIterator negativeBuckets();
7277

7378
/**
7479
* Returns the highest populated bucket index, taking both negative and positive buckets into account.
75-
* If there are neither positive nor negative buckets populated, an empty optional is returned.
80+
*
81+
* @return the highest populated bucket index, or an empty optional if no buckets are populated
7682
*/
7783
OptionalLong maximumBucketIndex();
7884

7985
/**
80-
* Iterator over non-empty buckets of the histogram. Can represent either the positive or negative histogram range.
86+
* An iterator over the non-empty buckets of the histogram for either the positive or negative range.
8187
* <ul>
82-
* <li>The iterator always iterates from the lowest bucket index to the highest</li>
83-
* <li>The iterator never returns duplicate buckets (buckets with the same index) </li>
84-
* <li>The iterator never returns empty buckets ({@link #peekCount() is never zero}</li>
88+
* <li>The iterator always iterates from the lowest bucket index to the highest.</li>
89+
* <li>The iterator never returns duplicate buckets (buckets with the same index).</li>
90+
* <li>The iterator never returns empty buckets ({@link #peekCount()} is never zero).</li>
8591
* </ul>
8692
*/
8793
interface BucketIterator {
8894
/**
8995
* Checks if there are any buckets remaining to be visited by this iterator.
90-
* If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()} or {@link #advance()}.
96+
* If the end has been reached, it is illegal to call {@link #peekCount()}, {@link #peekIndex()}, or {@link #advance()}.
9197
*
92-
* @return <code>false</code>, if the end has been reached, <code>true</code> otherwise.
98+
* @return {@code true} if the iterator has more elements, {@code false} otherwise
9399
*/
94100
boolean hasNext();
95101

96102
/**
97-
* The number of items in the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value.
98-
* Must not be called if {@link #hasNext()} returns <code>false</code>.
103+
* The number of items in the bucket at the current iterator position. Does not advance the iterator.
104+
* Must not be called if {@link #hasNext()} returns {@code false}.
99105
*
100106
* @return the number of items in the bucket, always greater than zero
101107
*/
102108
long peekCount();
103109

104110
/**
105-
* The index of the bucket this iterator currently points at. Does not advance the iterator by itself and therefore can be called repeatedly to return the same value.
106-
* Must not be called if {@link #hasNext()} returns <code>false</code>.
111+
* The index of the bucket at the current iterator position. Does not advance the iterator.
112+
* Must not be called if {@link #hasNext()} returns {@code false}.
107113
*
108114
* @return the index of the bucket, guaranteed to be in the range [{@link #MIN_INDEX}, {@link #MAX_INDEX}]
109115
*/
110116
long peekIndex();
111117

112118
/**
113119
* Moves the iterator to the next, non-empty bucket.
114-
* If {@link #hasNext()} is <code>true</code> after {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value bigger than prior to the {@link #advance()} call.
120+
* If {@link #hasNext()} is {@code true} after calling {@link #advance()}, {@link #peekIndex()} is guaranteed to return a value
121+
* greater than the value returned prior to the {@link #advance()} call.
115122
*/
116123
void advance();
117124

118125
/**
119126
* Provides the scale that can be used to convert indices returned by {@link #peekIndex()} to the bucket boundaries,
120-
* e.g. via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}.
127+
* e.g., via {@link ExponentialScaleUtils#getLowerBucketBoundary(long, int)}.
121128
*
122-
* @return the scale, which is guaranteed to be constant over the lifetime of this iterator.
129+
* @return the scale, which is guaranteed to be constant over the lifetime of this iterator
123130
*/
124131
int scale();
125132
}
126133

127134
/**
128-
* A {@link BucketIterator} which can be copied.
135+
* A {@link BucketIterator} that can be copied.
129136
*/
130137
interface CopyableBucketIterator extends BucketIterator {
131138

132139
/**
133-
* Provides a bucket iterator pointing at the same bucket of the same range of buckets as this iterator.
134-
* Calling {@link #advance()} on the copied iterator does not affect <code>this</code> and vice-versa.
140+
* Creates a copy of this bucket iterator, pointing at the same bucket of the same range of buckets.
141+
* Calling {@link #advance()} on the copied iterator does not affect this instance and vice-versa.
142+
*
143+
* @return a copy of this iterator
135144
*/
136145
CopyableBucketIterator copy();
137146
}
138-
139147
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,17 @@
1515
import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex;
1616

1717
/**
18-
* Class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum bucket count.
18+
* A class for accumulating raw values into an {@link ExponentialHistogram} with a given maximum number of buckets.
1919
*
20-
* If the number of values is less than or equal the bucket capacity, the resulting histogram is guaranteed
21-
* to represent the exact raw values with a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>
20+
* If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed
21+
* to represent the exact raw values with a relative error less than {@code 2^(2^-MAX_SCALE) - 1}.
2222
*/
2323
public class ExponentialHistogramGenerator {
2424

25-
// Merging individual values into a histogram would way to slow with our sparse, array-backed histogram representation
26-
// Therefore for a bucket capacity of c, we first buffer c raw values to be inserted
27-
// we then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator
28-
// This yields an amortized runtime of O( log(c) )
25+
// Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation.
26+
// Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted.
27+
// We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator.
28+
// This yields an amortized runtime of O(log(c)).
2929
private final double[] rawValueBuffer;
3030
int valueCount;
3131

@@ -34,6 +34,11 @@ public class ExponentialHistogramGenerator {
3434

3535
private boolean isFinished = false;
3636

37+
/**
38+
* Creates a new instance with the specified maximum number of buckets.
39+
*
40+
* @param maxBucketCount the maximum number of buckets for the generated histogram
41+
*/
3742
public ExponentialHistogramGenerator(int maxBucketCount) {
3843
rawValueBuffer = new double[maxBucketCount];
3944
valueCount = 0;
@@ -42,8 +47,10 @@ public ExponentialHistogramGenerator(int maxBucketCount) {
4247
}
4348

4449
/**
45-
* Add the given value to the histogram.
46-
* Must not be calles after {@link #get()} has been called.
50+
* Adds the given value to the histogram.
51+
* Must not be called after {@link #get()} has been called.
52+
*
53+
* @param value the value to add
4754
*/
4855
public void add(double value) {
4956
if (isFinished) {
@@ -57,7 +64,9 @@ public void add(double value) {
5764
}
5865

5966
/**
60-
* @return the histogram representing the distribution of all accumulated values.
67+
* Returns the histogram representing the distribution of all accumulated values.
68+
*
69+
* @return the histogram representing the distribution of all accumulated values
6170
*/
6271
public ExponentialHistogram get() {
6372
isFinished = true;
@@ -66,17 +75,25 @@ public ExponentialHistogram get() {
6675
}
6776

6877
/**
69-
* Create a histogram representing the distribution of the given values.
78+
* Creates a histogram representing the distribution of the given values.
7079
* The histogram will have a bucket count of at most the length of the provided array
71-
* and will have a relative error less than <code>2^(2^-MAX_SCALE) - 1</code>.
80+
* and will have a relative error less than {@code 2^(2^-MAX_SCALE) - 1}.
81+
*
82+
* @param values the values to be added to the histogram
83+
* @return a new {@link ExponentialHistogram}
7284
*/
7385
public static ExponentialHistogram createFor(double... values) {
7486
return createFor(values.length, Arrays.stream(values));
7587
}
88+
7689
/**
77-
* Create a histogram representing the distribution of the given values with at most the given number of buckets.
78-
* If the given bucketCount is greater or equal to the number of values, the resulting histogram will have a
79-
* relative error of less than <code>2^(2^-MAX_SCALE) - 1</code>.
90+
* Creates a histogram representing the distribution of the given values with at most the given number of buckets.
91+
* If the given bucketCount is greater than or equal to the number of values, the resulting histogram will have a
92+
* relative error of less than {@code 2^(2^-MAX_SCALE) - 1}.
93+
*
94+
* @param bucketCount the maximum number of buckets
95+
* @param values a stream of values to be added to the histogram
96+
* @return a new {@link ExponentialHistogram}
8097
*/
8198
public static ExponentialHistogram createFor(int bucketCount, DoubleStream values) {
8299
ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(bucketCount);

0 commit comments

Comments
 (0)