Skip to content

Commit 4b9175e

Browse files
authored
Optimize exponential histogram builder for in-order construction (#135836)
1 parent f9c72c1 commit 4b9175e

File tree

5 files changed

+438
-64
lines changed

5 files changed

+438
-64
lines changed

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,20 @@ interface Buckets {
144144
*/
145145
long valueCount();
146146

147+
/**
148+
* Returns the number of buckets. Note that this operation might require iterating over all buckets, and therefore is not cheap.
149+
* @return the number of buckets
150+
*/
151+
default int bucketCount() {
152+
int count = 0;
153+
BucketIterator it = iterator();
154+
while (it.hasNext()) {
155+
count++;
156+
it.advance();
157+
}
158+
return count;
159+
}
160+
147161
}
148162

149163
/**

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramBuilder.java

Lines changed: 155 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,17 @@
2121

2222
package org.elasticsearch.exponentialhistogram;
2323

24+
import org.elasticsearch.core.Releasable;
2425
import org.elasticsearch.core.Releasables;
2526

2627
import java.util.TreeMap;
2728

2829
/**
2930
* A builder for building a {@link ReleasableExponentialHistogram} directly from buckets.
30-
* Note that this class is not optimized regarding memory allocations or performance, so it is not intended for high-throughput usage.
3131
*/
32-
public class ExponentialHistogramBuilder {
32+
public class ExponentialHistogramBuilder implements Releasable {
33+
34+
private static final int DEFAULT_ESTIMATED_BUCKET_COUNT = 32;
3335

3436
private final ExponentialHistogramCircuitBreaker breaker;
3537

@@ -39,8 +41,16 @@ public class ExponentialHistogramBuilder {
3941
private Double min;
4042
private Double max;
4143

42-
private final TreeMap<Long, Long> negativeBuckets = new TreeMap<>();
43-
private final TreeMap<Long, Long> positiveBuckets = new TreeMap<>();
44+
private int estimatedBucketCount = DEFAULT_ESTIMATED_BUCKET_COUNT;
45+
46+
// If the buckets are provided in order, we directly build the histogram to avoid unnecessary copies and allocations
47+
// If a bucket is received out of order, we fallback to storing the buckets in the TreeMaps and build the histogram at the end.
48+
private FixedCapacityExponentialHistogram result;
49+
// Visible for testing to ensure that the low-allocation path is taken for ordered buckets
50+
TreeMap<Long, Long> negativeBuckets;
51+
TreeMap<Long, Long> positiveBuckets;
52+
53+
private boolean resultAlreadyReturned = false;
4454

4555
ExponentialHistogramBuilder(int scale, ExponentialHistogramCircuitBreaker breaker) {
4656
this.breaker = breaker;
@@ -53,6 +63,7 @@ public class ExponentialHistogramBuilder {
5363
sum(toCopy.sum());
5464
min(toCopy.min());
5565
max(toCopy.max());
66+
estimatedBucketCount(toCopy.negativeBuckets().bucketCount() + toCopy.positiveBuckets().bucketCount());
5667
BucketIterator negBuckets = toCopy.negativeBuckets().iterator();
5768
while (negBuckets.hasNext()) {
5869
setNegativeBucket(negBuckets.peekIndex(), negBuckets.peekCount());
@@ -65,6 +76,19 @@ public class ExponentialHistogramBuilder {
6576
}
6677
}
6778

79+
/**
80+
* If known, sets the estimated total number of buckets to minimize unnecessary allocations.
81+
* Only has an effect if invoked before the first call to
82+
* {@link #setPositiveBucket(long, long)} and {@link #setNegativeBucket(long, long)}.
83+
*
84+
* @param totalBuckets the total number of buckets expected to be added
85+
* @return the builder
86+
*/
87+
public ExponentialHistogramBuilder estimatedBucketCount(int totalBuckets) {
88+
estimatedBucketCount = totalBuckets;
89+
return this;
90+
}
91+
6892
public ExponentialHistogramBuilder scale(int scale) {
6993
this.scale = scale;
7094
return this;
@@ -106,69 +130,160 @@ public ExponentialHistogramBuilder max(double max) {
106130
}
107131

108132
/**
109-
* Sets the given bucket of the positive buckets.
110-
* Buckets may be set in arbitrary order. If the bucket already exists, it will be replaced.
133+
* Sets the given bucket of the positive buckets. If the bucket already exists, it will be replaced.
134+
* Buckets may be set in arbitrary order. However, for best performance and minimal allocations,
135+
* buckets should be set in order of increasing index and all negative buckets should be set before positive buckets.
111136
*
112137
* @param index the index of the bucket
113138
* @param count the count of the bucket, must be at least 1
114139
* @return the builder
115140
*/
116141
public ExponentialHistogramBuilder setPositiveBucket(long index, long count) {
117-
if (count < 1) {
118-
throw new IllegalArgumentException("Bucket count must be at least 1");
119-
}
120-
positiveBuckets.put(index, count);
142+
setBucket(index, count, true);
121143
return this;
122144
}
123145

124146
/**
125-
* Sets the given bucket of the negative buckets.
126-
* Buckets may be set in arbitrary order. If the bucket already exists, it will be replaced.
147+
* Sets the given bucket of the negative buckets. If the bucket already exists, it will be replaced.
148+
* Buckets may be set in arbitrary order. However, for best performance and minimal allocations,
149+
* buckets should be set in order of increasing index and all negative buckets should be set before positive buckets.
127150
*
128151
* @param index the index of the bucket
129152
* @param count the count of the bucket, must be at least 1
130153
* @return the builder
131154
*/
132155
public ExponentialHistogramBuilder setNegativeBucket(long index, long count) {
156+
setBucket(index, count, false);
157+
return this;
158+
}
159+
160+
private void setBucket(long index, long count, boolean isPositive) {
133161
if (count < 1) {
134162
throw new IllegalArgumentException("Bucket count must be at least 1");
135163
}
136-
negativeBuckets.put(index, count);
137-
return this;
164+
if (negativeBuckets == null && positiveBuckets == null) {
165+
// so far, all received buckets were in order, try to directly build the result
166+
if (result == null) {
167+
// Initialize the result buffer if required
168+
reallocateResultWithCapacity(estimatedBucketCount, false);
169+
}
170+
if ((isPositive && result.wasLastAddedBucketPositive() == false)
171+
|| (isPositive == result.wasLastAddedBucketPositive() && index > result.getLastAddedBucketIndex())) {
172+
// the new bucket is in order too, we can directly add the bucket
173+
addBucketToResult(index, count, isPositive);
174+
return;
175+
}
176+
}
177+
// fallback to TreeMap if a bucket is received out of order
178+
initializeBucketTreeMapsIfNeeded();
179+
if (isPositive) {
180+
positiveBuckets.put(index, count);
181+
} else {
182+
negativeBuckets.put(index, count);
183+
}
184+
}
185+
186+
private void initializeBucketTreeMapsIfNeeded() {
187+
if (negativeBuckets == null) {
188+
negativeBuckets = new TreeMap<>();
189+
positiveBuckets = new TreeMap<>();
190+
// copy existing buckets to the maps
191+
if (result != null) {
192+
BucketIterator it = result.negativeBuckets().iterator();
193+
while (it.hasNext()) {
194+
negativeBuckets.put(it.peekIndex(), it.peekCount());
195+
it.advance();
196+
}
197+
it = result.positiveBuckets().iterator();
198+
while (it.hasNext()) {
199+
positiveBuckets.put(it.peekIndex(), it.peekCount());
200+
it.advance();
201+
}
202+
}
203+
}
204+
}
205+
206+
private void addBucketToResult(long index, long count, boolean isPositive) {
207+
if (resultAlreadyReturned) {
208+
// we cannot modify the result anymore, create a new one
209+
reallocateResultWithCapacity(result.getCapacity(), true);
210+
}
211+
assert resultAlreadyReturned == false;
212+
boolean sufficientCapacity = result.tryAddBucket(index, count, isPositive);
213+
if (sufficientCapacity == false) {
214+
int newCapacity = Math.max(result.getCapacity() * 2, DEFAULT_ESTIMATED_BUCKET_COUNT);
215+
reallocateResultWithCapacity(newCapacity, true);
216+
boolean bucketAdded = result.tryAddBucket(index, count, isPositive);
217+
assert bucketAdded : "Output histogram should have enough capacity";
218+
}
219+
}
220+
221+
private void reallocateResultWithCapacity(int newCapacity, boolean copyBucketsFromPreviousResult) {
222+
FixedCapacityExponentialHistogram newResult = FixedCapacityExponentialHistogram.create(newCapacity, breaker);
223+
if (copyBucketsFromPreviousResult && result != null) {
224+
BucketIterator it = result.negativeBuckets().iterator();
225+
while (it.hasNext()) {
226+
boolean added = newResult.tryAddBucket(it.peekIndex(), it.peekCount(), false);
227+
assert added : "Output histogram should have enough capacity";
228+
it.advance();
229+
}
230+
it = result.positiveBuckets().iterator();
231+
while (it.hasNext()) {
232+
boolean added = newResult.tryAddBucket(it.peekIndex(), it.peekCount(), true);
233+
assert added : "Output histogram should have enough capacity";
234+
it.advance();
235+
}
236+
}
237+
if (result != null && resultAlreadyReturned == false) {
238+
Releasables.close(result);
239+
}
240+
resultAlreadyReturned = false;
241+
result = newResult;
138242
}
139243

140244
public ReleasableExponentialHistogram build() {
141-
FixedCapacityExponentialHistogram result = FixedCapacityExponentialHistogram.create(
142-
negativeBuckets.size() + positiveBuckets.size(),
143-
breaker
144-
);
145-
boolean success = false;
146-
try {
245+
if (resultAlreadyReturned) {
246+
// result was already returned on a previous call, return a new instance
247+
reallocateResultWithCapacity(result.getCapacity(), true);
248+
}
249+
assert resultAlreadyReturned == false;
250+
if (negativeBuckets != null) {
251+
// copy buckets from tree maps into result
252+
reallocateResultWithCapacity(negativeBuckets.size() + positiveBuckets.size(), false);
147253
result.resetBuckets(scale);
148-
result.setZeroBucket(zeroBucket);
149254
negativeBuckets.forEach((index, count) -> result.tryAddBucket(index, count, false));
150255
positiveBuckets.forEach((index, count) -> result.tryAddBucket(index, count, true));
151-
152-
double sumVal = (sum != null)
153-
? sum
154-
: ExponentialHistogramUtils.estimateSum(result.negativeBuckets().iterator(), result.positiveBuckets().iterator());
155-
double minVal = (min != null)
156-
? min
157-
: ExponentialHistogramUtils.estimateMin(zeroBucket, result.negativeBuckets(), result.positiveBuckets()).orElse(Double.NaN);
158-
double maxVal = (max != null)
159-
? max
160-
: ExponentialHistogramUtils.estimateMax(zeroBucket, result.negativeBuckets(), result.positiveBuckets()).orElse(Double.NaN);
161-
162-
result.setMin(minVal);
163-
result.setMax(maxVal);
164-
result.setSum(sumVal);
165-
166-
success = true;
167-
} finally {
168-
if (success == false) {
169-
Releasables.close(result);
256+
} else {
257+
if (result == null) {
258+
// no buckets were added
259+
reallocateResultWithCapacity(0, false);
170260
}
261+
result.setScale(scale);
171262
}
263+
264+
result.setZeroBucket(zeroBucket);
265+
double sumVal = (sum != null)
266+
? sum
267+
: ExponentialHistogramUtils.estimateSum(result.negativeBuckets().iterator(), result.positiveBuckets().iterator());
268+
double minVal = (min != null)
269+
? min
270+
: ExponentialHistogramUtils.estimateMin(zeroBucket, result.negativeBuckets(), result.positiveBuckets()).orElse(Double.NaN);
271+
double maxVal = (max != null)
272+
? max
273+
: ExponentialHistogramUtils.estimateMax(zeroBucket, result.negativeBuckets(), result.positiveBuckets()).orElse(Double.NaN);
274+
275+
result.setMin(minVal);
276+
result.setMax(maxVal);
277+
result.setSum(sumVal);
278+
279+
resultAlreadyReturned = true;
172280
return result;
173281
}
282+
283+
@Override
284+
public void close() {
285+
if (resultAlreadyReturned == false) {
286+
Releasables.close(result);
287+
}
288+
}
174289
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogr
7878
reset();
7979
}
8080

81+
int getCapacity() {
82+
return bucketIndices.length;
83+
}
84+
8185
/**
8286
* Resets this histogram to the same state as a newly constructed one with the same capacity.
8387
*/
@@ -95,10 +99,9 @@ void reset() {
9599
* @param scale the scale to set for this histogram
96100
*/
97101
void resetBuckets(int scale) {
98-
assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
102+
setScale(scale);
99103
negativeBuckets.reset();
100104
positiveBuckets.reset();
101-
bucketScale = scale;
102105
}
103106

104107
@Override
@@ -180,6 +183,11 @@ public int scale() {
180183
return bucketScale;
181184
}
182185

186+
void setScale(int scale) {
187+
assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ".." + MAX_SCALE + "]";
188+
bucketScale = scale;
189+
}
190+
183191
@Override
184192
public ExponentialHistogram.Buckets negativeBuckets() {
185193
return negativeBuckets;
@@ -190,6 +198,25 @@ public ExponentialHistogram.Buckets positiveBuckets() {
190198
return positiveBuckets;
191199
}
192200

201+
/**
202+
* @return the index of the last bucket added successfully via {@link #tryAddBucket(long, long, boolean)},
203+
* or {@link Long#MIN_VALUE} if no buckets have been added yet.
204+
*/
205+
long getLastAddedBucketIndex() {
206+
if (positiveBuckets.numBuckets + negativeBuckets.numBuckets > 0) {
207+
return bucketIndices[negativeBuckets.numBuckets + positiveBuckets.numBuckets - 1];
208+
} else {
209+
return Long.MIN_VALUE;
210+
}
211+
}
212+
213+
/**
214+
* @return true, if the last bucket added successfully via {@link #tryAddBucket(long, long, boolean)} was a positive one.
215+
*/
216+
boolean wasLastAddedBucketPositive() {
217+
return positiveBuckets.numBuckets > 0;
218+
}
219+
193220
@Override
194221
public void close() {
195222
if (closed) {
@@ -276,6 +303,11 @@ public long valueCount() {
276303
}
277304
return cachedValueSum;
278305
}
306+
307+
@Override
308+
public int bucketCount() {
309+
return numBuckets;
310+
}
279311
}
280312

281313
}

0 commit comments

Comments
 (0)