Skip to content

Commit 9423dcb

Browse files
authored
Add min support to exponential histograms (#133639)
1 parent 12fd34d commit 9423dcb

File tree

16 files changed

+373
-44
lines changed

16 files changed

+373
-44
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ private ExponentialHistogram asCompressedHistogram(ExponentialHistogram histogra
130130
CompressedExponentialHistogram.writeHistogramBytes(histoBytes, histogram.scale(), negativeBuckets, positiveBuckets);
131131
CompressedExponentialHistogram result = new CompressedExponentialHistogram();
132132
BytesRef data = histoBytes.bytes().toBytesRef();
133-
result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), data);
133+
result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), histogram.min(), data);
134134
return result;
135135
} catch (IOException e) {
136136
throw new RuntimeException(e);

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ public double sum() {
8282
return 0;
8383
}
8484

85+
@Override
86+
public double min() {
87+
return Double.NaN;
88+
}
89+
8590
@Override
8691
public long ramBytesUsed() {
8792
return 0;

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@ public interface ExponentialHistogram extends Accountable {
102102
*/
103103
double sum();
104104

105+
/**
106+
* Returns the minimum of all values represented by this histogram.
107+
*
108+
* @return the minimum, NaN for empty histograms
109+
*/
110+
double min();
111+
105112
/**
106113
* Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range.
107114
*/

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ private void mergeValuesToHistogram() {
123123
}
124124

125125
valueBuffer.reset();
126-
valueBuffer.setSum(rawValuesSum());
126+
Aggregates aggregates = rawValuesAggregates();
127+
valueBuffer.setSum(aggregates.sum());
128+
valueBuffer.setMin(aggregates.min());
127129
int scale = valueBuffer.scale();
128130

129131
// Buckets must be provided with their indices in ascending order.
@@ -162,12 +164,17 @@ private void mergeValuesToHistogram() {
162164
valueCount = 0;
163165
}
164166

165-
private double rawValuesSum() {
167+
private Aggregates rawValuesAggregates() {
168+
if (valueCount == 0) {
169+
return new Aggregates(0, Double.NaN);
170+
}
166171
double sum = 0;
172+
double min = Double.MAX_VALUE;
167173
for (int i = 0; i < valueCount; i++) {
168174
sum += rawValueBuffer[i];
175+
min = Math.min(min, rawValueBuffer[i]);
169176
}
170-
return sum;
177+
return new Aggregates(sum, min);
171178
}
172179

173180
private static long estimateBaseSize(int numBuckets) {
@@ -190,4 +197,6 @@ public void close() {
190197
circuitBreaker.adjustBreaker(-estimateBaseSize(rawValueBuffer.length));
191198
}
192199
}
200+
201+
/** Carrier for the sum and the minimum computed together over the buffered raw values. */
private record Aggregates(double sum, double min) {}
193202
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ public void add(ExponentialHistogram toAdd) {
151151
}
152152
buffer.setZeroBucket(zeroBucket);
153153
buffer.setSum(a.sum() + b.sum());
154-
154+
buffer.setMin(nanAwareMin(a.min(), b.min()));
155155
// We attempt to bring everything to the scale of A.
156156
// This might involve increasing the scale for B, which would increase its indices.
157157
// We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case.
@@ -231,4 +231,14 @@ private static int putBuckets(
231231
return overflowCount;
232232
}
233233

234+
private static double nanAwareMin(double a, double b) {
235+
if (Double.isNaN(a)) {
236+
return b;
237+
}
238+
if (Double.isNaN(b)) {
239+
return a;
240+
}
241+
return Math.min(a, b);
242+
}
243+
234244
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121

2222
package org.elasticsearch.exponentialhistogram;
2323

24+
import java.util.OptionalDouble;
25+
import java.util.OptionalLong;
26+
2427
public class ExponentialHistogramUtils {
2528

2629
/**
@@ -59,4 +62,44 @@ public static double estimateSum(BucketIterator negativeBuckets, BucketIterator
5962
}
6063
return sum;
6164
}
65+
66+
/**
67+
* Estimates the minimum value of the histogram based on the populated buckets.
68+
* The returned value is guaranteed to be less than or equal to the exact minimum value of the histogram values.
69+
* If the histogram is empty, an empty Optional is returned.
70+
*
71+
* Note that this method can return +-Infinity if the histogram bucket boundaries are not representable in a double.
72+
*
73+
* @param zeroBucket the zero bucket of the histogram
74+
* @param negativeBuckets the negative buckets of the histogram
75+
* @param positiveBuckets the positive buckets of the histogram
76+
* @return the estimated minimum
77+
*/
78+
public static OptionalDouble estimateMin(
79+
ZeroBucket zeroBucket,
80+
ExponentialHistogram.Buckets negativeBuckets,
81+
ExponentialHistogram.Buckets positiveBuckets
82+
) {
83+
int scale = negativeBuckets.iterator().scale();
84+
assert scale == positiveBuckets.iterator().scale();
85+
86+
OptionalLong negativeMaxIndex = negativeBuckets.maxBucketIndex();
87+
if (negativeMaxIndex.isPresent()) {
88+
return OptionalDouble.of(-ExponentialScaleUtils.getUpperBucketBoundary(negativeMaxIndex.getAsLong(), scale));
89+
}
90+
91+
if (zeroBucket.count() > 0) {
92+
if (zeroBucket.zeroThreshold() == 0.0) {
93+
// avoid negative zero
94+
return OptionalDouble.of(0.0);
95+
}
96+
return OptionalDouble.of(-zeroBucket.zeroThreshold());
97+
}
98+
99+
BucketIterator positiveBucketsIt = positiveBuckets.iterator();
100+
if (positiveBucketsIt.hasNext()) {
101+
return OptionalDouble.of(ExponentialScaleUtils.getLowerBucketBoundary(positiveBucketsIt.peekIndex(), scale));
102+
}
103+
return OptionalDouble.empty();
104+
}
62105
}

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ public class ExponentialHistogramXContent {
3232

3333
public static final String SCALE_FIELD = "scale";
3434
public static final String SUM_FIELD = "sum";
35+
public static final String MIN_FIELD = "min";
3536
public static final String ZERO_FIELD = "zero";
3637
public static final String ZERO_COUNT_FIELD = "count";
3738
public static final String ZERO_THRESHOLD_FIELD = "threshold";
@@ -51,6 +52,9 @@ public static void serialize(XContentBuilder builder, ExponentialHistogram histo
5152

5253
builder.field(SCALE_FIELD, histogram.scale());
5354
builder.field(SUM_FIELD, histogram.sum());
55+
if (Double.isNaN(histogram.min()) == false) {
56+
builder.field(MIN_FIELD, histogram.min());
57+
}
5458
double zeroThreshold = histogram.zeroBucket().zeroThreshold();
5559
long zeroCount = histogram.zeroBucket().count();
5660

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ final class FixedCapacityExponentialHistogram implements ReleasableExponentialHi
5454
private final Buckets positiveBuckets = new Buckets(true);
5555

5656
private double sum;
57+
private double min;
5758

5859
private final ExponentialHistogramCircuitBreaker circuitBreaker;
5960
private boolean closed = false;
@@ -81,6 +82,7 @@ private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogr
8182
*/
8283
void reset() {
8384
sum = 0;
85+
min = Double.NaN;
8486
setZeroBucket(ZeroBucket.minimalEmpty());
8587
resetBuckets(MAX_SCALE);
8688
}
@@ -122,6 +124,15 @@ void setSum(double sum) {
122124
this.sum = sum;
123125
}
124126

127+
@Override
128+
public double min() {
129+
return min;
130+
}
131+
132+
void setMin(double min) {
133+
this.min = min;
134+
}
135+
125136
/**
126137
* Attempts to add a bucket to the positive or negative range of this histogram.
127138
* <br>

libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.Collections;
3030
import java.util.List;
3131
import java.util.stream.Collectors;
32+
import java.util.stream.DoubleStream;
3233
import java.util.stream.IntStream;
3334

3435
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
@@ -106,19 +107,24 @@ public void testEmptyZeroBucketIgnored() {
106107
assertThat(posBuckets.hasNext(), equalTo(false));
107108
}
108109

109-
public void testSumCorrectness() {
110+
public void testAggregatesCorrectness() {
110111
double[] firstValues = randomDoubles(100).map(val -> val * 2 - 1).toArray();
111112
double[] secondValues = randomDoubles(50).map(val -> val * 2 - 1).toArray();
112113
double correctSum = Arrays.stream(firstValues).sum() + Arrays.stream(secondValues).sum();
114+
double correctMin = DoubleStream.concat(Arrays.stream(firstValues), Arrays.stream(secondValues)).min().getAsDouble();
113115
try (
116+
// Merge some empty histograms too to test that code path
114117
ReleasableExponentialHistogram merged = ExponentialHistogram.merge(
115118
2,
116119
breaker(),
120+
ExponentialHistogram.empty(),
117121
createAutoReleasedHistogram(10, firstValues),
118-
createAutoReleasedHistogram(20, secondValues)
122+
createAutoReleasedHistogram(20, secondValues),
123+
ExponentialHistogram.empty()
119124
)
120125
) {
121126
assertThat(merged.sum(), closeTo(correctSum, 0.000001));
127+
assertThat(merged.min(), equalTo(correctMin));
122128
}
123129
}
124130

libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
package org.elasticsearch.exponentialhistogram;
2323

24+
import java.util.OptionalDouble;
25+
2426
import static org.hamcrest.Matchers.closeTo;
2527
import static org.hamcrest.Matchers.equalTo;
2628

@@ -57,7 +59,7 @@ public void testRandomDataSumEstimation() {
5759
}
5860
}
5961

60-
public void testInfinityHandling() {
62+
public void testSumInfinityHandling() {
6163
FixedCapacityExponentialHistogram morePositiveValues = createAutoReleasedHistogram(100);
6264
morePositiveValues.resetBuckets(0);
6365
morePositiveValues.tryAddBucket(1999, 1, false);
@@ -83,4 +85,85 @@ public void testInfinityHandling() {
8385
);
8486
assertThat(sum, equalTo(Double.NEGATIVE_INFINITY));
8587
}
88+
89+
public void testMinimumEstimation() {
90+
for (int i = 0; i < 100; i++) {
91+
int positiveValueCount = randomBoolean() ? 0 : randomIntBetween(10, 10_000);
92+
int negativeValueCount = randomBoolean() ? 0 : randomIntBetween(10, 10_000);
93+
int zeroValueCount = randomBoolean() ? 0 : randomIntBetween(10, 100);
94+
int bucketCount = randomIntBetween(4, 500);
95+
96+
double correctMin = Double.MAX_VALUE;
97+
double zeroThreshold = Double.MAX_VALUE;
98+
double[] values = new double[positiveValueCount + negativeValueCount];
99+
for (int j = 0; j < values.length; j++) {
100+
double absValue = Math.pow(10, randomIntBetween(1, 9)) * randomDouble();
101+
if (j < positiveValueCount) {
102+
values[j] = absValue;
103+
} else {
104+
values[j] = -absValue;
105+
}
106+
zeroThreshold = Math.min(zeroThreshold, absValue / 2);
107+
correctMin = Math.min(correctMin, values[j]);
108+
}
109+
if (zeroValueCount > 0) {
110+
correctMin = Math.min(correctMin, -zeroThreshold);
111+
}
112+
113+
ExponentialHistogram histo = createAutoReleasedHistogram(bucketCount, values);
114+
115+
OptionalDouble estimatedMin = ExponentialHistogramUtils.estimateMin(
116+
new ZeroBucket(zeroThreshold, zeroValueCount),
117+
histo.negativeBuckets(),
118+
histo.positiveBuckets()
119+
);
120+
if (correctMin == Double.MAX_VALUE) {
121+
assertThat(estimatedMin.isPresent(), equalTo(false));
122+
} else {
123+
assertThat(estimatedMin.isPresent(), equalTo(true));
124+
// If the histogram does not contain mixed sign values, we have a guaranteed relative error bound of 2^(2^-scale) - 1
125+
double histogramBase = Math.pow(2, Math.pow(2, -histo.scale()));
126+
double allowedError = Math.abs(correctMin * (histogramBase - 1));
127+
assertThat(estimatedMin.getAsDouble(), closeTo(correctMin, allowedError));
128+
}
129+
}
130+
}
131+
132+
public void testMinimumEstimationPositiveInfinityHandling() {
133+
FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(100);
134+
histo.resetBuckets(0);
135+
histo.tryAddBucket(2000, 1, true);
136+
137+
OptionalDouble estimate = ExponentialHistogramUtils.estimateMin(
138+
ZeroBucket.minimalEmpty(),
139+
histo.negativeBuckets(),
140+
histo.positiveBuckets()
141+
);
142+
assertThat(estimate.isPresent(), equalTo(true));
143+
assertThat(estimate.getAsDouble(), equalTo(Double.POSITIVE_INFINITY));
144+
}
145+
146+
public void testMinimumEstimationNegativeInfinityHandling() {
147+
FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(100);
148+
histo.resetBuckets(0);
149+
histo.tryAddBucket(2000, 1, false);
150+
151+
OptionalDouble estimate = ExponentialHistogramUtils.estimateMin(
152+
ZeroBucket.minimalEmpty(),
153+
histo.negativeBuckets(),
154+
histo.positiveBuckets()
155+
);
156+
assertThat(estimate.isPresent(), equalTo(true));
157+
assertThat(estimate.getAsDouble(), equalTo(Double.NEGATIVE_INFINITY));
158+
}
159+
160+
public void testMinimumEstimationSanitizedNegativeZero() {
161+
OptionalDouble estimate = ExponentialHistogramUtils.estimateMin(
162+
ZeroBucket.minimalWithCount(42),
163+
ExponentialHistogram.empty().negativeBuckets(),
164+
ExponentialHistogram.empty().positiveBuckets()
165+
);
166+
assertThat(estimate.isPresent(), equalTo(true));
167+
assertThat(estimate.getAsDouble(), equalTo(0.0));
168+
}
86169
}

0 commit comments

Comments
 (0)