Skip to content

Commit ebd868b

Browse files
authored
Add rank estimation for exponential histograms (#135692)
1 parent 2858d6a commit ebd868b

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramQuantile.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,45 @@ public static double getQuantile(ExponentialHistogram histo, double quantile) {
7373
return removeNegativeZero(result);
7474
}
7575

76+
/**
77+
* Estimates the rank of a given value in the distribution represented by the histogram.
78+
* In other words, returns the number of values which are less than (or less-or-equal, if {@code inclusive} is true)
79+
* the provided value.
80+
*
81+
* @param histo the histogram to query
82+
* @param value the value to estimate the rank for
83+
* @param inclusive if true, counts values equal to the given value as well
84+
* @return the number of elements less than (or less-or-equal, if {@code inclusive} is true) the given value
85+
*/
86+
public static long estimateRank(ExponentialHistogram histo, double value, boolean inclusive) {
87+
if (value >= 0) {
88+
long rank = histo.negativeBuckets().valueCount();
89+
if (value > 0 || inclusive) {
90+
rank += histo.zeroBucket().count();
91+
}
92+
rank += estimateRank(histo.positiveBuckets().iterator(), value, inclusive, histo.max());
93+
return rank;
94+
} else {
95+
long numValuesGreater = estimateRank(histo.negativeBuckets().iterator(), -value, inclusive == false, -histo.min());
96+
return histo.negativeBuckets().valueCount() - numValuesGreater;
97+
}
98+
}
99+
100+
private static long estimateRank(BucketIterator buckets, double value, boolean inclusive, double maxValue) {
101+
long rank = 0;
102+
while (buckets.hasNext()) {
103+
double bucketMidpoint = ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale());
104+
bucketMidpoint = Math.min(bucketMidpoint, maxValue);
105+
if (bucketMidpoint < value || (inclusive && bucketMidpoint == value)) {
106+
rank += buckets.peekCount();
107+
buckets.advance();
108+
} else {
109+
break;
110+
}
111+
}
112+
return rank;
113+
}
114+
76115
private static double removeNegativeZero(double result) {
77116
return result == 0.0 ? 0.0 : result;
78117
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
3+
* under one or more license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*
19+
* This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
20+
*/
21+
22+
package org.elasticsearch.exponentialhistogram;
23+
24+
import java.util.Arrays;
25+
import java.util.stream.DoubleStream;
26+
27+
import static org.hamcrest.Matchers.equalTo;
28+
29+
public class RankAccuracyTests extends ExponentialHistogramTestCase {
30+
31+
public void testRandomDistribution() {
32+
int numValues = randomIntBetween(10, 10_000);
33+
double[] values = new double[numValues];
34+
35+
int valuesGenerated = 0;
36+
while (valuesGenerated < values.length) {
37+
double value;
38+
if (randomDouble() < 0.01) { // 1% chance of exact zero
39+
value = 0;
40+
} else {
41+
value = randomDouble() * 2_000_000 - 1_000_000;
42+
}
43+
// Add some duplicates
44+
for (int i = 0; i < randomIntBetween(1, 10) && valuesGenerated < values.length; i++) {
45+
values[valuesGenerated++] = value;
46+
}
47+
}
48+
49+
int numBuckets = randomIntBetween(4, 400);
50+
ExponentialHistogram histo = createAutoReleasedHistogram(numBuckets, values);
51+
52+
Arrays.sort(values);
53+
double min = values[0];
54+
double max = values[values.length - 1];
55+
56+
double[] valuesRoundedToBucketCenters = DoubleStream.of(values).map(value -> {
57+
if (value == 0) {
58+
return 0;
59+
}
60+
long index = ExponentialScaleUtils.computeIndex(value, histo.scale());
61+
double bucketCenter = Math.signum(value) * ExponentialScaleUtils.getPointOfLeastRelativeError(index, histo.scale());
62+
return Math.clamp(bucketCenter, min, max);
63+
}).toArray();
64+
65+
// Test the values at exactly the bucket center for exclusivity correctness
66+
for (double v : valuesRoundedToBucketCenters) {
67+
long inclusiveRank = getRank(v, valuesRoundedToBucketCenters, true);
68+
assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, true), equalTo(inclusiveRank));
69+
long exclusiveRank = getRank(v, valuesRoundedToBucketCenters, false);
70+
assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, false), equalTo(exclusiveRank));
71+
}
72+
// Test the original values to have values in between bucket centers
73+
for (double v : values) {
74+
long inclusiveRank = getRank(v, valuesRoundedToBucketCenters, true);
75+
assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, true), equalTo(inclusiveRank));
76+
long exclusiveRank = getRank(v, valuesRoundedToBucketCenters, false);
77+
assertThat(ExponentialHistogramQuantile.estimateRank(histo, v, false), equalTo(exclusiveRank));
78+
}
79+
80+
}
81+
82+
private static long getRank(double value, double[] sortedValues, boolean inclusive) {
83+
for (int i = 0; i < sortedValues.length; i++) {
84+
if (sortedValues[i] > value || (inclusive == false && sortedValues[i] == value)) {
85+
return i;
86+
}
87+
}
88+
return sortedValues.length;
89+
}
90+
}

0 commit comments

Comments
 (0)