-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Add compute functions to ExponentialHistogramState #136749
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -20,10 +20,17 @@ | |||||||||||
import org.elasticsearch.exponentialhistogram.ExponentialHistogramBuilder; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ExponentialHistogramQuantile; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ExponentialScaleUtils; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ReleasableExponentialHistogram; | ||||||||||||
import org.elasticsearch.exponentialhistogram.ZeroBucket; | ||||||||||||
import org.elasticsearch.tdigest.Centroid; | ||||||||||||
|
||||||||||||
import java.io.IOException; | ||||||||||||
import java.util.ArrayList; | ||||||||||||
import java.util.Collection; | ||||||||||||
import java.util.Collections; | ||||||||||||
import java.util.List; | ||||||||||||
|
||||||||||||
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; | ||||||||||||
import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; | ||||||||||||
|
@@ -117,6 +124,96 @@ public void add(ExponentialHistogram histogram) { | |||||||||||
mergedHistograms.add(histogram); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* Returns the number of scalar values added to this histogram, so the sum | ||||||||||||
* of {@link ExponentialHistogram#valueCount()} for all histograms added. | ||||||||||||
* @return the number of values | ||||||||||||
*/ | ||||||||||||
public long size() { | ||||||||||||
return histogram().valueCount(); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* Returns the fraction of all points added which are ≤ x. Points | ||||||||||||
* that are exactly equal get half credit (i.e. we use the mid-point | ||||||||||||
* rule) | ||||||||||||
* | ||||||||||||
* @param x The cutoff for the cdf. | ||||||||||||
* @return The fraction of all data which is less or equal to x. | ||||||||||||
*/ | ||||||||||||
public double cdf(double x) { | ||||||||||||
ExponentialHistogram histogram = histogram(); | ||||||||||||
long numValuesLess = ExponentialHistogramQuantile.estimateRank(histogram, x, false); | ||||||||||||
long numValuesLessOrEqual = ExponentialHistogramQuantile.estimateRank(histogram, x, true); | ||||||||||||
long numValuesEqual = numValuesLessOrEqual - numValuesLess; | ||||||||||||
// Just like for t-digest, equal values get half credit | ||||||||||||
return (numValuesLess + numValuesEqual / 2.0) / histogram.valueCount(); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* Returns an estimate of a cutoff such that a specified fraction of the data | ||||||||||||
* added to this TDigest would be less than or equal to the cutoff. | ||||||||||||
* | ||||||||||||
* @param q The desired fraction | ||||||||||||
* @return The smallest value x such that cdf(x) ≥ q | ||||||||||||
*/ | ||||||||||||
public double quantile(double q) { | ||||||||||||
return ExponentialHistogramQuantile.getQuantile(histogram(), q); | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* @return an array of the mean values of the populated histogram buckets with their counts | ||||||||||||
*/ | ||||||||||||
public Collection<Centroid> centroids() { | ||||||||||||
List<Centroid> centroids = new ArrayList<>(); | ||||||||||||
addBucketCentersAsCentroids(centroids, histogram().negativeBuckets().iterator(), -1); | ||||||||||||
// negative buckets are in decreasing order, we want increasing order, therefore reverse | ||||||||||||
Collections.reverse(centroids); | ||||||||||||
Comment on lines
+170
to
+171
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit confused by this. Lines 42 to 46 in a0f415d
I guess the last sentence in the comment is wrong then? The indices are ascending but the highest index for the negative scale has the lowest value. Did I get that right? |
||||||||||||
if (histogram().zeroBucket().count() > 0) { | ||||||||||||
centroids.add(new Centroid(0.0, histogram().zeroBucket().count())); | ||||||||||||
} | ||||||||||||
addBucketCentersAsCentroids(centroids, histogram().positiveBuckets().iterator(), 1); | ||||||||||||
return centroids; | ||||||||||||
} | ||||||||||||
|
||||||||||||
private void addBucketCentersAsCentroids(List<Centroid> result, BucketIterator buckets, int sign) { | ||||||||||||
while (buckets.hasNext()) { | ||||||||||||
double center = sign * ExponentialScaleUtils.getPointOfLeastRelativeError(buckets.peekIndex(), buckets.scale()); | ||||||||||||
long count = buckets.peekCount(); | ||||||||||||
result.add(new Centroid(center, count)); | ||||||||||||
buckets.advance(); | ||||||||||||
} | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* @return the length of the array returned by {@link #centroids()}. | ||||||||||||
*/ | ||||||||||||
public int centroidCount() { | ||||||||||||
ExponentialHistogram histo = histogram(); | ||||||||||||
int count = histo.zeroBucket().count() > 0 ? 1 : 0; | ||||||||||||
count += histo.negativeBuckets().bucketCount(); | ||||||||||||
count += histo.positiveBuckets().bucketCount(); | ||||||||||||
return count; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* The minimum value of the histogram, or {@code Double.POSITIVE_INFINITY} if the histogram is empty. | ||||||||||||
* @return the minimum | ||||||||||||
*/ | ||||||||||||
public double getMin() { | ||||||||||||
double min = histogram().min(); | ||||||||||||
return Double.isNaN(min) ? Double.POSITIVE_INFINITY : min; | ||||||||||||
} | ||||||||||||
|
||||||||||||
/** | ||||||||||||
* The maximum value of the histogram, or {@code Double.NEGATIVE_INFINITY} if the histogram is empty. | ||||||||||||
* @return the maximum | ||||||||||||
*/ | ||||||||||||
public double getMax() { | ||||||||||||
double max = histogram().max(); | ||||||||||||
return Double.isNaN(max) ? Double.NEGATIVE_INFINITY : max; | ||||||||||||
} | ||||||||||||
|
||||||||||||
public void write(StreamOutput out) throws IOException { | ||||||||||||
if (isEmpty()) { | ||||||||||||
out.writeByte(EMPTY_HISTOGRAM_MARKER_SCALE); | ||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to pre-allocate the list using `centroidCount()`?