Skip to content

Commit 37b6f8f

Browse files
authored
Add implementation for exponential histogram merging and percentiles (#131220)
1 parent 0c27c55 commit 37b6f8f

29 files changed

+3454
-0
lines changed

benchmarks/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ dependencies {
4949
api(project(':x-pack:plugin:esql:compute'))
5050
implementation project(path: ':libs:native')
5151
implementation project(path: ':libs:simdvec')
52+
implementation project(path: ':libs:exponential-histogram')
5253
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
5354
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
5455
nativeLib(project(':libs:native'))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.exponentialhistogram;
11+
12+
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
13+
import org.openjdk.jmh.annotations.Benchmark;
14+
import org.openjdk.jmh.annotations.BenchmarkMode;
15+
import org.openjdk.jmh.annotations.Fork;
16+
import org.openjdk.jmh.annotations.Measurement;
17+
import org.openjdk.jmh.annotations.Mode;
18+
import org.openjdk.jmh.annotations.OutputTimeUnit;
19+
import org.openjdk.jmh.annotations.Param;
20+
import org.openjdk.jmh.annotations.Scope;
21+
import org.openjdk.jmh.annotations.Setup;
22+
import org.openjdk.jmh.annotations.State;
23+
import org.openjdk.jmh.annotations.Threads;
24+
import org.openjdk.jmh.annotations.Warmup;
25+
import org.openjdk.jmh.profile.GCProfiler;
26+
import org.openjdk.jmh.profile.StackProfiler;
27+
import org.openjdk.jmh.runner.Runner;
28+
import org.openjdk.jmh.runner.RunnerException;
29+
import org.openjdk.jmh.runner.options.Options;
30+
import org.openjdk.jmh.runner.options.OptionsBuilder;
31+
32+
import java.util.Random;
33+
import java.util.concurrent.ThreadLocalRandom;
34+
import java.util.concurrent.TimeUnit;
35+
import java.util.function.DoubleSupplier;
36+
37+
@BenchmarkMode(Mode.AverageTime)
38+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
39+
@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
40+
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
41+
@Fork(1)
42+
@Threads(1)
43+
@State(Scope.Thread)
44+
public class ExponentialHistogramGenerationBench {
45+
46+
@Param({ "100", "500", "1000", "5000", "10000", "20000" })
47+
int bucketCount;
48+
49+
@Param({ "NORMAL", "GAUSSIAN" })
50+
String distribution;
51+
52+
Random random;
53+
ExponentialHistogramGenerator histoGenerator;
54+
55+
double[] data = new double[1000000];
56+
57+
int index;
58+
59+
@Setup
60+
public void setUp() {
61+
random = ThreadLocalRandom.current();
62+
histoGenerator = new ExponentialHistogramGenerator(bucketCount);
63+
64+
DoubleSupplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble();
65+
66+
// Make sure that we start with a non-empty histogram, as this distorts initial additions
67+
for (int i = 0; i < 10000; ++i) {
68+
histoGenerator.add(nextRandom.getAsDouble());
69+
}
70+
71+
for (int i = 0; i < data.length; ++i) {
72+
data[i] = nextRandom.getAsDouble();
73+
}
74+
75+
index = 0;
76+
}
77+
78+
@Benchmark
79+
@BenchmarkMode(Mode.AverageTime)
80+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
81+
public void add() {
82+
if (index >= data.length) {
83+
index = 0;
84+
}
85+
histoGenerator.add(data[index++]);
86+
}
87+
88+
public static void main(String[] args) throws RunnerException {
89+
Options opt = new OptionsBuilder().include(".*" + ExponentialHistogramGenerationBench.class.getSimpleName() + ".*")
90+
.warmupIterations(5)
91+
.measurementIterations(5)
92+
.addProfiler(GCProfiler.class)
93+
.addProfiler(StackProfiler.class)
94+
.build();
95+
96+
new Runner(opt).run();
97+
}
98+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.exponentialhistogram;
11+
12+
import org.elasticsearch.exponentialhistogram.BucketIterator;
13+
import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
14+
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
15+
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
16+
import org.openjdk.jmh.annotations.Benchmark;
17+
import org.openjdk.jmh.annotations.BenchmarkMode;
18+
import org.openjdk.jmh.annotations.Fork;
19+
import org.openjdk.jmh.annotations.Measurement;
20+
import org.openjdk.jmh.annotations.Mode;
21+
import org.openjdk.jmh.annotations.OutputTimeUnit;
22+
import org.openjdk.jmh.annotations.Param;
23+
import org.openjdk.jmh.annotations.Scope;
24+
import org.openjdk.jmh.annotations.Setup;
25+
import org.openjdk.jmh.annotations.State;
26+
import org.openjdk.jmh.annotations.Threads;
27+
import org.openjdk.jmh.annotations.Warmup;
28+
29+
import java.util.List;
30+
import java.util.Random;
31+
import java.util.concurrent.ThreadLocalRandom;
32+
import java.util.concurrent.TimeUnit;
33+
34+
@BenchmarkMode(Mode.AverageTime)
35+
@OutputTimeUnit(TimeUnit.NANOSECONDS)
36+
@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
37+
@Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS)
38+
@Fork(1)
39+
@Threads(1)
40+
@State(Scope.Thread)
41+
public class ExponentialHistogramMergeBench {
42+
43+
@Param({ "1000", "2000", "5000" })
44+
int bucketCount;
45+
46+
@Param({ "0.01", "0.1", "0.25", "0.5", "1.0", "2.0" })
47+
double mergedHistoSizeFactor;
48+
49+
Random random;
50+
ExponentialHistogramMerger histoMerger;
51+
52+
ExponentialHistogram[] toMerge = new ExponentialHistogram[10_000];
53+
54+
int index;
55+
56+
@Setup
57+
public void setUp() {
58+
random = ThreadLocalRandom.current();
59+
histoMerger = new ExponentialHistogramMerger(bucketCount);
60+
61+
ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount);
62+
for (int j = 0; j < bucketCount; j++) {
63+
initial.add(Math.pow(1.001, j));
64+
}
65+
ExponentialHistogram initialHisto = initial.get();
66+
int cnt = getBucketCount(initialHisto);
67+
if (cnt < bucketCount) {
68+
throw new IllegalArgumentException("Expected bucket count to be " + bucketCount + ", but was " + cnt);
69+
}
70+
histoMerger.add(initialHisto);
71+
72+
int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor);
73+
74+
for (int i = 0; i < toMerge.length; i++) {
75+
ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize);
76+
77+
int bucketIndex = 0;
78+
for (int j = 0; j < dataPointSize; j++) {
79+
bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize));
80+
generator.add(Math.pow(1.001, bucketIndex));
81+
}
82+
toMerge[i] = generator.get();
83+
cnt = getBucketCount(toMerge[i]);
84+
if (cnt < dataPointSize) {
85+
throw new IllegalArgumentException("Expected bucket count to be " + dataPointSize + ", but was " + cnt);
86+
}
87+
}
88+
89+
index = 0;
90+
}
91+
92+
private static int getBucketCount(ExponentialHistogram histo) {
93+
int cnt = 0;
94+
for (BucketIterator it : List.of(histo.negativeBuckets().iterator(), histo.positiveBuckets().iterator())) {
95+
while (it.hasNext()) {
96+
cnt++;
97+
it.advance();
98+
}
99+
}
100+
return cnt;
101+
}
102+
103+
@Benchmark
104+
@BenchmarkMode(Mode.AverageTime)
105+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
106+
public void add() {
107+
if (index >= toMerge.length) {
108+
index = 0;
109+
}
110+
histoMerger.add(toMerge[index++]);
111+
}
112+
}

build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/LicenseHeadersTask.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ public void runRat() {
173173
matchers.add(subStringMatcher("BSD4 ", "Original BSD License (with advertising clause)", "All advertising materials"));
174174
// Apache
175175
matchers.add(subStringMatcher("AL ", "Apache", "Licensed to Elasticsearch B.V. under one or more contributor"));
176+
matchers.add(subStringMatcher("AL ", "Apache", "Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V."));
176177
// Apache lz4-java
177178
matchers.add(subStringMatcher("ALLZ4", "Apache LZ4-Java", "Copyright 2020 Adrien Grand and the lz4-java contributors"));
178179
// Generated resources

gradle/verification-metadata.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@
6666
<sha256 value="3366d2c88fb576e486d830f521184e8f1839f8c15dcd2151a3f6e1f62b0b37a0" origin="Generated by Gradle"/>
6767
</artifact>
6868
</component>
69+
<component group="ch.obermuhlner" name="big-math" version="2.3.2">
70+
<artifact name="big-math-2.3.2.jar">
71+
<sha256 value="693e1bb7c7f5184b448f03c2a2c0c45d07d8e89e4641fdc31ab0a8057027f43d" origin="Generated by Gradle"/>
72+
</artifact>
73+
</component>
6974
<component group="ch.randelshofer" name="fastdoubleparser" version="0.8.0">
7075
<artifact name="fastdoubleparser-0.8.0.jar">
7176
<sha256 value="10fe288fd7a2cdaf5175332b73529f9abf8fd54dcfff317d6967c0c35ffb133b" origin="Generated by Gradle"/>

0 commit comments

Comments
 (0)