Skip to content

Commit 088c410

Browse files
JonasKunzKubik42
authored and committed
ESQL: Add exponential histogram percentile function (elastic#137553)
1 parent df131d8 commit 088c410

File tree

8 files changed

+472
-3
lines changed

8 files changed

+472
-3
lines changed

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,14 +1035,14 @@ public static Literal randomLiteral(DataType type) {
10351035
}
10361036
case TSID_DATA_TYPE -> randomTsId().toBytesRef();
10371037
case DENSE_VECTOR -> Arrays.asList(randomArray(10, 10, i -> new Float[10], ESTestCase::randomFloat));
1038-
case EXPONENTIAL_HISTOGRAM -> new WriteableExponentialHistogram(EsqlTestUtils.randomExponentialHistogram());
1038+
case EXPONENTIAL_HISTOGRAM -> EsqlTestUtils.randomExponentialHistogram();
10391039
case UNSUPPORTED, OBJECT, DOC_DATA_TYPE, PARTIAL_AGG -> throw new IllegalArgumentException(
10401040
"can't make random values for [" + type.typeName() + "]"
10411041
);
10421042
}, type);
10431043
}
10441044

1045-
private static ExponentialHistogram randomExponentialHistogram() {
1045+
public static ExponentialHistogram randomExponentialHistogram() {
10461046
// TODO(b/133393): allow (index,scale) based zero thresholds as soon as we support them in the block
10471047
// ideally replace this with the shared random generation in ExponentialHistogramTestUtils
10481048
boolean hasNegativeValues = randomBoolean();
@@ -1062,7 +1062,8 @@ private static ExponentialHistogram randomExponentialHistogram() {
10621062
ExponentialHistogramCircuitBreaker.noop(),
10631063
rawValues
10641064
);
1065-
return histo;
1065+
// Make the result histogram writeable to allow usage in Literals for testing
1066+
return new WriteableExponentialHistogram(histo);
10661067
}
10671068

10681069
static Version randomVersion() {

x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/histogram/HistogramPercentileEvaluator.java

Lines changed: 152 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/ScalarFunctionWritables.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.xpack.esql.expression.function.scalar.date.DayName;
2424
import org.elasticsearch.xpack.esql.expression.function.scalar.date.MonthName;
2525
import org.elasticsearch.xpack.esql.expression.function.scalar.date.Now;
26+
import org.elasticsearch.xpack.esql.expression.function.scalar.histogram.HistogramPercentile;
2627
import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch;
2728
import org.elasticsearch.xpack.esql.expression.function.scalar.ip.IpPrefix;
2829
import org.elasticsearch.xpack.esql.expression.function.scalar.ip.NetworkDirection;
@@ -118,6 +119,7 @@ public static List<NamedWriteableRegistry.Entry> getNamedWriteables() {
118119
entries.add(Tau.ENTRY);
119120
entries.add(ToLower.ENTRY);
120121
entries.add(ToUpper.ENTRY);
122+
entries.add(HistogramPercentile.ENTRY);
121123

122124
entries.addAll(GroupingWritables.getNamedWriteables());
123125
return entries;
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.scalar.histogram;
9+
10+
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
11+
import org.elasticsearch.common.io.stream.StreamInput;
12+
import org.elasticsearch.common.io.stream.StreamOutput;
13+
import org.elasticsearch.compute.ann.Evaluator;
14+
import org.elasticsearch.compute.data.DoubleBlock;
15+
import org.elasticsearch.compute.operator.EvalOperator;
16+
import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
17+
import org.elasticsearch.exponentialhistogram.ExponentialHistogramQuantile;
18+
import org.elasticsearch.xpack.esql.core.expression.Expression;
19+
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
20+
import org.elasticsearch.xpack.esql.core.tree.Source;
21+
import org.elasticsearch.xpack.esql.core.type.DataType;
22+
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
23+
import org.elasticsearch.xpack.esql.expression.function.Param;
24+
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
25+
import org.elasticsearch.xpack.esql.expression.function.scalar.math.Cast;
26+
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
27+
28+
import java.io.IOException;
29+
import java.util.List;
30+
31+
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
32+
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType;
33+
34+
/**
35+
* Extracts a percentile value from a single histogram value.
36+
* Note that this function is currently only intended for usage in surrogates and not available as a user-facing function.
37+
* Therefore, it is intentionally not registered in {@link org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry}.
38+
*/
39+
public class HistogramPercentile extends EsqlScalarFunction {
40+
41+
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
42+
Expression.class,
43+
"HistogramPercentile",
44+
HistogramPercentile::new
45+
);
46+
47+
private final Expression histogram;
48+
private final Expression percentile;
49+
50+
@FunctionInfo(returnType = { "double" })
51+
public HistogramPercentile(
52+
Source source,
53+
@Param(name = "histogram", type = { "exponential_histogram" }) Expression histogram,
54+
@Param(name = "percentile", type = { "double", "integer", "long", "unsigned_long" }) Expression percentile
55+
) {
56+
super(source, List.of(histogram, percentile));
57+
this.histogram = histogram;
58+
this.percentile = percentile;
59+
}
60+
61+
private HistogramPercentile(StreamInput in) throws IOException {
62+
this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class), in.readNamedWriteable(Expression.class));
63+
}
64+
65+
Expression histogram() {
66+
return histogram;
67+
}
68+
69+
Expression percentile() {
70+
return percentile;
71+
}
72+
73+
@Override
74+
protected TypeResolution resolveType() {
75+
return isType(histogram, dt -> dt == DataType.EXPONENTIAL_HISTOGRAM, sourceText(), DEFAULT, "exponential_histogram").and(
76+
isType(percentile, DataType::isNumeric, sourceText(), DEFAULT, "numeric types")
77+
);
78+
}
79+
80+
@Override
81+
public DataType dataType() {
82+
return DataType.DOUBLE;
83+
}
84+
85+
@Override
86+
public Expression replaceChildren(List<Expression> newChildren) {
87+
return new HistogramPercentile(source(), newChildren.get(0), newChildren.get(1));
88+
}
89+
90+
@Override
91+
public boolean foldable() {
92+
return histogram.foldable() && percentile.foldable();
93+
}
94+
95+
@Override
96+
protected NodeInfo<? extends Expression> info() {
97+
return NodeInfo.create(this, HistogramPercentile::new, histogram, percentile);
98+
}
99+
100+
@Override
101+
public String getWriteableName() {
102+
return ENTRY.name;
103+
}
104+
105+
@Override
106+
public void writeTo(StreamOutput out) throws IOException {
107+
source().writeTo(out);
108+
out.writeNamedWriteable(histogram);
109+
out.writeNamedWriteable(percentile);
110+
}
111+
112+
@Evaluator(warnExceptions = ArithmeticException.class)
113+
static void process(DoubleBlock.Builder resultBuilder, ExponentialHistogram value, double percentile) {
114+
if (percentile < 0.0 || percentile > 100.0) {
115+
throw new ArithmeticException("Percentile value must be in the range [0, 100], got: " + percentile);
116+
}
117+
double result = ExponentialHistogramQuantile.getQuantile(value, percentile / 100.0);
118+
if (Double.isNaN(result)) { // can happen if the histogram is empty
119+
resultBuilder.appendNull();
120+
} else {
121+
resultBuilder.appendDouble(result);
122+
}
123+
}
124+
125+
@Override
126+
public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
127+
var fieldEvaluator = toEvaluator.apply(histogram);
128+
var percentileEvaluator = Cast.cast(source(), percentile.dataType(), DataType.DOUBLE, toEvaluator.apply(percentile));
129+
return new HistogramPercentileEvaluator.Factory(source(), fieldEvaluator, percentileEvaluator);
130+
}
131+
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,19 @@ public static List<TypedDataSupplier> aggregateMetricDoubleCases() {
15091509
);
15101510
}
15111511

1512+
/**
1513+
* Generate cases for {@link DataType#EXPONENTIAL_HISTOGRAM}.
1514+
*/
1515+
public static List<TypedDataSupplier> exponentialHistogramCases() {
1516+
return List.of(
1517+
new TypedDataSupplier(
1518+
"<random exponential histogram>",
1519+
EsqlTestUtils::randomExponentialHistogram,
1520+
DataType.EXPONENTIAL_HISTOGRAM
1521+
)
1522+
);
1523+
}
1524+
15121525
public static String getCastEvaluator(String original, DataType current, DataType target) {
15131526
if (current == target) {
15141527
return original;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.expression.function.scalar.histogram;
9+
10+
import org.elasticsearch.xpack.esql.core.expression.Expression;
11+
import org.elasticsearch.xpack.esql.core.tree.Source;
12+
import org.elasticsearch.xpack.esql.core.type.DataType;
13+
import org.elasticsearch.xpack.esql.expression.function.ErrorsForCasesWithoutExamplesTestCase;
14+
import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier;
15+
import org.hamcrest.Matcher;
16+
17+
import java.util.List;
18+
import java.util.Set;
19+
20+
import static org.hamcrest.Matchers.equalTo;
21+
22+
public class HistogramPercentileErrorTests extends ErrorsForCasesWithoutExamplesTestCase {
23+
24+
@Override
25+
protected List<TestCaseSupplier> cases() {
26+
return paramsToSuppliers(HistogramPercentileTests.parameters());
27+
}
28+
29+
@Override
30+
protected Expression build(Source source, List<Expression> args) {
31+
return new HistogramPercentile(source, args.get(0), args.get(1));
32+
}
33+
34+
@Override
35+
protected Matcher<String> expectedTypeErrorMatcher(List<Set<DataType>> validPerPosition, List<DataType> signature) {
36+
return equalTo(typeErrorMessage(false, validPerPosition, signature, (v, p) -> switch (p) {
37+
case 0 -> "exponential_histogram";
38+
case 1 -> "numeric types";
39+
default -> throw new IllegalArgumentException("Unexpected parameter position: " + p);
40+
}));
41+
}
42+
}

0 commit comments

Comments
 (0)