Skip to content

Commit c7e1648

Browse files
committed
Speed up COALESCE significantly
```
                    before              after
     (operation)    Score   Error       Score   Error  Units
 coalesce_2_noop   75.949 ± 3.961  ->   0.010 ± 0.001  ns/op  99.9%
 coalesce_2_eager  99.299 ± 6.959  ->   4.292 ± 0.227  ns/op  95.7%
 coalesce_2_lazy  113.118 ± 5.747  ->  26.746 ± 0.954  ns/op  76.4%
```

We tend to advise folks that "COALESCE is faster than CASE", but, as of 8.16.0/elastic#112295 that wasn't true. I was working with someone a few days ago to port a scripted_metric aggregation to ESQL and we saw COALESCE taking ~60% of the time. That won't do.

The trouble is that CASE and COALESCE have to be *lazy*, meaning that operations like:

```
COALESCE(a, 1 / b)
```

should never emit a warning if `a` is not `null`, even if `b` is `0`. In 8.16/elastic#112295 CASE grew an optimization where it could operate non-lazily if it was flagged as "safe". This brings a similar optimization to COALESCE, see it above as "coalesce_2_eager", a 95.7% improvement.

It also brings an arguably more important optimization - entire-block execution for COALESCE. The short version is that, if the first parameter of COALESCE returns no nulls we can return it without doing anything lazily. There are a few more cases, but the upshot is that COALESCE is pretty much *free* in cases where long strings of results are `null` or not `null`. That's the `coalesce_2_noop` line.

Finally, when there are mixed null and non-null values we were using a single builder with some fairly inefficient paths. This specializes them per type and skips some slow null-checking where possible. That's the `coalesce_2_lazy` result, a more modest 76.4%.
1 parent 40794d0 commit c7e1648

File tree

3 files changed

+44
-29
lines changed
  • x-pack/plugin/esql
    • compute/src/main/java/org/elasticsearch/compute/operator
    • src
      • main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls
      • test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls

3 files changed

+44
-29
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/EvalOperator.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ public Block eval(Page page) {
9696
public void close() {
9797

9898
}
99+
100+
@Override
101+
public String toString() {
102+
return "ConstantNull";
103+
}
99104
};
100105
}
101106

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.common.io.stream.StreamInput;
1212
import org.elasticsearch.common.io.stream.StreamOutput;
1313
import org.elasticsearch.compute.data.Block;
14+
import org.elasticsearch.compute.operator.EvalOperator;
1415
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
1516
import org.elasticsearch.xpack.esql.core.expression.Expression;
1617
import org.elasticsearch.xpack.esql.core.expression.Expressions;
@@ -28,14 +29,14 @@
2829

2930
import java.io.IOException;
3031
import java.util.List;
31-
import java.util.Map;
3232
import java.util.stream.Stream;
3333

3434
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
3535
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
3636
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE;
3737
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
3838
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS;
39+
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
3940
import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT;
4041
import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE;
4142
import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER;
@@ -200,26 +201,16 @@ public boolean foldable() {
200201

201202
@Override
202203
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
203-
return SUPPORTED.get(dataType()).toEvaluator(toEvaluator, children());
204+
return switch (dataType()) {
205+
case BOOLEAN -> CoalesceBooleanEvaluator.toEvaluator(toEvaluator, children());
206+
case DOUBLE, COUNTER_DOUBLE -> CoalesceDoubleEvaluator.toEvaluator(toEvaluator, children());
207+
case INTEGER, COUNTER_INTEGER -> CoalesceIntEvaluator.toEvaluator(toEvaluator, children());
208+
case LONG, DATE_NANOS, DATETIME, COUNTER_LONG, UNSIGNED_LONG -> CoalesceLongEvaluator.toEvaluator(toEvaluator, children());
209+
case KEYWORD, TEXT, SEMANTIC_TEXT, CARTESIAN_POINT, CARTESIAN_SHAPE, GEO_POINT, GEO_SHAPE, IP, VERSION ->
210+
CoalesceBytesRefEvaluator.toEvaluator(toEvaluator, children());
211+
case NULL -> EvalOperator.CONSTANT_NULL_FACTORY;
212+
case UNSUPPORTED, SHORT, BYTE, DATE_PERIOD, OBJECT, DOC_DATA_TYPE, SOURCE, TIME_DURATION, FLOAT, HALF_FLOAT, TSID_DATA_TYPE,
213+
SCALED_FLOAT, PARTIAL_AGG -> throw new UnsupportedOperationException("can't be coalesced");
214+
};
204215
}
205-
206-
interface BuildCoalesce {
207-
ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator, List<Expression> children);
208-
}
209-
210-
private static final Map<DataType, BuildCoalesce> SUPPORTED = Map.ofEntries(
211-
// We intend to support all types here.
212-
Map.entry(BOOLEAN, CoalesceBooleanEvaluator::toEvaluator),
213-
Map.entry(CARTESIAN_POINT, CoalesceBytesRefEvaluator::toEvaluator),
214-
Map.entry(CARTESIAN_SHAPE, CoalesceBytesRefEvaluator::toEvaluator),
215-
Map.entry(DATE_NANOS, CoalesceLongEvaluator::toEvaluator),
216-
Map.entry(DATETIME, CoalesceLongEvaluator::toEvaluator),
217-
Map.entry(GEO_POINT, CoalesceBytesRefEvaluator::toEvaluator),
218-
Map.entry(GEO_SHAPE, CoalesceBytesRefEvaluator::toEvaluator),
219-
Map.entry(INTEGER, CoalesceIntEvaluator::toEvaluator),
220-
Map.entry(IP, CoalesceBytesRefEvaluator::toEvaluator),
221-
Map.entry(KEYWORD, CoalesceBytesRefEvaluator::toEvaluator),
222-
Map.entry(LONG, CoalesceLongEvaluator::toEvaluator),
223-
Map.entry(VERSION, CoalesceBytesRefEvaluator::toEvaluator)
224-
);
225216
}

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/CoalesceTests.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral;
4848
import static org.hamcrest.Matchers.equalTo;
4949
import static org.hamcrest.Matchers.is;
50+
import static org.hamcrest.Matchers.nullValue;
5051
import static org.hamcrest.Matchers.sameInstance;
5152

5253
public class CoalesceTests extends AbstractScalarFunctionTestCase {
@@ -57,7 +58,7 @@ public CoalesceTests(@Name("TestCase") Supplier<TestCaseSupplier.TestCase> testC
5758
@ParametersFactory
5859
public static Iterable<Object[]> parameters() {
5960
List<TestCaseSupplier> noNullsSuppliers = new ArrayList<>();
60-
VaragsTestCaseBuilder builder = new VaragsTestCaseBuilder(type -> "CoalesceEager");
61+
VaragsTestCaseBuilder builder = new VaragsTestCaseBuilder(type -> "Coalesce" + type + "Eager");
6162
builder.expectString(strings -> strings.filter(v -> v != null).findFirst());
6263
builder.expectLong(longs -> longs.filter(v -> v != null).findFirst());
6364
builder.expectInt(ints -> ints.filter(v -> v != null).findFirst());
@@ -72,7 +73,7 @@ public static Iterable<Object[]> parameters() {
7273
new TestCaseSupplier.TypedData(first, DataType.IP, "first"),
7374
new TestCaseSupplier.TypedData(second, DataType.IP, "second")
7475
),
75-
"CoalesceEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
76+
"CoalesceBytesRefEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
7677
DataType.IP,
7778
equalTo(first == null ? second : first)
7879
);
@@ -87,7 +88,7 @@ public static Iterable<Object[]> parameters() {
8788
new TestCaseSupplier.TypedData(first, DataType.VERSION, "first"),
8889
new TestCaseSupplier.TypedData(second, DataType.VERSION, "second")
8990
),
90-
"CoalesceEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
91+
"CoalesceBytesRefEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
9192
DataType.VERSION,
9293
equalTo(first == null ? second : first)
9394
);
@@ -100,7 +101,7 @@ public static Iterable<Object[]> parameters() {
100101
new TestCaseSupplier.TypedData(firstDate, DataType.DATETIME, "first"),
101102
new TestCaseSupplier.TypedData(secondDate, DataType.DATETIME, "second")
102103
),
103-
"CoalesceEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
104+
"CoalesceLongEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
104105
DataType.DATETIME,
105106
equalTo(firstDate == null ? secondDate : firstDate)
106107
);
@@ -113,7 +114,7 @@ public static Iterable<Object[]> parameters() {
113114
new TestCaseSupplier.TypedData(firstDate, DataType.DATE_NANOS, "first"),
114115
new TestCaseSupplier.TypedData(secondDate, DataType.DATE_NANOS, "second")
115116
),
116-
"CoalesceEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
117+
"CoalesceLongEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]",
117118
DataType.DATE_NANOS,
118119
equalTo(firstDate == null ? secondDate : firstDate)
119120
);
@@ -137,6 +138,20 @@ public static Iterable<Object[]> parameters() {
137138
suppliers.add(new TestCaseSupplier(nullCaseName(s, nullUpTo, true), types, () -> nullCase(s.get(), finalNullUpTo, true)));
138139
}
139140
}
141+
suppliers.add(
142+
new TestCaseSupplier(
143+
List.of(DataType.NULL, DataType.NULL),
144+
() -> new TestCaseSupplier.TestCase(
145+
List.of(
146+
new TestCaseSupplier.TypedData(null, DataType.NULL, "first"),
147+
new TestCaseSupplier.TypedData(null, DataType.NULL, "second")
148+
),
149+
"ConstantNull",
150+
DataType.NULL,
151+
nullValue()
152+
)
153+
)
154+
);
140155

141156
return parameterSuppliersFromTypedData(suppliers);
142157
}
@@ -175,7 +190,7 @@ protected static void addSpatialCombinations(List<TestCaseSupplier> suppliers) {
175190
TestCaseSupplier.testCaseSupplier(
176191
leftDataSupplier,
177192
rightDataSupplier,
178-
(l, r) -> equalTo("CoalesceEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]"),
193+
(l, r) -> equalTo("CoalesceBytesRefEagerEvaluator[values=[Attribute[channel=0], Attribute[channel=1]]]"),
179194
dataType,
180195
(l, r) -> l
181196
)
@@ -243,7 +258,11 @@ public void testCoalesceNotNullable() {
243258
sub.add(between(0, sub.size()), randomLiteral(sub.get(sub.size() - 1).dataType()));
244259
Coalesce exp = build(Source.EMPTY, sub);
245260
// Known not to be nullable because it contains a non-null literal
246-
assertThat(exp.nullable(), equalTo(Nullability.FALSE));
261+
if (testCase.expectedType() == DataType.NULL) {
262+
assertThat(exp.nullable(), equalTo(Nullability.UNKNOWN));
263+
} else {
264+
assertThat(exp.nullable(), equalTo(Nullability.FALSE));
265+
}
247266
}
248267

249268
/**

0 commit comments

Comments
 (0)