Skip to content

Commit 53f6aef

Browse files
ES|QL: add local optimizations for constant_keyword
1 parent 6edc76d commit 53f6aef

File tree

6 files changed

+344
-0
lines changed

6 files changed

+344
-0
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull;
1313
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint;
1414
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation;
15+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceConstantKeywords;
1516
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull;
1617
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort;
1718
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
@@ -38,6 +39,7 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor<Logical
3839
new Batch<>(
3940
"Local rewrite",
4041
Limiter.ONCE,
42+
new ReplaceConstantKeywords(),
4143
new ReplaceTopNWithLimitAndSort(),
4244
new ReplaceMissingFieldWithNull(),
4345
new InferIsNotNull(),
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.esql.optimizer.rules.logical.local;
9+
10+
import org.elasticsearch.index.IndexMode;
11+
import org.elasticsearch.xpack.esql.core.expression.Alias;
12+
import org.elasticsearch.xpack.esql.core.expression.Attribute;
13+
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
14+
import org.elasticsearch.xpack.esql.core.expression.Literal;
15+
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
16+
import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext;
17+
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
18+
import org.elasticsearch.xpack.esql.plan.logical.Eval;
19+
import org.elasticsearch.xpack.esql.plan.logical.Filter;
20+
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
21+
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
22+
import org.elasticsearch.xpack.esql.plan.logical.Project;
23+
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
24+
import org.elasticsearch.xpack.esql.plan.logical.TopN;
25+
import org.elasticsearch.xpack.esql.rule.ParameterizedRule;
26+
27+
import java.util.ArrayList;
28+
import java.util.HashMap;
29+
import java.util.List;
30+
import java.util.Map;
31+
32+
/**
33+
* Look for any constant_keyword fields used in the plan and replaces them with their actual value.
34+
*/
35+
public class ReplaceConstantKeywords extends ParameterizedRule<LogicalPlan, LogicalPlan, LocalLogicalOptimizerContext> {
36+
37+
@Override
38+
public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) {
39+
Map<Attribute, Alias> attrToValue = new HashMap<>();
40+
plan.forEachUp(EsRelation.class, esRelation -> {
41+
if (esRelation.indexMode() == IndexMode.STANDARD) {
42+
for (Attribute attribute : esRelation.output()) {
43+
var val = localLogicalOptimizerContext.searchStats().constantValue(attribute.name());
44+
if (val != null) {
45+
attrToValue.put(
46+
attribute,
47+
new Alias(attribute.source(), attribute.name(), Literal.of(attribute, val), attribute.id())
48+
);
49+
}
50+
}
51+
}
52+
});
53+
if (attrToValue.isEmpty()) {
54+
return plan;
55+
}
56+
return plan.transformUp(p -> replaceAttributes(p, attrToValue));
57+
}
58+
59+
private LogicalPlan replaceAttributes(LogicalPlan plan, Map<Attribute, Alias> attrToValue) {
60+
if (plan instanceof EsRelation relation) {
61+
// For any missing field, place an Eval right after the EsRelation to assign constant values to that attribute (using the same
62+
// name
63+
// id!), thus avoiding that InsertFieldExtrations inserts a field extraction later.
64+
// This means that an EsRelation[field1, field2, field3] where field1 and field 3 are constants, will be replaced by
65+
// Project[field1, field2, field3] <- keeps the ordering intact
66+
// \_Eval[field1 = value, field3 = value]
67+
// \_EsRelation[field1, field2, field3]
68+
List<Attribute> relationOutput = relation.output();
69+
List<NamedExpression> newProjections = new ArrayList<>(relationOutput.size());
70+
for (int i = 0, size = relationOutput.size(); i < size; i++) {
71+
Attribute attr = relationOutput.get(i);
72+
Alias alias = attrToValue.get(attr);
73+
newProjections.add(alias == null ? attr : alias);
74+
}
75+
76+
Eval eval = new Eval(plan.source(), relation, new ArrayList<>(attrToValue.values()));
77+
// This projection is redundant if there's another projection downstream (and no commands depend on the order until we hit it).
78+
return new Project(plan.source(), eval, newProjections.stream().map(NamedExpression::toAttribute).toList());
79+
}
80+
81+
if (plan instanceof Eval
82+
|| plan instanceof Filter
83+
|| plan instanceof OrderBy
84+
|| plan instanceof RegexExtract
85+
|| plan instanceof TopN) {
86+
return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> {
87+
Alias alias = attrToValue.get(f);
88+
return alias != null ? alias.child() : f;
89+
});
90+
}
91+
92+
return plan;
93+
}
94+
95+
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,39 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value
310310
return true;
311311
}
312312

313+
public String constantValue(String name) {
314+
String val = null;
315+
for (SearchExecutionContext ctx : contexts) {
316+
MappedFieldType f = ctx.getFieldType(name);
317+
if (f == null) {
318+
return null;
319+
}
320+
if (f instanceof ConstantFieldType cf) {
321+
var fetcher = cf.valueFetcher(ctx, null);
322+
String thisVal = null;
323+
try {
324+
// since the value is a constant, the doc _should_ be irrelevant
325+
List<Object> vals = fetcher.fetchValues(null, -1, null);
326+
Object objVal = vals.size() == 1 ? vals.get(0) : null;
327+
// we are considering only string values for now, since this can return "strange" things,
328+
// see IndexModeFieldType
329+
thisVal = objVal instanceof String ? (String) objVal : null;
330+
} catch (IOException iox) {}
331+
332+
if (thisVal == null) {
333+
// Value not yet set
334+
return null;
335+
}
336+
if (val == null) {
337+
val = thisVal;
338+
} else if (thisVal.equals(val) == false) {
339+
return null;
340+
}
341+
}
342+
}
343+
return val;
344+
}
345+
313346
private interface DocCountTester {
314347
Boolean test(LeafReader leafReader) throws IOException;
315348
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ public interface SearchStats {
3939

4040
boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value);
4141

42+
/**
43+
* Returns the value for a field if it's a constant (eg. a constant_keyword with only one value for the involved indices).
44+
* NULL if the field is not a constant.
45+
*/
46+
default String constantValue(String name) {
47+
return null;
48+
}
49+
4250
/**
4351
* When there are no search stats available, for example when there are no search contexts, we have static results.
4452
*/

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.elasticsearch.xpack.esql.core.expression.Expressions;
4343
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
4444
import org.elasticsearch.xpack.esql.core.expression.Literal;
45+
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
4546
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
4647
import org.elasticsearch.xpack.esql.core.tree.Source;
4748
import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -69,6 +70,7 @@
6970
import org.elasticsearch.xpack.esql.plan.physical.FilterExec;
7071
import org.elasticsearch.xpack.esql.plan.physical.LimitExec;
7172
import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec;
73+
import org.elasticsearch.xpack.esql.plan.physical.MvExpandExec;
7274
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
7375
import org.elasticsearch.xpack.esql.plan.physical.ProjectExec;
7476
import org.elasticsearch.xpack.esql.plan.physical.TimeSeriesAggregateExec;
@@ -150,6 +152,18 @@ public boolean isSingleValue(String field) {
150152
}
151153
};
152154

155+
private final SearchStats CONSTANT_K_STATS = new TestSearchStats() {
156+
@Override
157+
public boolean isSingleValue(String field) {
158+
return true;
159+
}
160+
161+
@Override
162+
public String constantValue(String name) {
163+
return name.startsWith("constant_keyword") ? "foo" : null;
164+
}
165+
};
166+
153167
@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
154168
public static List<Object[]> readScriptSpec() {
155169
return settings().stream().map(t -> {
@@ -1856,6 +1870,100 @@ public void testPushDownFieldExtractToTimeSeriesSource() {
18561870
assertTrue(timeSeriesSource.attrs().stream().noneMatch(EsQueryExec::isSourceAttribute));
18571871
}
18581872

1873+
/**
1874+
* LimitExec[1000[INTEGER]]
1875+
* \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1876+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
1877+
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false]
1878+
* \_ProjectExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{r}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1879+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12,
1880+
* !semantic_text, short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21]]
1881+
* \_FieldExtractExec[!alias_integer, boolean{f}#4, byte{f}#5, date{f}#7, ]
1882+
* \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]]
1883+
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#23], limit[1000], sort[] estimatedRowSize[412]
1884+
*/
1885+
public void testConstantKeywordWithMatchingFilter() {
1886+
String queryText = """
1887+
from test
1888+
| where `constant_keyword-foo` == "foo"
1889+
""";
1890+
var analyzer = makeAnalyzer("mapping-all-types.json");
1891+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1892+
1893+
var limit = as(plan, LimitExec.class);
1894+
var exchange = as(limit.child(), ExchangeExec.class);
1895+
var project = as(exchange.child(), ProjectExec.class);
1896+
var field = as(project.child(), FieldExtractExec.class);
1897+
var eval = as(field.child(), EvalExec.class);
1898+
var query = as(eval.child(), EsQueryExec.class);
1899+
assertThat(as(query.limit(), Literal.class).value(), is(1000));
1900+
assertNull(query.query());
1901+
assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo")));
1902+
}
1903+
1904+
/**
1905+
* LimitExec[1000[INTEGER]]
1906+
* \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1907+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
1908+
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false]
1909+
* \_LocalSourceExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1910+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
1911+
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], EMPTY]
1912+
*/
1913+
public void testConstantKeywordWithNonMatchingFilter() {
1914+
String queryText = """
1915+
from test
1916+
| where `constant_keyword-foo` == "non-matching"
1917+
""";
1918+
var analyzer = makeAnalyzer("mapping-all-types.json");
1919+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1920+
1921+
var limit = as(plan, LimitExec.class);
1922+
var exchange = as(limit.child(), ExchangeExec.class);
1923+
var source = as(exchange.child(), LocalSourceExec.class);
1924+
}
1925+
1926+
/**
1927+
* LimitExec[1000[INTEGER]]
1928+
* \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1
1929+
* 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14,
1930+
* !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23], false]
1931+
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1
1932+
* 1, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14,
1933+
* !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]]
1934+
* \_LimitExec[1000[INTEGER]]
1935+
* \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]]
1936+
* \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25]
1937+
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#8, date{f}#9, date_nanos{f}#10,
1938+
* double{f}#11, float{f}#12, half_float{f}#13, integer{f}#15, ip{f}#16, keyword{f}#17, long{f}#18, scaled_float{f}#14,
1939+
* !semantic_text, short{f}#20, text{f}#21, unsigned_long{f}#19, version{f}#22, wildcard{f}#23]]
1940+
* \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9, ..]
1941+
* \_EvalExec[[[66 6f 6f][KEYWORD] AS constant_keyword-foo]]
1942+
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412]
1943+
*/
1944+
public void testConstantKeywordExpandFilter() {
1945+
String queryText = """
1946+
from test
1947+
| mv_expand `constant_keyword-foo`
1948+
| where `constant_keyword-foo` == "foo"
1949+
""";
1950+
var analyzer = makeAnalyzer("mapping-all-types.json");
1951+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1952+
1953+
var limit = as(plan, LimitExec.class);
1954+
var exchange = as(limit.child(), ExchangeExec.class);
1955+
var project = as(exchange.child(), ProjectExec.class);
1956+
var limit2 = as(project.child(), LimitExec.class);
1957+
var filter = as(limit2.child(), FilterExec.class);
1958+
var expand = as(filter.child(), MvExpandExec.class);
1959+
var project2 = as(expand.child(), ProjectExec.class);
1960+
var field = as(project2.child(), FieldExtractExec.class);
1961+
var eval = as(field.child(), EvalExec.class);
1962+
var query = as(eval.child(), EsQueryExec.class);
1963+
assertNull(query.query());
1964+
assertFalse(field.attributesToExtract().stream().map(NamedExpression::name).anyMatch(x -> x.equals("constant_keyword-foo")));
1965+
}
1966+
18591967
private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) {
18601968
return FilterTests.singleValueQuery(query, inner, fieldName, source);
18611969
}

0 commit comments

Comments
 (0)