Skip to content

Commit 174e9ae

Browse files
ES|QL: add local optimizations for constant_keyword (elastic#127549) (elastic#128294)
1 parent 17646bd commit 174e9ae

File tree

7 files changed

+467
-8
lines changed

7 files changed

+467
-8
lines changed

docs/changelog/127549.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127549
2+
summary: Add local optimizations for `constant_keyword`
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalLogicalPlanOptimizer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull;
1313
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint;
1414
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation;
15-
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull;
15+
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceFieldWithConstantOrNull;
1616
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort;
1717
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
1818
import org.elasticsearch.xpack.esql.rule.ParameterizedRuleExecutor;
@@ -43,7 +43,7 @@ protected List<Batch<LogicalPlan>> batches() {
4343
"Local rewrite",
4444
Limiter.ONCE,
4545
new ReplaceTopNWithLimitAndSort(),
46-
new ReplaceMissingFieldWithNull(),
46+
new ReplaceFieldWithConstantOrNull(),
4747
new InferIsNotNull(),
4848
new InferNonNullAggConstraint()
4949
);
Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.elasticsearch.xpack.esql.core.expression.Alias;
1313
import org.elasticsearch.xpack.esql.core.expression.Attribute;
1414
import org.elasticsearch.xpack.esql.core.expression.AttributeSet;
15+
import org.elasticsearch.xpack.esql.core.expression.Expression;
1516
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
1617
import org.elasticsearch.xpack.esql.core.expression.Literal;
1718
import org.elasticsearch.xpack.esql.core.expression.NamedExpression;
@@ -28,22 +29,24 @@
2829
import org.elasticsearch.xpack.esql.rule.ParameterizedRule;
2930

3031
import java.util.ArrayList;
32+
import java.util.HashMap;
3133
import java.util.List;
3234
import java.util.Map;
3335
import java.util.function.Predicate;
3436

3537
/**
36-
* Look for any fields used in the plan that are missing locally and replace them with null.
38+
* Look for any fields used in the plan that are missing and replaces them with null or look for fields that are constant.
3739
* This should minimize the plan execution, in the best scenario skipping its execution all together.
3840
*/
39-
public class ReplaceMissingFieldWithNull extends ParameterizedRule<LogicalPlan, LogicalPlan, LocalLogicalOptimizerContext> {
41+
public class ReplaceFieldWithConstantOrNull extends ParameterizedRule<LogicalPlan, LogicalPlan, LocalLogicalOptimizerContext> {
4042

4143
@Override
4244
public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) {
4345
// Fields from lookup indices don't need to be present on the node, and our search stats don't include them, anyway. Ignore them.
4446
var lookupFieldsBuilder = AttributeSet.builder();
47+
Map<Attribute, Expression> attrToConstant = new HashMap<>();
4548
plan.forEachUp(EsRelation.class, esRelation -> {
46-
// Looking only for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
49+
// Looking for indices in LOOKUP mode is correct: during parsing, we assign the expected mode and even if a lookup index
4750
// is used in the FROM command, it will not be marked with LOOKUP mode there - but STANDARD.
4851
// It seems like we could instead just look for JOINs and walk down their right hand side to find lookup fields - but this does
4952
// not work as this rule also gets called just on the right hand side of a JOIN, which means that we don't always know that
@@ -52,17 +55,33 @@ public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLog
5255
if (esRelation.indexMode() == IndexMode.LOOKUP) {
5356
lookupFieldsBuilder.addAll(esRelation.output());
5457
}
58+
// find constant values only in the main indices
59+
else if (esRelation.indexMode() == IndexMode.STANDARD) {
60+
for (Attribute attribute : esRelation.output()) {
61+
if (attribute instanceof FieldAttribute fa) {
62+
// Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead.
63+
var val = localLogicalOptimizerContext.searchStats().constantValue(fa.fieldName());
64+
if (val != null) {
65+
attrToConstant.put(attribute, Literal.of(attribute, val));
66+
}
67+
}
68+
}
69+
}
5570
});
5671

5772
// Do not use the attribute name, this can deviate from the field name for union types; use fieldName() instead.
5873
// Also retain fields from lookup indices because we do not have stats for these.
5974
Predicate<FieldAttribute> shouldBeRetained = f -> (localLogicalOptimizerContext.searchStats().exists(f.fieldName())
6075
|| lookupFieldsBuilder.contains(f));
6176

62-
return plan.transformUp(p -> missingToNull(p, shouldBeRetained));
77+
return plan.transformUp(p -> replaceWithNullOrConstant(p, shouldBeRetained, attrToConstant));
6378
}
6479

65-
private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> shouldBeRetained) {
80+
private LogicalPlan replaceWithNullOrConstant(
81+
LogicalPlan plan,
82+
Predicate<FieldAttribute> shouldBeRetained,
83+
Map<Attribute, Expression> attrToConstant
84+
) {
6685
if (plan instanceof EsRelation relation) {
6786
// For any missing field, place an Eval right after the EsRelation to assign null values to that attribute (using the same name
6887
// id!), thus avoiding that InsertFieldExtrations inserts a field extraction later.
@@ -116,7 +135,13 @@ private LogicalPlan missingToNull(LogicalPlan plan, Predicate<FieldAttribute> sh
116135
|| plan instanceof OrderBy
117136
|| plan instanceof RegexExtract
118137
|| plan instanceof TopN) {
119-
return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> shouldBeRetained.test(f) ? f : Literal.of(f, null));
138+
return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> {
139+
if (attrToConstant.containsKey(f)) {// handle constant values field and use the value itself instead
140+
return attrToConstant.get(f);
141+
} else {// handle missing fields and replace them with null
142+
return shouldBeRetained.test(f) ? f : Literal.of(f, null);
143+
}
144+
});
120145
}
121146

122147
return plan;

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchContextStats.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,41 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value
305305
return true;
306306
}
307307

308+
public String constantValue(String name) {
309+
String val = null;
310+
for (SearchExecutionContext ctx : contexts) {
311+
MappedFieldType f = ctx.getFieldType(name);
312+
if (f == null) {
313+
return null;
314+
}
315+
if (f instanceof ConstantFieldType cf) {
316+
var fetcher = cf.valueFetcher(ctx, null);
317+
String thisVal = null;
318+
try {
319+
// since the value is a constant, the doc _should_ be irrelevant
320+
List<Object> vals = fetcher.fetchValues(null, -1, null);
321+
Object objVal = vals.size() == 1 ? vals.get(0) : null;
322+
// we are considering only string values for now, since this can return "strange" things,
323+
// see IndexModeFieldType
324+
thisVal = objVal instanceof String ? (String) objVal : null;
325+
} catch (IOException iox) {}
326+
327+
if (thisVal == null) {
328+
// Value not yet set
329+
return null;
330+
}
331+
if (val == null) {
332+
val = thisVal;
333+
} else if (thisVal.equals(val) == false) {
334+
return null;
335+
}
336+
} else {
337+
return null;
338+
}
339+
}
340+
return val;
341+
}
342+
308343
private interface DocCountTester {
309344
Boolean test(LeafReader leafReader) throws IOException;
310345
}

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ public interface SearchStats {
3939

4040
boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value);
4141

42+
/**
43+
* Returns the value for a field if it's a constant (eg. a constant_keyword with only one value for the involved indices).
44+
* NULL if the field is not a constant.
45+
*/
46+
default String constantValue(String name) {
47+
return null;
48+
}
49+
4250
/**
4351
* When there are no search stats available, for example when there are no search contexts, we have static results.
4452
*/

x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
5656
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
5757
import org.elasticsearch.xpack.esql.plan.physical.AggregateExec;
58+
import org.elasticsearch.xpack.esql.plan.physical.DissectExec;
5859
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
5960
import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec;
6061
import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat;
@@ -65,6 +66,7 @@
6566
import org.elasticsearch.xpack.esql.plan.physical.FilterExec;
6667
import org.elasticsearch.xpack.esql.plan.physical.LimitExec;
6768
import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec;
69+
import org.elasticsearch.xpack.esql.plan.physical.MvExpandExec;
6870
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
6971
import org.elasticsearch.xpack.esql.plan.physical.ProjectExec;
7072
import org.elasticsearch.xpack.esql.plan.physical.TopNExec;
@@ -141,6 +143,18 @@ public boolean isSingleValue(String field) {
141143
}
142144
};
143145

146+
private final SearchStats CONSTANT_K_STATS = new TestSearchStats() {
147+
@Override
148+
public boolean isSingleValue(String field) {
149+
return true;
150+
}
151+
152+
@Override
153+
public String constantValue(String name) {
154+
return name.startsWith("constant_keyword") ? "foo" : null;
155+
}
156+
};
157+
144158
@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
145159
public static List<Object[]> readScriptSpec() {
146160
return settings().stream().map(t -> {
@@ -1736,6 +1750,113 @@ public void testMatchFunctionWithPushableDisjunction() {
17361750
assertThat(esQuery.query().toString(), equalTo(expected.toString()));
17371751
}
17381752

1753+
/**
1754+
* LimitExec[1000[INTEGER]]
1755+
* \_ExchangeExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419,
1756+
* double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423,
1757+
* !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432], false]
1758+
* \_ProjectExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419,
1759+
* double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423,
1760+
* !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432]]
1761+
* \_FieldExtractExec[!alias_integer, boolean{f}#415, byte{f}#416, consta..]
1762+
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#434], limit[1000], sort[] estimatedRowSize[412]
1763+
*/
1764+
public void testConstantKeywordWithMatchingFilter() {
1765+
String queryText = """
1766+
from test
1767+
| where `constant_keyword-foo` == "foo"
1768+
""";
1769+
var analyzer = makeAnalyzer("mapping-all-types.json", new EnrichResolution());
1770+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1771+
1772+
var limit = as(plan, LimitExec.class);
1773+
var exchange = as(limit.child(), ExchangeExec.class);
1774+
var project = as(exchange.child(), ProjectExec.class);
1775+
var field = as(project.child(), FieldExtractExec.class);
1776+
var query = as(field.child(), EsQueryExec.class);
1777+
assertThat(as(query.limit(), Literal.class).value(), is(1000));
1778+
assertNull(query.query());
1779+
}
1780+
1781+
/**
1782+
* LimitExec[1000[INTEGER]]
1783+
* \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1784+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
1785+
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false]
1786+
* \_LocalSourceExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
1787+
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
1788+
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], EMPTY]
1789+
*/
1790+
public void testConstantKeywordWithNonMatchingFilter() {
1791+
String queryText = """
1792+
from test
1793+
| where `constant_keyword-foo` == "non-matching"
1794+
""";
1795+
var analyzer = makeAnalyzer("mapping-all-types.json", new EnrichResolution());
1796+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1797+
1798+
var limit = as(plan, LimitExec.class);
1799+
var exchange = as(limit.child(), ExchangeExec.class);
1800+
var source = as(exchange.child(), LocalSourceExec.class);
1801+
}
1802+
1803+
/**
1804+
* LimitExec[1000[INTEGER]]
1805+
* \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1...
1806+
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1...
1807+
* \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9,
1808+
* \_LimitExec[1000[INTEGER]]
1809+
* \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]]
1810+
* \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25]
1811+
* \_FieldExtractExec[constant_keyword-foo{f}#8]
1812+
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412]
1813+
*/
1814+
public void testConstantKeywordExpandFilter() {
1815+
String queryText = """
1816+
from test
1817+
| mv_expand `constant_keyword-foo`
1818+
| where `constant_keyword-foo` == "foo"
1819+
""";
1820+
var analyzer = makeAnalyzer("mapping-all-types.json", new EnrichResolution());
1821+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1822+
1823+
var limit = as(plan, LimitExec.class);
1824+
var exchange = as(limit.child(), ExchangeExec.class);
1825+
var project = as(exchange.child(), ProjectExec.class);
1826+
var fieldExtract = as(project.child(), FieldExtractExec.class);
1827+
var limit2 = as(fieldExtract.child(), LimitExec.class);
1828+
var filter = as(limit2.child(), FilterExec.class);
1829+
var expand = as(filter.child(), MvExpandExec.class);
1830+
var field = as(expand.child(), FieldExtractExec.class); // MV_EXPAND is not optimized yet (it doesn't accept literals)
1831+
as(field.child(), EsQueryExec.class);
1832+
}
1833+
1834+
/**
1835+
* DissectExec[constant_keyword-foo{f}#8,Parser[pattern=%{bar}, appendSeparator=, ...
1836+
* \_LimitExec[1000[INTEGER]]
1837+
* \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11...
1838+
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11...
1839+
* \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, constant_k..]
1840+
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#25], limit[1000], sort[] estimatedRowSize[462]
1841+
*/
1842+
public void testConstantKeywordDissectFilter() {
1843+
String queryText = """
1844+
from test
1845+
| dissect `constant_keyword-foo` "%{bar}"
1846+
| where `constant_keyword-foo` == "foo"
1847+
""";
1848+
var analyzer = makeAnalyzer("mapping-all-types.json", new EnrichResolution());
1849+
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);
1850+
1851+
var dissect = as(plan, DissectExec.class);
1852+
var limit = as(dissect.child(), LimitExec.class);
1853+
var exchange = as(limit.child(), ExchangeExec.class);
1854+
var project = as(exchange.child(), ProjectExec.class);
1855+
var field = as(project.child(), FieldExtractExec.class);
1856+
var query = as(field.child(), EsQueryExec.class);
1857+
assertNull(query.query());
1858+
}
1859+
17391860
private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) {
17401861
return FilterTests.singleValueQuery(query, inner, fieldName, source);
17411862
}

0 commit comments

Comments
 (0)