Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/127549.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 127549
summary: Add local optimizations for `constant_keyword`
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferIsNotNull;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.InferNonNullAggConstraint;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.LocalPropagateEmptyRelation;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceConstantKeywords;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceMissingFieldWithNull;
import org.elasticsearch.xpack.esql.optimizer.rules.logical.local.ReplaceTopNWithLimitAndSort;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
Expand All @@ -38,6 +39,7 @@ public class LocalLogicalPlanOptimizer extends ParameterizedRuleExecutor<Logical
new Batch<>(
"Local rewrite",
Limiter.ONCE,
new ReplaceConstantKeywords(),
new ReplaceTopNWithLimitAndSort(),
new ReplaceMissingFieldWithNull(),
new InferIsNotNull(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.optimizer.rules.logical.local;

import org.elasticsearch.index.IndexMode;
import org.elasticsearch.xpack.esql.core.expression.Attribute;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext;
import org.elasticsearch.xpack.esql.plan.logical.EsRelation;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Filter;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.logical.OrderBy;
import org.elasticsearch.xpack.esql.plan.logical.RegexExtract;
import org.elasticsearch.xpack.esql.plan.logical.TopN;
import org.elasticsearch.xpack.esql.rule.ParameterizedRule;

import java.util.HashMap;
import java.util.Map;

/**
* Look for any constant_keyword fields used in the plan and replaces them with their actual value.
*/
public class ReplaceConstantKeywords extends ParameterizedRule<LogicalPlan, LogicalPlan, LocalLogicalOptimizerContext> {

@Override
public LogicalPlan apply(LogicalPlan plan, LocalLogicalOptimizerContext localLogicalOptimizerContext) {
Map<Attribute, Expression> attrToValue = new HashMap<>();
plan.forEachUp(EsRelation.class, esRelation -> {
if (esRelation.indexMode() == IndexMode.STANDARD) {
for (Attribute attribute : esRelation.output()) {
var val = localLogicalOptimizerContext.searchStats().constantValue(attribute.name());
if (val != null) {
attrToValue.put(attribute, Literal.of(attribute, val));
}
}
}
});
if (attrToValue.isEmpty()) {
return plan;
}
return plan.transformUp(p -> replaceAttributes(p, attrToValue));
}

private LogicalPlan replaceAttributes(LogicalPlan plan, Map<Attribute, Expression> attrToValue) {
// This is slightly different from ReplaceMissingFieldWithNull.
// It's on purpose: reusing NameIDs is dangerous, and we have no evidence that adding an EVAL will actually lead to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please, provide a more explanatory comment. I am looking for "reusing NameIDs is dangerous" explanation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, having an eval doing the value "replacement" is not about performance, but about making use of other mechanisms that already exists in the optimizer to naturally "move"/"flow" the EVAL through the Nodes tree (like constant folding, moving literals on the right hand side of boolean expressions etc).

// practical performance benefits
if (plan instanceof Eval
|| plan instanceof Filter
|| plan instanceof OrderBy
|| plan instanceof RegexExtract
|| plan instanceof TopN) {
return plan.transformExpressionsOnlyUp(FieldAttribute.class, f -> attrToValue.getOrDefault(f, f));
}

return plan;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,41 @@ public boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value
return true;
}

public String constantValue(String name) {
String val = null;
for (SearchExecutionContext ctx : contexts) {
MappedFieldType f = ctx.getFieldType(name);
if (f == null) {
return null;
}
if (f instanceof ConstantFieldType cf) {
var fetcher = cf.valueFetcher(ctx, null);
String thisVal = null;
try {
// since the value is a constant, the doc _should_ be irrelevant
List<Object> vals = fetcher.fetchValues(null, -1, null);
Object objVal = vals.size() == 1 ? vals.get(0) : null;
// we are considering only string values for now, since this can return "strange" things,
// see IndexModeFieldType
thisVal = objVal instanceof String ? (String) objVal : null;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we tend to use BytesRefs encoded as utf-8.

} catch (IOException iox) {}

if (thisVal == null) {
// Value not yet set
return null;
}
if (val == null) {
val = thisVal;
} else if (thisVal.equals(val) == false) {
return null;
}
} else {
return null;
}
}
return val;
}

private interface DocCountTester {
Boolean test(LeafReader leafReader) throws IOException;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ public interface SearchStats {

boolean canUseEqualityOnSyntheticSourceDelegate(String name, String value);

/**
* Returns the value for a field if it's a constant (eg. a constant_keyword with only one value for the involved indices).
* NULL if the field is not a constant.
*/
default String constantValue(String name) {
return null;
}

/**
* When there are no search stats available, for example when there are no search contexts, we have static results.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
import org.elasticsearch.xpack.esql.plan.physical.AggregateExec;
import org.elasticsearch.xpack.esql.plan.physical.DissectExec;
import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec;
import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec;
import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat;
Expand All @@ -69,6 +70,7 @@
import org.elasticsearch.xpack.esql.plan.physical.FilterExec;
import org.elasticsearch.xpack.esql.plan.physical.LimitExec;
import org.elasticsearch.xpack.esql.plan.physical.LocalSourceExec;
import org.elasticsearch.xpack.esql.plan.physical.MvExpandExec;
import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan;
import org.elasticsearch.xpack.esql.plan.physical.ProjectExec;
import org.elasticsearch.xpack.esql.plan.physical.TimeSeriesAggregateExec;
Expand Down Expand Up @@ -150,6 +152,18 @@ public boolean isSingleValue(String field) {
}
};

private final SearchStats CONSTANT_K_STATS = new TestSearchStats() {
@Override
public boolean isSingleValue(String field) {
return true;
}

@Override
public String constantValue(String name) {
return name.startsWith("constant_keyword") ? "foo" : null;
}
};

@ParametersFactory(argumentFormatting = PARAM_FORMATTING)
public static List<Object[]> readScriptSpec() {
return settings().stream().map(t -> {
Expand Down Expand Up @@ -1856,6 +1870,113 @@ public void testPushDownFieldExtractToTimeSeriesSource() {
assertTrue(timeSeriesSource.attrs().stream().noneMatch(EsQueryExec::isSourceAttribute));
}

/**
* LimitExec[1000[INTEGER]]
* \_ExchangeExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419,
* double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423,
* !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432], false]
* \_ProjectExec[[!alias_integer, boolean{f}#415, byte{f}#416, constant_keyword-foo{f}#417, date{f}#418, date_nanos{f}#419,
* double{f}#420, float{f}#421, half_float{f}#422, integer{f}#424, ip{f}#425, keyword{f}#426, long{f}#427, scaled_float{f}#423,
* !semantic_text, short{f}#429, text{f}#430, unsigned_long{f}#428, version{f}#431, wildcard{f}#432]]
* \_FieldExtractExec[!alias_integer, boolean{f}#415, byte{f}#416, consta..]
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#434], limit[1000], sort[] estimatedRowSize[412]
*/
public void testConstantKeywordWithMatchingFilter() {
String queryText = """
from test
| where `constant_keyword-foo` == "foo"
""";
var analyzer = makeAnalyzer("mapping-all-types.json");
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);

var limit = as(plan, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var field = as(project.child(), FieldExtractExec.class);
var query = as(field.child(), EsQueryExec.class);
assertThat(as(query.limit(), Literal.class).value(), is(1000));
assertNull(query.query());
}

/**
* LimitExec[1000[INTEGER]]
* \_ExchangeExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], false]
* \_LocalSourceExec[[!alias_integer, boolean{f}#4, byte{f}#5, constant_keyword-foo{f}#6, date{f}#7, date_nanos{f}#8, double{f}#9,
* float{f}#10, half_float{f}#11, integer{f}#13, ip{f}#14, keyword{f}#15, long{f}#16, scaled_float{f}#12, !semantic_text,
* short{f}#18, text{f}#19, unsigned_long{f}#17, version{f}#20, wildcard{f}#21], EMPTY]
*/
public void testConstantKeywordWithNonMatchingFilter() {
String queryText = """
from test
| where `constant_keyword-foo` == "non-matching"
""";
var analyzer = makeAnalyzer("mapping-all-types.json");
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);

var limit = as(plan, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var source = as(exchange.child(), LocalSourceExec.class);
}

/**
* LimitExec[1000[INTEGER]]
* \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1...
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{r}#25, date{f}#9, date_nanos{f}#10, double{f}#1...
* \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, date{f}#9,
* \_LimitExec[1000[INTEGER]]
* \_FilterExec[constant_keyword-foo{r}#25 == [66 6f 6f][KEYWORD]]
* \_MvExpandExec[constant_keyword-foo{f}#8,constant_keyword-foo{r}#25]
* \_FieldExtractExec[constant_keyword-foo{f}#8]
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#26], limit[], sort[] estimatedRowSize[412]
*/
public void testConstantKeywordExpandFilter() {
String queryText = """
from test
| mv_expand `constant_keyword-foo`
| where `constant_keyword-foo` == "foo"
""";
var analyzer = makeAnalyzer("mapping-all-types.json");
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);

var limit = as(plan, LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var fieldExtract = as(project.child(), FieldExtractExec.class);
var limit2 = as(fieldExtract.child(), LimitExec.class);
var filter = as(limit2.child(), FilterExec.class);
var expand = as(filter.child(), MvExpandExec.class);
var field = as(expand.child(), FieldExtractExec.class); // MV_EXPAND is not optimized yet (it doesn't accept literals)
as(field.child(), EsQueryExec.class);
}

/**
* DissectExec[constant_keyword-foo{f}#8,Parser[pattern=%{bar}, appendSeparator=, ...
* \_LimitExec[1000[INTEGER]]
* \_ExchangeExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11...
* \_ProjectExec[[!alias_integer, boolean{f}#6, byte{f}#7, constant_keyword-foo{f}#8, date{f}#9, date_nanos{f}#10, double{f}#11...
* \_FieldExtractExec[!alias_integer, boolean{f}#6, byte{f}#7, constant_k..]
* \_EsQueryExec[test], indexMode[standard], query[][_doc{f}#25], limit[1000], sort[] estimatedRowSize[462]
*/
public void testConstantKeywordDissectFilter() {
String queryText = """
from test
| dissect `constant_keyword-foo` "%{bar}"
| where `constant_keyword-foo` == "foo"
""";
var analyzer = makeAnalyzer("mapping-all-types.json");
var plan = plannerOptimizer.plan(queryText, CONSTANT_K_STATS, analyzer);

var dissect = as(plan, DissectExec.class);
var limit = as(dissect.child(), LimitExec.class);
var exchange = as(limit.child(), ExchangeExec.class);
var project = as(exchange.child(), ProjectExec.class);
var field = as(project.child(), FieldExtractExec.class);
var query = as(field.child(), EsQueryExec.class);
assertNull(query.query());
}

private QueryBuilder wrapWithSingleQuery(String query, QueryBuilder inner, String fieldName, Source source) {
return FilterTests.singleValueQuery(query, inner, fieldName, source);
}
Expand Down
Loading
Loading