-
Notifications
You must be signed in to change notification settings - Fork 25.6k
[ES|QL] Implicit casting string literal to intervals in EsqlScalarFunction and GroupingFunction #115814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ES|QL] Implicit casting string literal to intervals in EsqlScalarFunction and GroupingFunction #115814
Changes from 14 commits
b801ba8
7c77785
b416e77
28c2fe8
d41b5d1
0ff8ca8
e0f7376
7cb0c7c
11fa25d
6a81ea6
f17b304
dc850d7
8b31b45
bace39a
4c39d94
a8b1b0f
26a0be7
0edf60f
9c9a95f
67b6885
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 115814 | ||
summary: "[ES|QL] Implicit casting string literal to intervals" | ||
area: ES|QL | ||
type: enhancement | ||
issues: | ||
- 115352 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,6 @@ | |
package org.elasticsearch.xpack.esql.analysis; | ||
|
||
import org.elasticsearch.common.logging.HeaderWarning; | ||
import org.elasticsearch.common.logging.LoggerMessageFormat; | ||
import org.elasticsearch.compute.data.Block; | ||
import org.elasticsearch.logging.Logger; | ||
import org.elasticsearch.xpack.core.enrich.EnrichPolicy; | ||
|
@@ -31,7 +30,6 @@ | |
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; | ||
import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; | ||
import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; | ||
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; | ||
import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; | ||
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; | ||
import org.elasticsearch.xpack.esql.core.tree.Source; | ||
|
@@ -49,6 +47,7 @@ | |
import org.elasticsearch.xpack.esql.expression.function.FunctionDefinition; | ||
import org.elasticsearch.xpack.esql.expression.function.UnresolvedFunction; | ||
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; | ||
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; | ||
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; | ||
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; | ||
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; | ||
|
@@ -60,6 +59,7 @@ | |
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; | ||
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; | ||
import org.elasticsearch.xpack.esql.index.EsIndex; | ||
import org.elasticsearch.xpack.esql.parser.ParsingException; | ||
import org.elasticsearch.xpack.esql.plan.TableIdentifier; | ||
import org.elasticsearch.xpack.esql.plan.logical.Aggregate; | ||
import org.elasticsearch.xpack.esql.plan.logical.Drop; | ||
|
@@ -85,6 +85,8 @@ | |
import org.elasticsearch.xpack.esql.stats.FeatureMetric; | ||
import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; | ||
|
||
import java.time.Duration; | ||
import java.time.temporal.TemporalAmount; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.BitSet; | ||
|
@@ -106,6 +108,7 @@ | |
import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.GEO_MATCH_TYPE; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; | ||
|
@@ -115,9 +118,11 @@ | |
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; | ||
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; | ||
import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT; | ||
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.maybeParseTemporalAmount; | ||
|
||
/** | ||
* This class is part of the planner. Resolves references (such as variable and index names) and performs implicit casting. | ||
|
@@ -141,9 +146,14 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon | |
); | ||
var resolution = new Batch<>( | ||
"Resolution", | ||
/* | ||
* ImplicitCasting must run before ResolveRefs: a reference is created for a Bucket in Aggregate's aggregates, and | ||
* resolving this reference before implicit casting may cause it to have customMessage=true, which prevents further | ||
* attempts to resolve this reference. | ||
*/ | ||
new ImplicitCasting(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change here makes me a bit nervous :-). I am surprised this isn't breaking any existent tests... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you remember why this was after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was worried that tests would complain too :-). This is mainly for I tried keeping the order unchanged and handle it in I don't recall why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Union types was/is tricky, too much back and forth and too many places where things are handled carefully. If @craigtaverner and @alex-spies are ok with this change, it's ok with me as well; they know best the union types area. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, putting the implicit casting up shouldn't harm as we loop over these resolution rules, anyway. This may also explain why no tests fail. However, I'm not sure this solution is complete: I see that we put ImplicitCasting up into first place to avoid issues with the verification of STATS. But this assumes that ImplicitCasting will never require resolved references to do its job inside STATS. I have a counterexample:
On the present PR, this results in
which doesn't look like ImplicitCasting is at fault - however, ImplicitCasting should be able to place a Update: Also, extracting the
|
||
new ResolveRefs(), | ||
new ResolveUnionTypes(), // Must be after ResolveRefs, so union types can be found | ||
new ImplicitCasting() | ||
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found | ||
); | ||
var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup()); | ||
rules = List.of(init, resolution, finish); | ||
|
@@ -951,7 +961,7 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) { | |
} | ||
|
||
/** | ||
* Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison and In to desired data types. | ||
* Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison, In and GroupingFunction to desired data types. | ||
* For example, the string literals in the following expressions will be cast implicitly to the field data type on the left hand side. | ||
fang-xing-esql marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* date > "2024-08-21" | ||
* date in ("2024-08-21", "2024-08-22", "2024-08-23") | ||
|
@@ -971,23 +981,29 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) { | |
private static class ImplicitCasting extends ParameterizedRule<LogicalPlan, LogicalPlan, AnalyzerContext> { | ||
@Override | ||
public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { | ||
return plan.transformExpressionsUp(ScalarFunction.class, e -> ImplicitCasting.cast(e, context.functionRegistry())); | ||
return plan.transformExpressionsUp( | ||
org.elasticsearch.xpack.esql.core.expression.function.Function.class, | ||
e -> ImplicitCasting.cast(e, context.functionRegistry()) | ||
); | ||
} | ||
|
||
private static Expression cast(ScalarFunction f, EsqlFunctionRegistry registry) { | ||
private static Expression cast(org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry) { | ||
if (f instanceof In in) { | ||
return processIn(in); | ||
} | ||
if (f instanceof EsqlScalarFunction esf) { | ||
return processScalarFunction(esf, registry); | ||
if (f instanceof EsqlScalarFunction || f instanceof GroupingFunction) { // exclude AggregateFunction until it is needed | ||
return processFunction(f, registry); | ||
fang-xing-esql marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
if (f instanceof EsqlArithmeticOperation || f instanceof BinaryComparison) { | ||
return processBinaryOperator((BinaryOperator) f); | ||
} | ||
return f; | ||
} | ||
|
||
private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFunctionRegistry registry) { | ||
private static Expression processFunction( | ||
org.elasticsearch.xpack.esql.core.expression.function.Function f, | ||
EsqlFunctionRegistry registry | ||
) { | ||
List<Expression> args = f.arguments(); | ||
List<DataType> targetDataTypes = registry.getDataTypeForStringLiteralConversion(f.getClass()); | ||
if (targetDataTypes == null || targetDataTypes.isEmpty()) { | ||
|
@@ -1010,9 +1026,11 @@ private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFuncti | |
} | ||
if (targetDataType != DataType.NULL && targetDataType != DataType.UNSUPPORTED) { | ||
Expression e = castStringLiteral(arg, targetDataType); | ||
childrenChanged = true; | ||
newChildren.add(e); | ||
continue; | ||
if (e != arg) { | ||
childrenChanged = true; | ||
newChildren.add(e); | ||
continue; | ||
} | ||
} | ||
} | ||
} else if (dataType.isNumeric() && canCastMixedNumericTypes(f) && castNumericArgs) { | ||
|
@@ -1094,7 +1112,7 @@ private static Expression processIn(In in) { | |
return childrenChanged ? in.replaceChildren(newChildren) : in; | ||
} | ||
|
||
private static boolean canCastMixedNumericTypes(EsqlScalarFunction f) { | ||
private static boolean canCastMixedNumericTypes(org.elasticsearch.xpack.esql.core.expression.function.Function f) { | ||
return f instanceof Coalesce; | ||
} | ||
|
||
|
@@ -1141,19 +1159,37 @@ private static boolean supportsStringImplicitCasting(DataType type) { | |
return type == DATETIME || type == IP || type == VERSION || type == BOOLEAN; | ||
} | ||
|
||
public static Expression castStringLiteral(Expression from, DataType target) { | ||
/**
 * Builds the {@link UnresolvedAttribute} returned when a string value cannot be converted to the requested type.
 * The message is formatted from the folded value, the given type name and the error text.
 *
 * @param value expression whose folded value failed to convert; its source is reused for the returned attribute
 * @param type  type name (or names) to report in the message
 * @param e     exception raised by the failed conversion
 * @return a new UnresolvedAttribute constructed from the value's source, String.valueOf of the folded value, and the message
 */
private static UnresolvedAttribute unresolvedAttribute(Expression value, String type, Exception e) { | ||
String message = format( | ||
"Cannot convert string [{}] to [{}], error [{}]", | ||
value.fold(), | ||
type, | ||
// A ParsingException contributes getErrorMessage(); every other exception contributes getMessage().
(e instanceof ParsingException pe) ? pe.getErrorMessage() : e.getMessage() | ||
); | ||
return new UnresolvedAttribute(value.source(), String.valueOf(value.fold()), message); | ||
} | ||
|
||
/**
 * Attempts to turn a string expression into a temporal-amount literal.
 * The folded value is converted to a string, stripped, and passed to maybeParseTemporalAmount. A null result
 * returns {@code from} unchanged; otherwise a new Literal is returned, typed TIME_DURATION when the result is a
 * Duration and DATE_PERIOD for anything else. Any exception raised in the try block is not rethrown: the
 * result of unresolvedAttribute(...) describing the failure is returned instead.
 *
 * @param from the expression to fold and parse
 * @return {@code from} itself, a new Literal, or the expression produced by unresolvedAttribute(...)
 */
private static Expression castStringLiteralToTemporalAmount(Expression from) { | ||
try { | ||
TemporalAmount result = maybeParseTemporalAmount(from.fold().toString().strip()); | ||
if (result == null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, I was wondering if it could. The (small, can be ignored ;) ) issue I see is that
And I'm not sure why, since also in the former case, as written, it has to be a temporal amount and it cannot be just one token (or empty). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Thanks for explaining it, now I understand the comments better! The validations of If it has zero or one token - If it has more than one token - |
||
// Nothing was parsed: hand the original expression back untouched.
return from; | ||
} | ||
// The literal's data type follows the runtime class of the parsed amount.
DataType target = result instanceof Duration ? TIME_DURATION : DATE_PERIOD; | ||
return new Literal(from.source(), result, target); | ||
} catch (Exception e) { | ||
// The concrete target type is unknown when parsing fails, so both candidate types are reported.
return unresolvedAttribute(from, DATE_PERIOD + " or " + TIME_DURATION, e); | ||
} | ||
} | ||
|
||
private static Expression castStringLiteral(Expression from, DataType target) { | ||
assert from.foldable(); | ||
try { | ||
Object to = EsqlDataTypeConverter.convert(from.fold(), target); | ||
return new Literal(from.source(), to, target); | ||
return isTemporalAmount(target) | ||
? castStringLiteralToTemporalAmount(from) | ||
: new Literal(from.source(), EsqlDataTypeConverter.convert(from.fold(), target), target); | ||
} catch (Exception e) { | ||
String message = LoggerMessageFormat.format( | ||
"Cannot convert string [{}] to [{}], error [{}]", | ||
from.fold(), | ||
target, | ||
e.getMessage() | ||
); | ||
return new UnresolvedAttribute(from.source(), String.valueOf(from.fold()), message); | ||
return unresolvedAttribute(from, target.toString(), e); | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this not go into `NAME_OR_ALIAS_TO_TYPE` below? `date` is an alias.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked into `NAME_OR_ALIAS_TO_TYPE` a bit further, and it is used by `inline_cast` only for now. It should have `date`, I think, so that we can support `::date`; today only `::datetime` is supported. I opened a separate issue to do it, as it requires additional tests, and I'd like to address it separately.