From f59d39d7ba9482c4a326744f9a8c39c2ce346436 Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:25:06 -0500 Subject: [PATCH] [ES|QL] Implicit casting string literal to intervals in EsqlScalarFunction and GroupingFunction (#115814) * implicit casting from string literals to datetime intervals (cherry picked from commit b37a829efad85f103fd61a26d8c0d1fcedfbe404) --- docs/changelog/115814.yaml | 6 + docs/reference/esql/implicit-casting.asciidoc | 40 ++++--- .../xpack/esql/core/type/DataType.java | 7 +- .../src/main/resources/bucket.csv-spec | 44 ++++++++ .../src/main/resources/date.csv-spec | 105 ++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../xpack/esql/analysis/Analyzer.java | 86 ++++++++++---- .../function/EsqlFunctionRegistry.java | 47 +++++--- .../esql/type/EsqlDataTypeConverter.java | 72 ++++++++---- .../xpack/esql/analysis/VerifierTests.java | 66 +++++++++++ .../function/AbstractFunctionTestCase.java | 6 +- .../xpack/esql/parser/ExpressionTests.java | 2 +- .../test/esql/26_aggs_bucket.yml | 55 +++++++++ 13 files changed, 458 insertions(+), 85 deletions(-) create mode 100644 docs/changelog/115814.yaml diff --git a/docs/changelog/115814.yaml b/docs/changelog/115814.yaml new file mode 100644 index 0000000000000..34f1213272d6f --- /dev/null +++ b/docs/changelog/115814.yaml @@ -0,0 +1,6 @@ +pr: 115814 +summary: "[ES|QL] Implicit casting string literal to intervals" +area: ES|QL +type: enhancement +issues: + - 115352 diff --git a/docs/reference/esql/implicit-casting.asciidoc b/docs/reference/esql/implicit-casting.asciidoc index f0c0aa3d82063..ffb6d3fc35acb 100644 --- a/docs/reference/esql/implicit-casting.asciidoc +++ b/docs/reference/esql/implicit-casting.asciidoc @@ -5,7 +5,7 @@ Implicit casting ++++ -Often users will input `datetime`, `ip`, `version`, or geospatial objects as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <> to explicitly convert these strings into the desired data types. +Often users will input `date`, `ip`, `version`, `date_period` or `time_duration` as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <> to explicitly convert these strings into the desired data types. Without implicit casting users must explicitly code these `to_X` functions in their queries, when string literals don't match the target data types they are assigned or compared to. Here is an example of using `to_datetime` to explicitly perform a data type conversion. @@ -18,7 +18,7 @@ FROM employees | LIMIT 1 ---- -Implicit casting improves usability, by automatically converting string literals to the target data type. This is most useful when the target data type is `datetime`, `ip`, `version` or a geo spatial. It is natural to specify these as a string in queries. +Implicit casting improves usability, by automatically converting string literals to the target data type. This is most useful when the target data type is `date`, `ip`, `version`, `date_period` or `time_duration`. It is natural to specify these as a string in queries. The first query can be coded without calling the `to_datetime` function, as follows: @@ -38,16 +38,28 @@ The following table details which {esql} operations support implicit casting for [%header.monospaced.styled,format=dsv,separator=|] |=== -||ScalarFunction|BinaryComparison|ArithmeticOperation|InListPredicate|AggregateFunction -|DATETIME|Y|Y|Y|Y|N -|DOUBLE|Y|N|N|N|N -|LONG|Y|N|N|N|N -|INTEGER|Y|N|N|N|N -|IP|Y|Y|Y|Y|N -|VERSION|Y|Y|Y|Y|N -|GEO_POINT|Y|N|N|N|N -|GEO_SHAPE|Y|N|N|N|N -|CARTESIAN_POINT|Y|N|N|N|N -|CARTESIAN_SHAPE|Y|N|N|N|N -|BOOLEAN|Y|Y|Y|Y|N +||ScalarFunction*|Operator*|<>|<> +|DATE|Y|Y|Y|N +|IP|Y|Y|Y|N +|VERSION|Y|Y|Y|N +|BOOLEAN|Y|Y|Y|N +|DATE_PERIOD/TIME_DURATION|Y|N|Y|N |=== + +ScalarFunction* includes: + +<> + +<> + +<> + + +Operator* includes: + +<> + +<> + +<> + diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 9708a3ea0db85..347e6b43099fc 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -29,7 +29,6 @@ import java.util.function.Function; import static java.util.stream.Collectors.toMap; -import static java.util.stream.Collectors.toUnmodifiableMap; import static org.elasticsearch.xpack.esql.core.util.PlanStreamInput.readCachedStringWithVersionCheck; import static org.elasticsearch.xpack.esql.core.util.PlanStreamOutput.writeCachedStringWithVersionCheck; @@ -276,7 +275,7 @@ public enum DataType { private static final Collection STRING_TYPES = DataType.types().stream().filter(DataType::isString).toList(); - private static final Map NAME_TO_TYPE = TYPES.stream().collect(toUnmodifiableMap(DataType::typeName, t -> t)); + private static final Map NAME_TO_TYPE; private static final Map ES_TO_TYPE; @@ -287,6 +286,10 @@ public enum DataType { map.put("point", DataType.CARTESIAN_POINT); map.put("shape", DataType.CARTESIAN_SHAPE); ES_TO_TYPE = Collections.unmodifiableMap(map); + // DATETIME has different esType and typeName, add an entry in NAME_TO_TYPE with date as key + map = TYPES.stream().collect(toMap(DataType::typeName, t -> t)); + map.put("date", DataType.DATETIME); + NAME_TO_TYPE = Collections.unmodifiableMap(map); } private static final Map NAME_OR_ALIAS_TO_TYPE; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec index b8569ead94509..3be3decaf351c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec @@ -716,3 +716,47 @@ FROM employees 2 |1985-10-01T00:00:00.000Z 4 |1985-11-01T00:00:00.000Z ; + +bucketByWeekInString +required_capability: implicit_casting_string_literal_to_temporal_amount +FROM employees +| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" +| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, "1 week") +| SORT week +; + + hires_per_week:long | week:date +2 |1985-02-18T00:00:00.000Z +1 |1985-05-13T00:00:00.000Z +1 |1985-07-08T00:00:00.000Z +1 |1985-09-16T00:00:00.000Z +2 |1985-10-14T00:00:00.000Z +4 |1985-11-18T00:00:00.000Z +; + +bucketByMinuteInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM sample_data +| STATS min = min(@timestamp), max = MAX(@timestamp) BY bucket = BUCKET(@timestamp, "30 minutes") +| SORT min +; + + min:date | max:date | bucket:date +2023-10-23T12:15:03.360Z|2023-10-23T12:27:28.948Z|2023-10-23T12:00:00.000Z +2023-10-23T13:33:34.937Z|2023-10-23T13:55:01.543Z|2023-10-23T13:30:00.000Z +; + +bucketByMonthInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM sample_data +| EVAL adjusted = CASE(TO_LONG(@timestamp) % 2 == 0, @timestamp + 1 month, @timestamp + 2 years) +| STATS c = COUNT(*) BY b = BUCKET(adjusted, "1 month") +| SORT c +; + +c:long |b:date +3 |2025-10-01T00:00:00.000Z +4 |2023-11-01T00:00:00.000Z +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec index 945cf0a5e586d..87963077e3b3d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec @@ -1286,3 +1286,108 @@ ROW a = GREATEST(TO_DATETIME("1957-05-23T00:00:00Z"), TO_DATETIME("1958-02-19T00 a:datetime 1958-02-19T00:00:00 ; + +evalDateTruncMonthInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM employees +| SORT hire_date +| EVAL x = date_trunc("1 month", hire_date) +| KEEP emp_no, hire_date, x +| LIMIT 5; + +emp_no:integer | hire_date:date | x:date +10009 | 1985-02-18T00:00:00.000Z | 1985-02-01T00:00:00.000Z +10048 | 1985-02-24T00:00:00.000Z | 1985-02-01T00:00:00.000Z +10098 | 1985-05-13T00:00:00.000Z | 1985-05-01T00:00:00.000Z +10076 | 1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z +10061 | 1985-09-17T00:00:00.000Z | 1985-09-01T00:00:00.000Z +; + +evalDateTruncHourInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM employees +| SORT hire_date +| EVAL x = date_trunc("240 hours", hire_date) +| KEEP emp_no, hire_date, x +| LIMIT 5; + +emp_no:integer | hire_date:date | x:date +10009 | 1985-02-18T00:00:00.000Z | 1985-02-11T00:00:00.000Z +10048 | 1985-02-24T00:00:00.000Z | 1985-02-21T00:00:00.000Z +10098 | 1985-05-13T00:00:00.000Z | 1985-05-12T00:00:00.000Z +10076 | 1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z +10061 | 1985-09-17T00:00:00.000Z | 1985-09-09T00:00:00.000Z +; + +evalDateTruncDayInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM sample_data +| SORT @timestamp ASC +| EVAL t = DATE_TRUNC("1 day", @timestamp) +| KEEP t; + +t:date +2023-10-23T00:00:00 +2023-10-23T00:00:00 +2023-10-23T00:00:00 +2023-10-23T00:00:00 +2023-10-23T00:00:00 +2023-10-23T00:00:00 +2023-10-23T00:00:00 +; + +evalDateTruncMinuteInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM sample_data +| SORT @timestamp ASC +| EVAL t = DATE_TRUNC("1 minute", @timestamp) +| KEEP t; + +t:date +2023-10-23T12:15:00 +2023-10-23T12:27:00 +2023-10-23T13:33:00 +2023-10-23T13:51:00 +2023-10-23T13:52:00 +2023-10-23T13:53:00 +2023-10-23T13:55:00 +; + +evalDateTruncDayInStringNull +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM employees +| WHERE emp_no == 10040 +| EVAL x = date_trunc("1 day", birth_date) +| KEEP emp_no, birth_date, x; + +emp_no:integer | birth_date:date | x:date +10040 | null | null +; + +evalDateTruncYearInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +ROW a = 1 +| EVAL year_hired = DATE_TRUNC("1 year", "1991-06-26T00:00:00.000Z") +; + +a:integer | year_hired:date +1 | 1991-01-01T00:00:00.000Z +; + +filteringWithTemporalAmountInString +required_capability: implicit_casting_string_literal_to_temporal_amount + +FROM employees +| SORT emp_no +| WHERE birth_date < "2024-01-01" - 70 years +| STATS cnt = count(*); + +cnt:long +19 +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 821ba709bae4b..8442fd66a4423 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -475,7 +475,12 @@ public enum Cap { * - Introduce BinaryPlan and co * - Refactor INLINESTATS and LOOKUP as a JOIN block */ - JOIN_PLANNING_V1(Build.current().isSnapshot()); + JOIN_PLANNING_V1(Build.current().isSnapshot()), + + /** + * Support implicit casting from string literal to DATE_PERIOD or TIME_DURATION. + */ + IMPLICIT_CASTING_STRING_LITERAL_TO_TEMPORAL_AMOUNT; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index 9c173795d0ab1..562d42a94483f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -8,7 +8,6 @@ package org.elasticsearch.xpack.esql.analysis; import org.elasticsearch.common.logging.HeaderWarning; -import org.elasticsearch.common.logging.LoggerMessageFormat; import org.elasticsearch.compute.data.Block; import org.elasticsearch.logging.Logger; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; @@ -31,7 +30,6 @@ import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute; import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; -import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator; import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -49,6 +47,7 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionDefinition; import org.elasticsearch.xpack.esql.expression.function.UnresolvedFunction; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; +import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction; @@ -61,6 +60,7 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.parser.ParsingException; import org.elasticsearch.xpack.esql.plan.TableIdentifier; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Drop; @@ -86,6 +86,8 @@ import org.elasticsearch.xpack.esql.stats.FeatureMetric; import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; +import java.time.Duration; +import java.time.temporal.TemporalAmount; import java.util.ArrayList; import java.util.Arrays; import java.util.BitSet; @@ -107,6 +109,7 @@ import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.GEO_MATCH_TYPE; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; @@ -116,9 +119,11 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT; +import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.maybeParseTemporalAmount; /** * This class is part of the planner. Resolves references (such as variable and index names) and performs implicit casting. @@ -142,9 +147,14 @@ public class Analyzer extends ParameterizedRuleExecutor( "Resolution", + /* + * ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates, + * resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further + * attempts to resolve this reference. + */ + new ImplicitCasting(), new ResolveRefs(), - new ResolveUnionTypes(), // Must be after ResolveRefs, so union types can be found - new ImplicitCasting() + new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found ); var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup()); rules = List.of(init, resolution, finish); @@ -952,13 +962,15 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) { } /** - * Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison and In to desired data types. + * Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison, In and GroupingFunction to desired data types. * For example, the string literals in the following expressions will be cast implicitly to the field data type on the left hand side. * date > "2024-08-21" * date in ("2024-08-21", "2024-08-22", "2024-08-23") * date = "2024-08-21" + 3 days * ip == "127.0.0.1" * version != "1.0" + * bucket(dateField, "1 month") + * date_trunc("1 minute", dateField) * * If the inputs to Coalesce are mixed numeric types, cast the rest of the numeric field or value to the first numeric data type if * applicable. For example, implicit casting converts: @@ -972,15 +984,18 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) { private static class ImplicitCasting extends ParameterizedRule { @Override public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) { - return plan.transformExpressionsUp(ScalarFunction.class, e -> ImplicitCasting.cast(e, context.functionRegistry())); + return plan.transformExpressionsUp( + org.elasticsearch.xpack.esql.core.expression.function.Function.class, + e -> ImplicitCasting.cast(e, context.functionRegistry()) + ); } - private static Expression cast(ScalarFunction f, EsqlFunctionRegistry registry) { + private static Expression cast(org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry) { if (f instanceof In in) { return processIn(in); } - if (f instanceof EsqlScalarFunction esf) { - return processScalarFunction(esf, registry); + if (f instanceof EsqlScalarFunction || f instanceof GroupingFunction) { // exclude AggregateFunction until it is needed + return processScalarOrGroupingFunction(f, registry); } if (f instanceof EsqlArithmeticOperation || f instanceof BinaryComparison) { return processBinaryOperator((BinaryOperator) f); @@ -988,7 +1003,10 @@ private static Expression cast(ScalarFunction f, EsqlFunctionRegistry registry) return f; } - private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFunctionRegistry registry) { + private static Expression processScalarOrGroupingFunction( + org.elasticsearch.xpack.esql.core.expression.function.Function f, + EsqlFunctionRegistry registry + ) { List args = f.arguments(); List targetDataTypes = registry.getDataTypeForStringLiteralConversion(f.getClass()); if (targetDataTypes == null || targetDataTypes.isEmpty()) { @@ -1011,9 +1029,11 @@ private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFuncti } if (targetDataType != DataType.NULL && targetDataType != DataType.UNSUPPORTED) { Expression e = castStringLiteral(arg, targetDataType); - childrenChanged = true; - newChildren.add(e); - continue; + if (e != arg) { + childrenChanged = true; + newChildren.add(e); + continue; + } } } } else if (dataType.isNumeric() && canCastMixedNumericTypes(f) && castNumericArgs) { @@ -1095,7 +1115,7 @@ private static Expression processIn(In in) { return childrenChanged ? in.replaceChildren(newChildren) : in; } - private static boolean canCastMixedNumericTypes(EsqlScalarFunction f) { + private static boolean canCastMixedNumericTypes(org.elasticsearch.xpack.esql.core.expression.function.Function f) { return f instanceof Coalesce; } @@ -1142,19 +1162,37 @@ private static boolean supportsStringImplicitCasting(DataType type) { return type == DATETIME || type == IP || type == VERSION || type == BOOLEAN; } - public static Expression castStringLiteral(Expression from, DataType target) { + private static UnresolvedAttribute unresolvedAttribute(Expression value, String type, Exception e) { + String message = format( + "Cannot convert string [{}] to [{}], error [{}]", + value.fold(), + type, + (e instanceof ParsingException pe) ? pe.getErrorMessage() : e.getMessage() + ); + return new UnresolvedAttribute(value.source(), String.valueOf(value.fold()), message); + } + + private static Expression castStringLiteralToTemporalAmount(Expression from) { + try { + TemporalAmount result = maybeParseTemporalAmount(from.fold().toString().strip()); + if (result == null) { + return from; + } + DataType target = result instanceof Duration ? TIME_DURATION : DATE_PERIOD; + return new Literal(from.source(), result, target); + } catch (Exception e) { + return unresolvedAttribute(from, DATE_PERIOD + " or " + TIME_DURATION, e); + } + } + + private static Expression castStringLiteral(Expression from, DataType target) { assert from.foldable(); try { - Object to = EsqlDataTypeConverter.convert(from.fold(), target); - return new Literal(from.source(), to, target); + return isTemporalAmount(target) + ? castStringLiteralToTemporalAmount(from) + : new Literal(from.source(), EsqlDataTypeConverter.convert(from.fold(), target), target); } catch (Exception e) { - String message = LoggerMessageFormat.format( - "Cannot convert string [{}] to [{}], error [{}]", - from.fold(), - target, - e.getMessage() - ); - return new UnresolvedAttribute(from.source(), String.valueOf(from.fold()), message); + return unresolvedAttribute(from, target.toString(), e); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 7a6ff79d79a65..8ee97ca80a361 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -159,27 +159,30 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.IP; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.LONG; -import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; +import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; +import static org.elasticsearch.xpack.esql.core.type.DataType.isString; public class EsqlFunctionRegistry { - private static final Map, List> dataTypesForStringLiteralConversion = new LinkedHashMap<>(); + private static final Map, List> DATA_TYPES_FOR_STRING_LITERAL_CONVERSIONS = new LinkedHashMap<>(); - private static final Map dataTypeCastingPriority; + private static final Map DATA_TYPE_CASTING_PRIORITY; static { List typePriorityList = Arrays.asList( DATETIME, + DATE_PERIOD, + TIME_DURATION, DOUBLE, LONG, INTEGER, @@ -193,9 +196,9 @@ public class EsqlFunctionRegistry { UNSIGNED_LONG, UNSUPPORTED ); - dataTypeCastingPriority = new HashMap<>(); + DATA_TYPE_CASTING_PRIORITY = new HashMap<>(); for (int i = 0; i < typePriorityList.size(); i++) { - dataTypeCastingPriority.put(typePriorityList.get(i), i); + DATA_TYPE_CASTING_PRIORITY.put(typePriorityList.get(i), i); } } @@ -256,7 +259,7 @@ public Collection listFunctions(String pattern) { .collect(toList()); } - private FunctionDefinition[][] functions() { + private static FunctionDefinition[][] functions() { return new FunctionDefinition[][] { // grouping functions new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), }, @@ -435,6 +438,11 @@ public static String normalizeName(String name) { } public record ArgSignature(String name, String[] type, String description, boolean optional, DataType targetDataType) { + + public ArgSignature(String name, String[] type, String description, boolean optional) { + this(name, type, description, optional, UNSUPPORTED); + } + @Override public String toString() { return "ArgSignature{" @@ -475,17 +483,24 @@ public List argDescriptions() { } } - public static DataType getTargetType(String[] names) { + /** + * Build a list target data types, which is used by ImplicitCasting to convert string literals to a target data type. + */ + private static DataType getTargetType(String[] names) { List types = new ArrayList<>(); for (String name : names) { - types.add(DataType.fromEs(name)); - } - if (types.contains(KEYWORD) || types.contains(TEXT)) { - return UNSUPPORTED; + DataType type = DataType.fromTypeName(name); + if (type != null && type != UNSUPPORTED) { // A type should not be null or UNSUPPORTED, just a sanity check here + // If the function takes strings as input, there is no need to cast a string literal to it. + // Return UNSUPPORTED means that ImplicitCasting doesn't support this argument, and it will be skipped by ImplicitCasting. + if (isString(type)) { + return UNSUPPORTED; + } + types.add(type); + } } - return types.stream() - .min((dt1, dt2) -> dataTypeCastingPriority.get(dt1).compareTo(dataTypeCastingPriority.get(dt2))) + .min((dt1, dt2) -> DATA_TYPE_CASTING_PRIORITY.get(dt1).compareTo(DATA_TYPE_CASTING_PRIORITY.get(dt2))) .orElse(UNSUPPORTED); } @@ -557,7 +572,7 @@ private void buildDataTypesForStringLiteralConversion(FunctionDefinition[]... gr for (FunctionDefinition[] group : groupFunctions) { for (FunctionDefinition def : group) { FunctionDescription signature = description(def); - dataTypesForStringLiteralConversion.put( + DATA_TYPES_FOR_STRING_LITERAL_CONVERSIONS.put( def.clazz(), signature.args().stream().map(EsqlFunctionRegistry.ArgSignature::targetDataType).collect(Collectors.toList()) ); @@ -566,7 +581,7 @@ private void buildDataTypesForStringLiteralConversion(FunctionDefinition[]... gr } public List getDataTypeForStringLiteralConversion(Class clazz) { - return dataTypesForStringLiteralConversion.get(clazz); + return DATA_TYPES_FOR_STRING_LITERAL_CONVERSIONS.get(clazz); } private static class SnapshotFunctionRegistry extends EsqlFunctionRegistry { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 7fb998e82001e..93fba06aab988 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -276,27 +276,11 @@ public static TemporalAmount parseTemporalAmount(Object val, DataType expectedTy return null; } StringBuilder value = new StringBuilder(); - StringBuilder qualifier = new StringBuilder(); - StringBuilder nextBuffer = value; - boolean lastWasSpace = false; - for (char c : str.trim().toCharArray()) { - if (c == ' ') { - if (lastWasSpace == false) { - nextBuffer = nextBuffer == value ? qualifier : null; - } - lastWasSpace = true; - continue; - } - if (nextBuffer == null) { - throw new ParsingException(Source.EMPTY, errorMessage, val, expectedType); - } - nextBuffer.append(c); - lastWasSpace = false; - } - - if ((value.isEmpty() || qualifier.isEmpty()) == false) { + StringBuilder temporalUnit = new StringBuilder(); + separateValueAndTemporalUnitForTemporalAmount(str.strip(), value, temporalUnit, errorMessage, expectedType.toString()); + if ((value.isEmpty() || temporalUnit.isEmpty()) == false) { try { - TemporalAmount result = parseTemporalAmount(Integer.parseInt(value.toString()), qualifier.toString(), Source.EMPTY); + TemporalAmount result = parseTemporalAmount(Integer.parseInt(value.toString()), temporalUnit.toString(), Source.EMPTY); if (DataType.DATE_PERIOD == expectedType && result instanceof Period || DataType.TIME_DURATION == expectedType && result instanceof Duration) { return result; @@ -314,6 +298,48 @@ public static TemporalAmount parseTemporalAmount(Object val, DataType expectedTy throw new ParsingException(Source.EMPTY, errorMessage, val, expectedType); } + public static TemporalAmount maybeParseTemporalAmount(String str) { + // The string literal can be either Date_Period or Time_Duration, derive the data type from its temporal unit + String errorMessage = "Cannot parse [{}] to {}"; + String expectedTypes = DATE_PERIOD + " or " + TIME_DURATION; + StringBuilder value = new StringBuilder(); + StringBuilder temporalUnit = new StringBuilder(); + separateValueAndTemporalUnitForTemporalAmount(str, value, temporalUnit, errorMessage, expectedTypes); + if ((value.isEmpty() || temporalUnit.isEmpty()) == false) { + try { + return parseTemporalAmount(Integer.parseInt(value.toString()), temporalUnit.toString(), Source.EMPTY); + } catch (NumberFormatException ex) { + throw new ParsingException(Source.EMPTY, errorMessage, str, expectedTypes); + } + } + return null; + } + + private static void separateValueAndTemporalUnitForTemporalAmount( + String temporalAmount, + StringBuilder value, + StringBuilder temporalUnit, + String errorMessage, + String expectedType + ) { + StringBuilder nextBuffer = value; + boolean lastWasSpace = false; + for (char c : temporalAmount.toCharArray()) { + if (c == ' ') { + if (lastWasSpace == false) { + nextBuffer = nextBuffer == value ? temporalUnit : null; + } + lastWasSpace = true; + continue; + } + if (nextBuffer == null) { + throw new ParsingException(Source.EMPTY, errorMessage, temporalAmount, expectedType); + } + nextBuffer.append(c); + lastWasSpace = false; + } + } + /** * Converts arbitrary object to the desired data type. *

@@ -401,10 +427,10 @@ public static DataType commonType(DataType left, DataType right) { } // generally supporting abbreviations from https://en.wikipedia.org/wiki/Unit_of_time - public static TemporalAmount parseTemporalAmount(Number value, String qualifier, Source source) throws InvalidArgumentException, + public static TemporalAmount parseTemporalAmount(Number value, String temporalUnit, Source source) throws InvalidArgumentException, ArithmeticException, ParsingException { try { - return switch (INTERVALS.valueOf(qualifier.toUpperCase(Locale.ROOT))) { + return switch (INTERVALS.valueOf(temporalUnit.toUpperCase(Locale.ROOT))) { case MILLISECOND, MILLISECONDS, MS -> Duration.ofMillis(safeToLong(value)); case SECOND, SECONDS, SEC, S -> Duration.ofSeconds(safeToLong(value)); case MINUTE, MINUTES, MIN -> Duration.ofMinutes(safeToLong(value)); @@ -417,7 +443,7 @@ public static TemporalAmount parseTemporalAmount(Number value, String qualifier, case YEAR, YEARS, YR, Y -> Period.ofYears(safeToInt(safeToLong(value))); }; } catch (IllegalArgumentException e) { - throw new ParsingException(source, "Unexpected time interval qualifier: '{}'", qualifier); + throw new ParsingException(source, "Unexpected temporal unit: '{}'", temporalUnit); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 0a34d6cd848bb..d9225d266c213 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1667,6 +1667,72 @@ public void testToDatePeriodToTimeDurationWithInvalidType() { ); } + public void testIntervalAsString() { + // DateTrunc + for (String interval : List.of("1 minu", "1 dy", "1.5 minutes", "0.5 days", "minutes 1", "day 5")) { + assertThat( + error("from types | EVAL x = date_trunc(\"" + interval + "\", \"1991-06-26T00:00:00.000Z\")"), + containsString("1:35: Cannot convert string [" + interval + "] to [DATE_PERIOD or TIME_DURATION]") + ); + assertThat( + error("from types | EVAL x = \"1991-06-26T00:00:00.000Z\", y = date_trunc(\"" + interval + "\", x::datetime)"), + containsString("1:67: Cannot convert string [" + interval + "] to [DATE_PERIOD or TIME_DURATION]") + ); + } + for (String interval : List.of("1", "0.5", "invalid")) { + assertThat( + error("from types | EVAL x = date_trunc(\"" + interval + "\", \"1991-06-26T00:00:00.000Z\")"), + containsString( + "1:24: first argument of [date_trunc(\"" + + interval + + "\", \"1991-06-26T00:00:00.000Z\")] must be [dateperiod or timeduration], found value [\"" + + interval + + "\"] type [keyword]" + ) + ); + assertThat( + error("from types | EVAL x = \"1991-06-26T00:00:00.000Z\", y = date_trunc(\"" + interval + "\", x::datetime)"), + containsString( + "1:56: first argument of [date_trunc(\"" + + interval + + "\", x::datetime)] " + + "must be [dateperiod or timeduration], found value [\"" + + interval + + "\"] type [keyword]" + ) + ); + } + + // Bucket + assertEquals( + "1:52: Cannot convert string [1 yar] to [DATE_PERIOD or TIME_DURATION], error [Unexpected temporal unit: 'yar']", + error("from test | stats max(emp_no) by bucket(hire_date, \"1 yar\")") + ); + assertEquals( + "1:52: Cannot convert string [1 hur] to [DATE_PERIOD or TIME_DURATION], error [Unexpected temporal unit: 'hur']", + error("from test | stats max(emp_no) by bucket(hire_date, \"1 hur\")") + ); + assertEquals( + "1:58: Cannot convert string [1 mu] to [DATE_PERIOD or TIME_DURATION], error [Unexpected temporal unit: 'mu']", + error("from test | stats max = max(emp_no) by bucket(hire_date, \"1 mu\") | sort max ") + ); + assertEquals( + "1:34: second argument of [bucket(hire_date, \"1\")] must be [integral, date_period or time_duration], " + + "found value [\"1\"] type [keyword]", + error("from test | stats max(emp_no) by bucket(hire_date, \"1\")") + ); + assertEquals( + "1:40: second argument of [bucket(hire_date, \"1\")] must be [integral, date_period or time_duration], " + + "found value [\"1\"] type [keyword]", + error("from test | stats max = max(emp_no) by bucket(hire_date, \"1\") | sort max ") + ); + assertEquals( + "1:68: second argument of [bucket(y, \"1\")] must be [integral, date_period or time_duration], " + + "found value [\"1\"] type [keyword]", + error("from test | eval x = emp_no, y = hire_date | stats max = max(x) by bucket(y, \"1\") | sort max ") + ); + } + private void query(String query) { defaultAnalyzer.analyze(parser.createStatement(query)); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java index 1e189c6eab038..1ecb471e882fe 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractFunctionTestCase.java @@ -879,8 +879,7 @@ public static void renderDocs() throws IOException { "elseValue", trueValue.type(), "The value that's returned when no condition evaluates to `true`.", - true, - EsqlFunctionRegistry.getTargetType(trueValue.type()) + true ); description = new EsqlFunctionRegistry.FunctionDescription( description.name(), @@ -1085,8 +1084,7 @@ private static void renderDocsForOperators(String name) throws IOException { String[] type = paramInfo == null ? new String[] { "?" } : paramInfo.type(); String desc = paramInfo == null ? "" : paramInfo.description().replace('\n', ' '); boolean optional = paramInfo == null ? false : paramInfo.optional(); - DataType targetDataType = EsqlFunctionRegistry.getTargetType(type); - args.add(new EsqlFunctionRegistry.ArgSignature(paramName, type, desc, optional, targetDataType)); + args.add(new EsqlFunctionRegistry.ArgSignature(paramName, type, desc, optional)); } } renderKibanaFunctionDefinition(name, functionInfo, args, likeOrInOperator(name)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/ExpressionTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/ExpressionTests.java index 67b4dd71260aa..0177747d27243 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/ExpressionTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/ExpressionTests.java @@ -431,7 +431,7 @@ public void testDatePeriodLiterals() { } public void testUnknownNumericQualifier() { - assertParsingException(() -> whereExpression("1 decade"), "Unexpected time interval qualifier: 'decade'"); + assertParsingException(() -> whereExpression("1 decade"), "Unexpected temporal unit: 'decade'"); } public void testQualifiedDecimalLiteral() { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml index ea7684fb69a09..9fbe69ac05f0a 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/26_aggs_bucket.yml @@ -234,3 +234,58 @@ - match: { values.2.1: "2024-08-01T00:00:00.000Z" } - match: { values.3.0: 1 } - match: { values.3.1: "2024-09-01T00:00:00.000Z" } + +--- +"Datetime interval as string": + - requires: + test_runner_features: [allowed_warnings_regex, capabilities] + capabilities: + - method: POST + path: /_query + parameters: [ ] + capabilities: [ implicit_casting_string_literal_to_temporal_amount ] + reason: "interval in parameters as string" + + - do: + indices.create: + index: test_bucket + body: + mappings: + properties: + ts : + type : date + + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "test_bucket" } } + - { "ts": "2024-06-16" } + - { "index": { "_index": "test_bucket" } } + - { "ts": "2024-07-16" } + - { "index": { "_index": "test_bucket" } } + - { "ts": "2024-08-16" } + - { "index": { "_index": "test_bucket" } } + - { "ts": "2024-09-16" } + + - do: + allowed_warnings_regex: + - "No limit defined, adding default limit of \\[.*\\]" + esql.query: + body: + query: 'FROM test_bucket | STATS c = COUNT(*) BY b = BUCKET(ts, ?bucket) | SORT b' + params: [{"bucket" : "1 month"}] + + - match: { columns.0.name: c } + - match: { columns.0.type: long } + - match: { columns.1.name: b } + - match: { columns.1.type: date } + - length: { values: 4 } + - match: { values.0.0: 1 } + - match: { values.0.1: "2024-06-01T00:00:00.000Z" } + - match: { values.1.0: 1 } + - match: { values.1.1: "2024-07-01T00:00:00.000Z" } + - match: { values.2.0: 1 } + - match: { values.2.1: "2024-08-01T00:00:00.000Z" } + - match: { values.3.0: 1 } + - match: { values.3.1: "2024-09-01T00:00:00.000Z" }