Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
b801ba8
implicit casting from string literals to datetime intervals
fang-xing-esql Oct 29, 2024
7c77785
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Oct 29, 2024
b416e77
fix test
fang-xing-esql Oct 29, 2024
28c2fe8
clean up
fang-xing-esql Oct 29, 2024
d41b5d1
Update docs/changelog/115814.yaml
fang-xing-esql Oct 29, 2024
0ff8ca8
refactor
fang-xing-esql Oct 30, 2024
e0f7376
Merge branch 'main' into implicit-casting-string-literal-to-intervals
elasticmachine Oct 30, 2024
7cb0c7c
more tests
fang-xing-esql Oct 30, 2024
11fa25d
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Oct 30, 2024
6a81ea6
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Oct 31, 2024
f17b304
Merge branch 'main' into implicit-casting-string-literal-to-intervals
elasticmachine Oct 31, 2024
dc850d7
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Nov 4, 2024
8b31b45
remove implicit casting to intervals for arithmetic operations
fang-xing-esql Nov 4, 2024
bace39a
make functions() static
fang-xing-esql Nov 5, 2024
4c39d94
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Nov 5, 2024
a8b1b0f
updated docs and addressed review comments
fang-xing-esql Nov 5, 2024
26a0be7
Merge branch 'main' into implicit-casting-string-literal-to-intervals
elasticmachine Nov 6, 2024
0edf60f
Merge branch 'main' into implicit-casting-string-literal-to-intervals
elasticmachine Nov 6, 2024
9c9a95f
Merge branch 'main' into implicit-casting-string-literal-to-intervals
fang-xing-esql Nov 8, 2024
67b6885
make getTargetType private and update docs
fang-xing-esql Nov 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/115814.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115814
summary: "[ES|QL] Implicit casting string literal to intervals"
area: ES|QL
type: enhancement
issues:
- 115352
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
import java.util.function.Function;

import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toUnmodifiableMap;
import static org.elasticsearch.xpack.esql.core.util.PlanStreamInput.readCachedStringWithVersionCheck;
import static org.elasticsearch.xpack.esql.core.util.PlanStreamOutput.writeCachedStringWithVersionCheck;

Expand Down Expand Up @@ -276,7 +275,7 @@ public enum DataType {

private static final Collection<DataType> STRING_TYPES = DataType.types().stream().filter(DataType::isString).toList();

private static final Map<String, DataType> NAME_TO_TYPE = TYPES.stream().collect(toUnmodifiableMap(DataType::typeName, t -> t));
private static final Map<String, DataType> NAME_TO_TYPE;

private static final Map<String, DataType> ES_TO_TYPE;

Expand All @@ -287,6 +286,10 @@ public enum DataType {
map.put("point", DataType.CARTESIAN_POINT);
map.put("shape", DataType.CARTESIAN_SHAPE);
ES_TO_TYPE = Collections.unmodifiableMap(map);
// DATETIME has different esType and typeName, add an entry in NAME_TO_TYPE with date as key
map = TYPES.stream().collect(toMap(DataType::typeName, t -> t));
map.put("date", DataType.DATETIME);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this not go into NAME_OR_ALIAS_TO_TYPE below? date is an alias.

Copy link
Member Author

@fang-xing-esql fang-xing-esql Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I looked into NAME_OR_ALIAS_TO_TYPE a bit further, and it is used by inline_cast only for now. It should have date I think, so that we can support ::date, today only ::datetime is supported. I opened a separate issue to do it, as it requires additional tests, and I'd like to address it separately.

NAME_TO_TYPE = Collections.unmodifiableMap(map);
}

private static final Map<String, DataType> NAME_OR_ALIAS_TO_TYPE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,3 +716,47 @@ FROM employees
2 |1985-10-01T00:00:00.000Z
4 |1985-11-01T00:00:00.000Z
;

bucketByWeekInString
required_capability: implicit_casting_string_literal_to_temporal_amount
FROM employees
| WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z"
| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, "1 week")
| SORT week
;

hires_per_week:long | week:date
2 |1985-02-18T00:00:00.000Z
1 |1985-05-13T00:00:00.000Z
1 |1985-07-08T00:00:00.000Z
1 |1985-09-16T00:00:00.000Z
2 |1985-10-14T00:00:00.000Z
4 |1985-11-18T00:00:00.000Z
;

bucketByMinuteInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM sample_data
| STATS min = min(@timestamp), max = MAX(@timestamp) BY bucket = BUCKET(@timestamp, "30 minutes")
| SORT min
;

min:date | max:date | bucket:date
2023-10-23T12:15:03.360Z|2023-10-23T12:27:28.948Z|2023-10-23T12:00:00.000Z
2023-10-23T13:33:34.937Z|2023-10-23T13:55:01.543Z|2023-10-23T13:30:00.000Z
;

bucketByMonthInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM sample_data
| EVAL adjusted = CASE(TO_LONG(@timestamp) % 2 == 0, @timestamp + 1 month, @timestamp + 2 years)
| STATS c = COUNT(*) BY b = BUCKET(adjusted, "1 month")
| SORT c
;

c:long |b:date
3 |2025-10-01T00:00:00.000Z
4 |2023-11-01T00:00:00.000Z
;
105 changes: 105 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/date.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -1286,3 +1286,108 @@ ROW a = GREATEST(TO_DATETIME("1957-05-23T00:00:00Z"), TO_DATETIME("1958-02-19T00
a:datetime
1958-02-19T00:00:00
;

evalDateTruncMonthInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM employees
| SORT hire_date
| EVAL x = date_trunc("1 month", hire_date)
| KEEP emp_no, hire_date, x
| LIMIT 5;

emp_no:integer | hire_date:date | x:date
10009 | 1985-02-18T00:00:00.000Z | 1985-02-01T00:00:00.000Z
10048 | 1985-02-24T00:00:00.000Z | 1985-02-01T00:00:00.000Z
10098 | 1985-05-13T00:00:00.000Z | 1985-05-01T00:00:00.000Z
10076 | 1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
10061 | 1985-09-17T00:00:00.000Z | 1985-09-01T00:00:00.000Z
;

evalDateTruncHourInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM employees
| SORT hire_date
| EVAL x = date_trunc("240 hours", hire_date)
| KEEP emp_no, hire_date, x
| LIMIT 5;

emp_no:integer | hire_date:date | x:date
10009 | 1985-02-18T00:00:00.000Z | 1985-02-11T00:00:00.000Z
10048 | 1985-02-24T00:00:00.000Z | 1985-02-21T00:00:00.000Z
10098 | 1985-05-13T00:00:00.000Z | 1985-05-12T00:00:00.000Z
10076 | 1985-07-09T00:00:00.000Z | 1985-07-01T00:00:00.000Z
10061 | 1985-09-17T00:00:00.000Z | 1985-09-09T00:00:00.000Z
;

evalDateTruncDayInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM sample_data
| SORT @timestamp ASC
| EVAL t = DATE_TRUNC("1 day", @timestamp)
| KEEP t;

t:date
2023-10-23T00:00:00
2023-10-23T00:00:00
2023-10-23T00:00:00
2023-10-23T00:00:00
2023-10-23T00:00:00
2023-10-23T00:00:00
2023-10-23T00:00:00
;

evalDateTruncMinuteInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM sample_data
| SORT @timestamp ASC
| EVAL t = DATE_TRUNC("1 minute", @timestamp)
| KEEP t;

t:date
2023-10-23T12:15:00
2023-10-23T12:27:00
2023-10-23T13:33:00
2023-10-23T13:51:00
2023-10-23T13:52:00
2023-10-23T13:53:00
2023-10-23T13:55:00
;

evalDateTruncDayInStringNull
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM employees
| WHERE emp_no == 10040
| EVAL x = date_trunc("1 day", birth_date)
| KEEP emp_no, birth_date, x;

emp_no:integer | birth_date:date | x:date
10040 | null | null
;

evalDateTruncYearInString
required_capability: implicit_casting_string_literal_to_temporal_amount

ROW a = 1
| EVAL year_hired = DATE_TRUNC("1 year", "1991-06-26T00:00:00.000Z")
;

a:integer | year_hired:date
1 | 1991-01-01T00:00:00.000Z
;

filteringWithTemporalAmountInString
required_capability: implicit_casting_string_literal_to_temporal_amount

FROM employees
| SORT emp_no
| WHERE birth_date < "2024-01-01" - 70 years
| STATS cnt = count(*);

cnt:long
19
;
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,12 @@ public enum Cap {
* - Introduce BinaryPlan and co
* - Refactor INLINESTATS and LOOKUP as a JOIN block
*/
JOIN_PLANNING_V1(Build.current().isSnapshot());
JOIN_PLANNING_V1(Build.current().isSnapshot()),

/**
* Support implicit casting from string literal to DATE_PERIOD or TIME_DURATION.
*/
IMPLICIT_CASTING_STRING_LITERAL_TO_TEMPORAL_AMOUNT;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
package org.elasticsearch.xpack.esql.analysis;

import org.elasticsearch.common.logging.HeaderWarning;
import org.elasticsearch.common.logging.LoggerMessageFormat;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;
Expand All @@ -31,7 +30,6 @@
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedAttribute;
import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar;
import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.esql.core.expression.predicate.BinaryOperator;
import org.elasticsearch.xpack.esql.core.expression.predicate.operator.comparison.BinaryComparison;
import org.elasticsearch.xpack.esql.core.tree.Source;
Expand All @@ -49,6 +47,7 @@
import org.elasticsearch.xpack.esql.expression.function.FunctionDefinition;
import org.elasticsearch.xpack.esql.expression.function.UnresolvedFunction;
import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
Expand All @@ -60,6 +59,7 @@
import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.EsqlArithmeticOperation;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;
import org.elasticsearch.xpack.esql.index.EsIndex;
import org.elasticsearch.xpack.esql.parser.ParsingException;
import org.elasticsearch.xpack.esql.plan.TableIdentifier;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.Drop;
Expand All @@ -85,6 +85,8 @@
import org.elasticsearch.xpack.esql.stats.FeatureMetric;
import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter;

import java.time.Duration;
import java.time.temporal.TemporalAmount;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
Expand All @@ -106,6 +108,7 @@
import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.GEO_MATCH_TYPE;
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD;
import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE;
import static org.elasticsearch.xpack.esql.core.type.DataType.FLOAT;
import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT;
Expand All @@ -115,9 +118,11 @@
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount;
import static org.elasticsearch.xpack.esql.stats.FeatureMetric.LIMIT;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.maybeParseTemporalAmount;

/**
* This class is part of the planner. Resolves references (such as variable and index names) and performs implicit casting.
Expand All @@ -141,9 +146,14 @@ public class Analyzer extends ParameterizedRuleExecutor<LogicalPlan, AnalyzerCon
);
var resolution = new Batch<>(
"Resolution",
/*
* ImplicitCasting must be before ResolveRefs. Because a reference is created for a Bucket in Aggregate's aggregates,
* resolving this reference before implicit casting may cause this reference to have customMessage=true, it prevents further
* attempts to resolve this reference.
*/
new ImplicitCasting(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change here makes me a bit nervous :-). I am surprised this isn't breaking any existent tests...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you remember why this was after ResolveUnionTypes?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was worried that tests would complain too :-).

This is mainly for Bucket, as a reference(alias) is created for it in the aggregates that need to be resolved separately(on top of the Bucket in the grouping key), I haven't seen another function that needs this change yet.

I tried keeping the order unchanged and handle it in ImplicitCasting, it becomes a little messy. After ResolveRefs, the reference to the original Bucket is marked with customMessage=true, which prevents further attempts to resolve this reference. If we deal with it in the ImplicitCasting we have to add some special code for Bucket there to find out all of the reference to the Bucket in the aggregates and remove customMessage=true from the UnresolvedAttributes. Changing the order seems to be a more clean approach, although it does look more risky.

I don't recall why ImplicitCasting was after ResolveUnionTypes, the ImplicitCasting rule was added beforeResolveUnionTypes. I couldn't think of any reason why ImplicitCasting has to be after ResolveUnionTypes, they seem to be independent of each other, will double check with @craigtaverner or @alex-spies.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Union types was/is tricky, too much back and forth and too many places where things are handled carefully. If @craigtaverner and @alex-spies are ok with this change, it's ok with me as well; they know best the union types area.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think ResolveUnionTypes needed to come after ResolveRefs, and so it was inserted immediately after that, which put it before ImplicitCasting. I don't remember there being any particular reason it needed to occur before ImplicitCasting. But I understand the nervousness. I wonder if there are possible cases where an implicit cast would affect union-types? At first glance I would say no, because this implicit casting seems to be about literals only, not fields, and union types is all about fields, so they should not interact.

Copy link
Contributor

@alex-spies alex-spies Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, putting the implicit casting up shouldn't harm as we loop over these resolution rules, anyway. This may also explain why no tests fail.

However, I'm not sure this solution is complete: I see that we put ImplicitCasting up into first place to avoid issues with the verification of STATS. But this assumes that ImplicitCasting will never require resolved references to do its job inside STATS.

I have a counterexample:

FROM sample_data
| EVAL date = "2024-01-01"::datetime
| STATS min = min(@timestamp), max = MAX(@timestamp) BY c = (date == "2024-01-01")

On the present PR, this results in

Found 1 problem
line 3:3: Plan [Aggregate[STANDARD,[c{r}#5],[MIN(@timestamp{f}#14,true[BOOLEAN]) AS min, MAX(@timestamp{f}#14,true[BOOLEAN]) AS max, ?c]]] optimized incorrectly due to missing references [?c]
java.lang.IllegalStateException: Found 1 problem
line 3:3: Plan [Aggregate[STANDARD,[c{r}#5],[MIN(@timestamp{f}#14,true[BOOLEAN]) AS min, MAX(@timestamp{f}#14,true[BOOLEAN]) AS max, ?c]]] optimized incorrectly due to missing references [?c]
	at __randomizedtesting.SeedInfo.seed([806B2D5E53444BB7:83F1284FDB8264F]:0)
	at org.elasticsearch.xpack.esql.CsvTests.bucket.X(bucket.csv-spec:691)
	at org.elasticsearch.xpack.esql.optimizer.LogicalVerifier.verify(LogicalVerifier.java:40)
	at org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer.optimize(LogicalPlanOptimizer.java:100)
	at org.elasticsearch.xpack.esql.session.EsqlSession.optimizedPlan(EsqlSession.java:528)
	at org.elasticsearch.xpack.esql.CsvTests.executePlan(CsvTests.java:443)
	at org.elasticsearch.xpack.esql.CsvTests.doTest(CsvTests.java:295)
	at org.elasticsearch.xpack.esql.CsvTests.test(CsvTests.java:278)
	at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
	at java.base/java.lang.reflect.Method.invoke(Method.java:580)

which doesn't look like ImplicitCasting is at fault - however, ImplicitCasting should be able to place a ::datetime into the BY clause, and doing so manually fixes the query and makes it run fine.

Update: Also, extracting the BY clause so that ImplicitCasting does actually apply also works fine with this PR:

FROM sample_data
| EVAL date = "2024-01-01"::datetime
| EVAL c = (date == "2024-01-01")
| STATS min = min(@timestamp), max = MAX(@timestamp) BY c

new ResolveRefs(),
new ResolveUnionTypes(), // Must be after ResolveRefs, so union types can be found
new ImplicitCasting()
new ResolveUnionTypes() // Must be after ResolveRefs, so union types can be found
);
var finish = new Batch<>("Finish Analysis", Limiter.ONCE, new AddImplicitLimit(), new UnionTypesCleanup());
rules = List.of(init, resolution, finish);
Expand Down Expand Up @@ -951,7 +961,7 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) {
}

/**
* Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison and In to desired data types.
* Cast string literals in ScalarFunction, EsqlArithmeticOperation, BinaryComparison, In and GroupingFunction to desired data types.
* For example, the string literals in the following expressions will be cast implicitly to the field data type on the left hand side.
* date > "2024-08-21"
* date in ("2024-08-21", "2024-08-22", "2024-08-23")
Expand All @@ -971,23 +981,29 @@ private BitSet gatherPreAnalysisMetrics(LogicalPlan plan, BitSet b) {
private static class ImplicitCasting extends ParameterizedRule<LogicalPlan, LogicalPlan, AnalyzerContext> {
@Override
public LogicalPlan apply(LogicalPlan plan, AnalyzerContext context) {
return plan.transformExpressionsUp(ScalarFunction.class, e -> ImplicitCasting.cast(e, context.functionRegistry()));
return plan.transformExpressionsUp(
org.elasticsearch.xpack.esql.core.expression.function.Function.class,
e -> ImplicitCasting.cast(e, context.functionRegistry())
);
}

private static Expression cast(ScalarFunction f, EsqlFunctionRegistry registry) {
private static Expression cast(org.elasticsearch.xpack.esql.core.expression.function.Function f, EsqlFunctionRegistry registry) {
if (f instanceof In in) {
return processIn(in);
}
if (f instanceof EsqlScalarFunction esf) {
return processScalarFunction(esf, registry);
if (f instanceof EsqlScalarFunction || f instanceof GroupingFunction) { // exclude AggregateFunction until it is needed
return processFunction(f, registry);
}
if (f instanceof EsqlArithmeticOperation || f instanceof BinaryComparison) {
return processBinaryOperator((BinaryOperator) f);
}
return f;
}

private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFunctionRegistry registry) {
private static Expression processFunction(
org.elasticsearch.xpack.esql.core.expression.function.Function f,
EsqlFunctionRegistry registry
) {
List<Expression> args = f.arguments();
List<DataType> targetDataTypes = registry.getDataTypeForStringLiteralConversion(f.getClass());
if (targetDataTypes == null || targetDataTypes.isEmpty()) {
Expand All @@ -1010,9 +1026,11 @@ private static Expression processScalarFunction(EsqlScalarFunction f, EsqlFuncti
}
if (targetDataType != DataType.NULL && targetDataType != DataType.UNSUPPORTED) {
Expression e = castStringLiteral(arg, targetDataType);
childrenChanged = true;
newChildren.add(e);
continue;
if (e != arg) {
childrenChanged = true;
newChildren.add(e);
continue;
}
}
}
} else if (dataType.isNumeric() && canCastMixedNumericTypes(f) && castNumericArgs) {
Expand Down Expand Up @@ -1094,7 +1112,7 @@ private static Expression processIn(In in) {
return childrenChanged ? in.replaceChildren(newChildren) : in;
}

private static boolean canCastMixedNumericTypes(EsqlScalarFunction f) {
private static boolean canCastMixedNumericTypes(org.elasticsearch.xpack.esql.core.expression.function.Function f) {
return f instanceof Coalesce;
}

Expand Down Expand Up @@ -1141,19 +1159,37 @@ private static boolean supportsStringImplicitCasting(DataType type) {
return type == DATETIME || type == IP || type == VERSION || type == BOOLEAN;
}

public static Expression castStringLiteral(Expression from, DataType target) {
private static UnresolvedAttribute unresolvedAttribute(Expression value, String type, Exception e) {
String message = format(
"Cannot convert string [{}] to [{}], error [{}]",
value.fold(),
type,
(e instanceof ParsingException pe) ? pe.getErrorMessage() : e.getMessage()
);
return new UnresolvedAttribute(value.source(), String.valueOf(value.fold()), message);
}

private static Expression castStringLiteralToTemporalAmount(Expression from) {
try {
TemporalAmount result = maybeParseTemporalAmount(from.fold().toString().strip());
if (result == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is null acceptable in any case here?
I find reading the flow of calls a bit "uneven": castString...() will try to maybeParse...(), which if it returns null is OK, but if it can't, it'll throw. Can maybeParse...() throw on empty value? Do we need to "maybe" parse?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybeParseTemporalAmount might return a null, if the literal string does not follow the format of a temporal(there is space between integer and unit), for example "1", "1w", "" etc. It doesn't throw on empty value. The verification of a non temporal format string is deferred to resolveType() to return a proper message. Here are a few examples that the error message is from resolveType():

stats max(emp_no) by bucket(emp_no, "5") returns second argument of [bucket(emp_no, "5")] must be [numeric], found value ["5"] type [keyword]

stats max(emp_no) by bucket(hire_date, "5") returns second argument of [bucket(hire_date, "5")] must be [integral, date_period or time_duration], found value ["5"] type [keyword]

stats max(emp_no) by bucket(hire_date, "5w") returns second argument of [bucket(hire_date, "5w")] must be [integral, date_period or time_duration], found value ["5w"] type [keyword]

stats max(emp_no) by bucket(hire_date, "") returns second argument of [bucket(hire_date, "")] must be [integral, date_period or time_duration], found value [""] type [keyword]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't throw on empty value.

Yes, I was wondering if it could.

The (small, can be ignored ;) ) issue I see is that maybeParseTemporalAmount() is only invoked if isTemporalAmount(). Meaning that it has to be one.
But the failure case is not treated uniformly:

  • If it's not well formatted because there's just one token (or zero, but that's just theoretical, I think, b/c of the grammar), it returns a null.
  • If it's not well formatted because the first token isn't a number, or the second one isn't a defined unit, it'll throw.

And I'm not sure why, since also in the former case, as written, it has to be a temporal amount and it cannot be just one token (or empty).

Copy link
Member Author

@fang-xing-esql fang-xing-esql Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't throw on empty value.

Yes, I was wondering if it could.

The (small, can be ignored ;) ) issue I see is that maybeParseTemporalAmount() is only invoked if isTemporalAmount(). Meaning that it has to be one. But the failure case is not treated uniformly:

  • If it's not well formatted because there's just one token (or zero, but that's just theoretical, I think, b/c of the grammar), it returns a null.
  • If it's not well formatted because the first token isn't a number, or the second one isn't a defined unit, it'll throw.

And I'm not sure why, since also in the former case, as written, it has to be a temporal amount and it cannot be just one token (or empty).

Thanks for explaining it, now I understand the comments better! The validations of Bucket's second argument happen in two places, one in ImplicitCasting, the other in Bucket.resolveType(). Bucket is kind of unique, as its second argument - buckets's data type depends on its first argument - field's data type, and it is checked in resolveType(), ImplicitCasting doesn't check the data type of the other arguments in the same function in order to decide which data type to convert to.

If it has zero or one token - ImplicitCasting keeps it unchanged, and defer it to Bucket.resolveType() throws a proper error message depending on the data type of its first argument, the error message may suggest a temporal or not - this is just for the purpose of getting a proper error message.

If it has more than one token - ImplicitCasting validates the string, and throw error for invalid values.

return from;
}
DataType target = result instanceof Duration ? TIME_DURATION : DATE_PERIOD;
return new Literal(from.source(), result, target);
} catch (Exception e) {
return unresolvedAttribute(from, DATE_PERIOD + " or " + TIME_DURATION, e);
}
}

private static Expression castStringLiteral(Expression from, DataType target) {
assert from.foldable();
try {
Object to = EsqlDataTypeConverter.convert(from.fold(), target);
return new Literal(from.source(), to, target);
return isTemporalAmount(target)
? castStringLiteralToTemporalAmount(from)
: new Literal(from.source(), EsqlDataTypeConverter.convert(from.fold(), target), target);
} catch (Exception e) {
String message = LoggerMessageFormat.format(
"Cannot convert string [{}] to [{}], error [{}]",
from.fold(),
target,
e.getMessage()
);
return new UnresolvedAttribute(from.source(), String.valueOf(from.fold()), message);
return unresolvedAttribute(from, target.toString(), e);
}
}
}
Expand Down
Loading