Skip to content

Commit 70e9155

Browse files
authored
catch errors when simplifying cast(lit(...), ...) and bubble those up (#18332)
- Fixes #18326
1 parent c3e49fb commit 70e9155

File tree

9 files changed

+81
-20
lines changed

9 files changed

+81
-20
lines changed

datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,18 @@ impl TreeNodeRewriter for ConstEvaluator<'_> {
571571
ConstSimplifyResult::NotSimplified(s, m) => {
572572
Ok(Transformed::no(Expr::Literal(s, m)))
573573
}
574-
ConstSimplifyResult::SimplifyRuntimeError(_, expr) => {
574+
ConstSimplifyResult::SimplifyRuntimeError(err, expr) => {
575+
// For CAST and TRY_CAST expressions with literal inputs, propagate the error at plan time rather than deferring to execution time.
576+
// This provides clearer error messages and fails fast.
577+
if let Expr::Cast(Cast { ref expr, .. })
578+
| Expr::TryCast(TryCast { ref expr, .. }) = expr
579+
{
580+
if matches!(expr.as_ref(), Expr::Literal(_, _)) {
581+
return Err(err);
582+
}
583+
}
584+
// For other expressions (like CASE, COALESCE), preserve the original expression
585+
// to allow short-circuit evaluation at execution time
575586
Ok(Transformed::yes(expr))
576587
}
577588
},
@@ -4968,6 +4979,56 @@ mod tests {
49684979
);
49694980
}
49704981

4982+
#[test]
4983+
fn simplify_cast_literal() {
4984+
// Test that CAST(literal) expressions are evaluated at plan time
4985+
4986+
// CAST(123 AS Int64) should become 123i64
4987+
let expr = Expr::Cast(Cast::new(Box::new(lit(123i32)), DataType::Int64));
4988+
let expected = lit(123i64);
4989+
assert_eq!(simplify(expr), expected);
4990+
4991+
// CAST(1761630189642 AS Timestamp(Nanosecond, Some("+00:00")))
4992+
// Integer to timestamp cast
4993+
let expr = Expr::Cast(Cast::new(
4994+
Box::new(lit(1761630189642i64)),
4995+
DataType::Timestamp(
4996+
arrow::datatypes::TimeUnit::Nanosecond,
4997+
Some("+00:00".into()),
4998+
),
4999+
));
5000+
// Should evaluate to a timestamp literal
5001+
let result = simplify(expr);
5002+
match result {
5003+
Expr::Literal(ScalarValue::TimestampNanosecond(Some(val), tz), _) => {
5004+
assert_eq!(val, 1761630189642i64);
5005+
assert_eq!(tz.as_deref(), Some("+00:00"));
5006+
}
5007+
other => panic!("Expected TimestampNanosecond literal, got: {other:?}"),
5008+
}
5009+
5010+
// Test CAST of invalid string to timestamp - should return an error at plan time
5011+
// This represents the case from the issue: CAST(Utf8("1761630189642") AS Timestamp)
5012+
// "1761630189642" is NOT a valid timestamp string format
5013+
let expr = Expr::Cast(Cast::new(
5014+
Box::new(lit("1761630189642")),
5015+
DataType::Timestamp(
5016+
arrow::datatypes::TimeUnit::Nanosecond,
5017+
Some("+00:00".into()),
5018+
),
5019+
));
5020+
5021+
// The simplification should now fail with an error at plan time
5022+
let schema = test_schema();
5023+
let props = ExecutionProps::new();
5024+
let simplifier =
5025+
ExprSimplifier::new(SimplifyContext::new(&props).with_schema(schema));
5026+
let result = simplifier.simplify(expr);
5027+
assert!(result.is_err(), "Expected error for invalid cast");
5028+
let err_msg = result.unwrap_err().to_string();
5029+
assert_contains!(err_msg, "Error parsing timestamp");
5030+
}
5031+
49715032
fn if_not_null(expr: Expr, then: bool) -> Expr {
49725033
Expr::Case(Case {
49735034
expr: Some(expr.is_not_null().into()),

datafusion/sqllogictest/test_files/arrow_typeof.slt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ select arrow_cast(interval '30 minutes', 'Duration(Second)');
316316
----
317317
0 days 0 hours 30 mins 0 secs
318318

319-
query error DataFusion error: This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\)
319+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*This feature is not implemented: Unsupported CAST from Utf8 to Duration\(s\)
320320
select arrow_cast('30 minutes', 'Duration(Second)');
321321

322322

@@ -337,7 +337,7 @@ select arrow_cast(timestamp '2000-01-01T00:00:00Z', 'Timestamp(Nanosecond, Some(
337337
----
338338
2000-01-01T00:00:00+08:00
339339

340-
statement error DataFusion error: Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone
340+
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "\+25:00": failed to parse timezone
341341
select arrow_cast(timestamp '2000-01-01T00:00:00', 'Timestamp(Nanosecond, Some( "+25:00" ))');
342342

343343

@@ -406,7 +406,7 @@ select arrow_cast([1], 'FixedSizeList(1, Int64)');
406406
----
407407
[1]
408408

409-
query error DataFusion error: Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3
409+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast to FixedSizeList\(4\): value at index 0 has length 3
410410
select arrow_cast(make_array(1, 2, 3), 'FixedSizeList(4, Int64)');
411411

412412
query ?

datafusion/sqllogictest/test_files/cte.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ WITH RECURSIVE my_cte AS (
764764

765765
# Test issue: https://github.com/apache/datafusion/issues/9794
766766
# Non-recursive term and recursive term have different types, and cannot be cast
767-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
767+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'abc' to value of Int64 type
768768
WITH RECURSIVE my_cte AS (
769769
SELECT 1 AS a
770770
UNION ALL

datafusion/sqllogictest/test_files/errors.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ SELECT
145145
LIMIT 5;
146146

147147

148-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type
148+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'foo' to value of Int64 type
149149
create table foo as values (1), ('foo');
150150

151151
query error DataFusion error: Error during planning: Substring without for/from is not valid

datafusion/sqllogictest/test_files/map.slt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ SELECT MAKE_MAP('POST', 41, 'HEAD', 53, 'PATCH', 30);
155155
----
156156
{POST: 41, HEAD: 53, PATCH: 30}
157157

158-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type
158+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'ab' to value of Int64 type
159159
SELECT MAKE_MAP('POST', 41, 'HEAD', 'ab', 'PATCH', 30);
160160

161161
# Map keys can not be NULL
@@ -523,7 +523,7 @@ SELECT MAP { 'a': 1, 'b': 3 };
523523
----
524524
{a: 1, b: 3}
525525

526-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
526+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
527527
SELECT MAP { 'a': 1, 2: 3 };
528528

529529
# accessing map with non-string key
@@ -670,7 +670,7 @@ SELECT map_entries(MAP { 'a': 1, 'b': 3 });
670670
----
671671
[{key: a, value: 1}, {key: b, value: 3}]
672672

673-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
673+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
674674
SELECT map_entries(MAP { 'a': 1, 2: 3 });
675675

676676
query ?
@@ -721,7 +721,7 @@ SELECT map_keys(MAP { 'a': 1, 'b': 3 });
721721
----
722722
[a, b]
723723

724-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
724+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
725725
SELECT map_keys(MAP { 'a': 1, 2: 3 });
726726

727727
query ?
@@ -768,7 +768,7 @@ NULL
768768

769769
# Tests for map_values
770770

771-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
771+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
772772
SELECT map_values(MAP { 'a': 1, 2: 3 });
773773

774774
query ?

datafusion/sqllogictest/test_files/nullif.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ select nullif(1.0, 2);
112112
----
113113
1
114114

115-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
115+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
116116
select nullif(2, 'a');
117117

118118
query T

datafusion/sqllogictest/test_files/select.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1775,7 +1775,7 @@ DROP TABLE test;
17751775
query error DataFusion error: Arrow error: Parser error: Error parsing timestamp from 'I AM NOT A TIMESTAMP': error parsing date
17761776
SELECT to_timestamp('I AM NOT A TIMESTAMP');
17771777

1778-
query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type
1778+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '' to value of Int32 type
17791779
SELECT CAST('' AS int);
17801780

17811781
# See issue: https://github.com/apache/datafusion/issues/8978

datafusion/sqllogictest/test_files/struct.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ Struct("r": nullable Utf8, "c": nullable Float64)
492492
statement ok
493493
drop table t;
494494

495-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type
495+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type
496496
create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'});
497497

498498
##################################
@@ -554,14 +554,14 @@ statement ok
554554
drop table t;
555555

556556
# row() with incorrect order
557-
statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type
558-
create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values
557+
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'blue' to value of Float32 type
558+
create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values
559559
(row('red', 1), row(2.3, 'blue')),
560560
(row('purple', 1), row('green', 2.3));
561561

562562
# out of order struct literal
563563
# TODO: This query should not fail
564-
statement error DataFusion error: Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type
564+
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type
565565
create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'});
566566

567567
##################################

datafusion/sqllogictest/test_files/timestamps.slt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -691,11 +691,11 @@ select
691691
----
692692
08:09:10.123456789 13:14:15.123456 13:14:15.123 13:14:15
693693

694-
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type
694+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'not a time' to value of Time64\(ns\) type
695695
SELECT TIME 'not a time' as time;
696696

697697
# invalid time
698-
query error DataFusion error: Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type
698+
query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string '24:01:02' to value of Time64\(ns\) type
699699
SELECT TIME '24:01:02' as time;
700700

701701
# invalid timezone
@@ -3271,7 +3271,7 @@ statement error The to_local_time function can only accept Timestamp as the arg
32713271
select to_local_time('2024-04-01T00:00:20Z');
32723272

32733273
# invalid timezone
3274-
statement error DataFusion error: Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone
3274+
statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Parser error: Invalid timezone "Europe/timezone": failed to parse timezone
32753275
select to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/timezone');
32763276

32773277
# valid query

0 commit comments

Comments
 (0)