Skip to content

Commit 4cc2188

Browse files
committed
use cast wrapper in starts_with
1 parent a5791bc commit 4cc2188

File tree

4 files changed

+15
-20
lines changed

4 files changed

+15
-20
lines changed

datafusion/functions/src/string/starts_with.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::utils::make_scalar_function;
2626
use datafusion_common::types::logical_string;
2727
use datafusion_common::{internal_err, Result, ScalarValue};
2828
use datafusion_expr::{
29-
Coercion, ColumnarValue, Documentation, Expr, Like, ScalarFunctionArgs,
29+
cast, Coercion, ColumnarValue, Documentation, Expr, Like, ScalarFunctionArgs,
3030
ScalarUDFImpl, Signature, TypeSignatureClass, Volatility,
3131
};
3232
use datafusion_macros::user_doc;
@@ -137,7 +137,7 @@ impl ScalarUDFImpl for StartsWithFunc {
137137

138138
return Ok(ExprSimplifyResult::Simplified(Expr::Like(Like {
139139
negated: false,
140-
expr: Box::new(args[0].clone()),
140+
expr: Box::new(cast(args[0].clone(), scalar_value.data_type())),
141141
pattern: Box::new(like_expr),
142142
escape_char: None,
143143
case_insensitive: false,

datafusion/physical-expr/src/expressions/like.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use std::hash::Hash;
1919
use std::{any::Any, sync::Arc};
2020

2121
use crate::PhysicalExpr;
22-
use arrow::compute::can_cast_types;
2322
use arrow::datatypes::{DataType, Schema};
2423
use arrow::record_batch::RecordBatch;
2524
use datafusion_common::{internal_err, Result};
@@ -122,10 +121,7 @@ impl PhysicalExpr for LikeExpr {
122121
fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
123122
use arrow::compute::*;
124123
let lhs = self.expr.evaluate(batch)?;
125-
let rhs = self
126-
.pattern
127-
.evaluate(batch)?
128-
.cast_to(&lhs.data_type(), None)?;
124+
let rhs = self.pattern.evaluate(batch)?;
129125
match (self.negated, self.case_insensitive) {
130126
(false, false) => apply_cmp(&lhs, &rhs, like),
131127
(false, true) => apply_cmp(&lhs, &rhs, ilike),
@@ -169,10 +165,7 @@ pub fn like(
169165
) -> Result<Arc<dyn PhysicalExpr>> {
170166
let expr_type = &expr.data_type(input_schema)?;
171167
let pattern_type = &pattern.data_type(input_schema)?;
172-
if !expr_type.eq(pattern_type)
173-
&& !can_cast_types(expr_type, pattern_type)
174-
&& !can_like_type(expr_type)
175-
{
168+
if !expr_type.eq(pattern_type) && !can_like_type(expr_type) {
176169
return internal_err!(
177170
"The type of {expr_type} AND {pattern_type} of like physical should be same"
178171
);

datafusion/sqllogictest/test_files/parquet.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -619,13 +619,13 @@ query TT
619619
explain select * from foo where starts_with(column1, 'f');
620620
----
621621
logical_plan
622-
01)Filter: foo.column1 LIKE Utf8("f%")
623-
02)--TableScan: foo projection=[column1], partial_filters=[foo.column1 LIKE Utf8("f%")]
622+
01)Filter: CAST(foo.column1 AS Utf8) LIKE Utf8("f%")
623+
02)--TableScan: foo projection=[column1], partial_filters=[CAST(foo.column1 AS Utf8) LIKE Utf8("f%")]
624624
physical_plan
625625
01)CoalesceBatchesExec: target_batch_size=8192
626-
02)--FilterExec: column1@0 LIKE f%
626+
02)--FilterExec: CAST(column1@0 AS Utf8) LIKE f%
627627
03)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
628-
04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/foo.parquet]]}, projection=[column1], file_type=parquet, predicate=column1@0 LIKE f%, pruning_predicate=column1_null_count@2 != row_count@3 AND column1_min@0 <= g AND f <= column1_max@1, required_guarantees=[]
628+
04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet/foo.parquet]]}, projection=[column1], file_type=parquet, predicate=CAST(column1@0 AS Utf8) LIKE f%
629629

630630
statement ok
631631
drop table foo

datafusion/sqllogictest/test_files/string/string_view.slt

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -355,8 +355,9 @@ EXPLAIN SELECT
355355
FROM test;
356356
----
357357
logical_plan
358-
01)Projection: test.column1_utf8view LIKE Utf8("äöüß%") AS c1, CASE test.column1_utf8view IS NOT NULL WHEN Boolean(true) THEN Boolean(true) END AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
359-
02)--TableScan: test projection=[column1_utf8view]
358+
01)Projection: __common_expr_1 LIKE Utf8("äöüß%") AS c1, CASE __common_expr_1 IS NOT NULL WHEN Boolean(true) THEN Boolean(true) END AS c2, starts_with(test.column1_utf8view, Utf8View(NULL)) AS c3, starts_with(Utf8View(NULL), test.column1_utf8view) AS c4
359+
02)--Projection: CAST(test.column1_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view
360+
03)----TableScan: test projection=[column1_utf8view]
360361

361362
## Test STARTS_WITH is rewritten to LIKE when the pattern is a constant
362363
query TT
@@ -370,8 +371,9 @@ EXPLAIN SELECT
370371
FROM test;
371372
----
372373
logical_plan
373-
01)Projection: test.column1_utf8 LIKE Utf8("foo\%%") AS c1, test.column1_large_utf8 LIKE Utf8("foo\%%") AS c2, test.column1_utf8view LIKE Utf8("foo\%%") AS c3, test.column1_utf8 LIKE Utf8("f_o%") AS c4, test.column1_large_utf8 LIKE Utf8("f_o%") AS c5, test.column1_utf8view LIKE Utf8("f_o%") AS c6
374-
02)--TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view]
374+
01)Projection: __common_expr_1 LIKE Utf8("foo\%%") AS c1, __common_expr_2 LIKE Utf8("foo\%%") AS c2, __common_expr_3 LIKE Utf8("foo\%%") AS c3, __common_expr_1 LIKE Utf8("f_o%") AS c4, __common_expr_2 LIKE Utf8("f_o%") AS c5, __common_expr_3 LIKE Utf8("f_o%") AS c6
375+
02)--Projection: CAST(test.column1_utf8 AS Utf8) AS __common_expr_1, CAST(test.column1_large_utf8 AS Utf8) AS __common_expr_2, CAST(test.column1_utf8view AS Utf8) AS __common_expr_3
376+
03)----TableScan: test projection=[column1_utf8, column1_large_utf8, column1_utf8view]
375377

376378
## Test STARTS_WITH works with column arguments
377379
query TT
@@ -940,7 +942,7 @@ EXPLAIN SELECT
940942
FROM test;
941943
----
942944
logical_plan
943-
01)Projection: test.column1_utf8view LIKE Utf8("foo%") AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
945+
01)Projection: CAST(test.column1_utf8view AS Utf8) LIKE Utf8("foo%") AS c, starts_with(test.column1_utf8view, test.column2_utf8view) AS c2
944946
02)--TableScan: test projection=[column1_utf8view, column2_utf8view]
945947

946948
## Ensure no casts for TRANSLATE

0 commit comments

Comments
 (0)