Skip to content

Commit 63baa7c

Browse files
andygrove authored and MazterQyou committed
Add SQL planner support for Like, ILike and SimilarTo, with optional escape character (apache#3101)
* Make Like a top-level Expr
* revert some changes
* add type validation
* Revert physical plan changes and reduce scope of the PR
* Revert more changes
* Revert more changes
* clippy
* address feedback
* revert change to test
* revert more changes
1 parent 1393097 commit 63baa7c

File tree

3 files changed

+136
-31
lines changed

3 files changed

+136
-31
lines changed

datafusion/core/src/physical_plan/planner.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ use arrow::datatypes::{Schema, SchemaRef};
6868
use arrow::{compute::can_cast_types, datatypes::DataType};
6969
use async_trait::async_trait;
7070
use datafusion_common::OuterQueryCursor;
71+
use datafusion_expr::expr_fn::binary_expr;
7172
use datafusion_physical_expr::expressions::{any, OuterColumn};
7273
use futures::future::BoxFuture;
7374
use futures::{FutureExt, StreamExt, TryStreamExt};
@@ -1080,6 +1081,46 @@ pub fn create_physical_expr(
10801081
)?;
10811082
binary(lhs, *op, rhs, input_schema)
10821083
}
1084+
Expr::Like {
1085+
negated,
1086+
expr,
1087+
pattern,
1088+
escape_char,
1089+
} => {
1090+
if escape_char.is_some() {
1091+
return Err(DataFusionError::Execution(
1092+
"LIKE does not support escape_char".to_string(),
1093+
));
1094+
}
1095+
let op = if *negated {
1096+
Operator::NotLike
1097+
} else {
1098+
Operator::Like
1099+
};
1100+
let bin_expr =
1101+
binary_expr(expr.as_ref().clone(), op, pattern.as_ref().clone());
1102+
create_physical_expr(&bin_expr, input_dfschema, input_schema, execution_props)
1103+
}
1104+
Expr::ILike {
1105+
negated,
1106+
expr,
1107+
pattern,
1108+
escape_char,
1109+
} => {
1110+
if escape_char.is_some() {
1111+
return Err(DataFusionError::Execution(
1112+
"ILIKE does not support escape_char".to_string(),
1113+
));
1114+
}
1115+
let op = if *negated {
1116+
Operator::NotILike
1117+
} else {
1118+
Operator::ILike
1119+
};
1120+
let bin_expr =
1121+
binary_expr(expr.as_ref().clone(), op, pattern.as_ref().clone());
1122+
create_physical_expr(&bin_expr, input_dfschema, input_schema, execution_props)
1123+
}
10831124
Expr::Case {
10841125
expr,
10851126
when_then_expr,

datafusion/core/src/sql/planner.rs

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ use crate::logical_plan::Expr::Alias;
3131
use crate::logical_plan::{
3232
and, builder::expand_qualified_wildcard, builder::expand_wildcard, col, lit,
3333
normalize_col, rewrite_udtfs_to_columns, Column, CreateMemoryTable, DFSchema,
34-
DFSchemaRef, DropTable, Expr, LogicalPlan, LogicalPlanBuilder, Operator, PlanType,
35-
ToDFSchema, ToStringifiedPlan,
34+
DFSchemaRef, DropTable, Expr, ExprSchemable, LogicalPlan, LogicalPlanBuilder,
35+
Operator, PlanType, ToDFSchema, ToStringifiedPlan,
3636
};
3737
use crate::optimizer::utils::exprlist_to_columns;
3838
use crate::prelude::JoinType;
@@ -2002,42 +2002,52 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
20022002
}
20032003

20042004
SQLExpr::Like { negated, expr, pattern, escape_char } => {
2005-
match escape_char {
2006-
Some(_) => {
2007-
// to support this we will need to introduce `Expr::Like` instead
2008-
// of treating it like a binary expression
2009-
Err(DataFusionError::NotImplemented("LIKE with ESCAPE is not yet supported".to_string()))
2010-
},
2011-
_ => {
2012-
Ok(Expr::BinaryExpr {
2013-
left: Box::new(self.sql_expr_to_logical_expr(*expr, schema,)?),
2014-
op: if negated { Operator::NotLike } else { Operator::Like },
2015-
right: Box::new(self.sql_expr_to_logical_expr(*pattern, schema)?),
2016-
})
2017-
}
2005+
let pattern = self.sql_expr_to_logical_expr(*pattern, schema)?;
2006+
let pattern_type = pattern.get_type(schema)?;
2007+
if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
2008+
return Err(DataFusionError::Plan(
2009+
"Invalid pattern in LIKE expression".to_string(),
2010+
));
20182011
}
2012+
Ok(Expr::Like {
2013+
negated,
2014+
expr: Box::new(self.sql_expr_to_logical_expr(*expr, schema)?),
2015+
pattern: Box::new(pattern),
2016+
escape_char
2017+
2018+
})
20192019
}
20202020

20212021
SQLExpr::ILike { negated, expr, pattern, escape_char } => {
2022-
match escape_char {
2023-
Some(_) => {
2024-
// to support this we will need to introduce `Expr::ILike` instead
2025-
// of treating it like a binary expression
2026-
Err(DataFusionError::NotImplemented("ILIKE with ESCAPE is not yet supported".to_string()))
2027-
},
2028-
_ => {
2029-
Ok(Expr::BinaryExpr {
2030-
left: Box::new(self.sql_expr_to_logical_expr(*expr, schema,)?),
2031-
op: if negated { Operator::NotILike } else { Operator::ILike },
2032-
right: Box::new(self.sql_expr_to_logical_expr(*pattern, schema)?),
2033-
})
2034-
}
2022+
let pattern = self.sql_expr_to_logical_expr(*pattern, schema)?;
2023+
let pattern_type = pattern.get_type(schema)?;
2024+
if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
2025+
return Err(DataFusionError::Plan(
2026+
"Invalid pattern in ILIKE expression".to_string(),
2027+
));
20352028
}
2029+
Ok(Expr::ILike {
2030+
negated,
2031+
expr: Box::new(self.sql_expr_to_logical_expr(*expr, schema)?),
2032+
pattern: Box::new(pattern),
2033+
escape_char
2034+
})
20362035
}
20372036

2038-
SQLExpr::SimilarTo { .. } => {
2039-
// https://github.com/apache/arrow-datafusion/issues/3099
2040-
Err(DataFusionError::NotImplemented("SIMILAR TO is not yet supported".to_string()))
2037+
SQLExpr::SimilarTo { negated, expr, pattern, escape_char } => {
2038+
let pattern = self.sql_expr_to_logical_expr(*pattern, schema)?;
2039+
let pattern_type = pattern.get_type(schema)?;
2040+
if pattern_type != DataType::Utf8 && pattern_type != DataType::Null {
2041+
return Err(DataFusionError::Plan(
2042+
"Invalid pattern in SIMILAR TO expression".to_string(),
2043+
));
2044+
}
2045+
Ok(Expr::SimilarTo {
2046+
negated,
2047+
expr: Box::new(self.sql_expr_to_logical_expr(*expr, schema)?),
2048+
pattern: Box::new(pattern),
2049+
escape_char
2050+
})
20412051
}
20422052

20432053
SQLExpr::BinaryOp {

datafusion/proto/src/lib.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,60 @@ mod roundtrip_tests {
699699
roundtrip_expr_test!(test_expr, ctx);
700700
}
701701

702+
// Round-trip test for the `Expr::Like` logical expression.
// NOTE(review): `roundtrip_expr_test!` is defined outside this hunk; presumably it
// serializes the expression, deserializes it again and asserts equality — confirm
// against the macro definition.
#[test]
703+
fn roundtrip_like() {
704+
// Helper: builds one `Expr::Like` from the given `negated` flag and optional
// escape character, then hands it to the round-trip macro.
fn like(negated: bool, escape_char: Option<char>) {
705+
let test_expr = Expr::Like {
706+
negated,
707+
expr: Box::new(col("col")),
708+
// NOTE(review): "[0-9]+" looks like a regex rather than a LIKE wildcard
// pattern; here it is only passed along as a string literal.
pattern: Box::new(lit("[0-9]+")),
709+
escape_char,
710+
};
711+
let ctx = SessionContext::new();
712+
roundtrip_expr_test!(test_expr, ctx);
713+
}
714+
// Exercise both `negated` values, each with and without an escape character
// ('X' and a backslash are the two escape characters used).
like(true, Some('X'));
715+
like(false, Some('\\'));
716+
like(true, None);
717+
like(false, None);
718+
}
719+
720+
// Round-trip test for the `Expr::ILike` logical expression.
// NOTE(review): `roundtrip_expr_test!` is defined outside this hunk; presumably it
// serializes the expression, deserializes it again and asserts equality — confirm
// against the macro definition.
#[test]
721+
fn roundtrip_ilike() {
722+
// Helper: builds one `Expr::ILike` from the given `negated` flag and optional
// escape character, then hands it to the round-trip macro.
fn ilike(negated: bool, escape_char: Option<char>) {
723+
let test_expr = Expr::ILike {
724+
negated,
725+
expr: Box::new(col("col")),
726+
// NOTE(review): "[0-9]+" looks like a regex rather than an ILIKE wildcard
// pattern; here it is only passed along as a string literal.
pattern: Box::new(lit("[0-9]+")),
727+
escape_char,
728+
};
729+
let ctx = SessionContext::new();
730+
roundtrip_expr_test!(test_expr, ctx);
731+
}
732+
// Exercise both `negated` values, each with and without an escape character
// ('X' and a backslash are the two escape characters used).
ilike(true, Some('X'));
733+
ilike(false, Some('\\'));
734+
ilike(true, None);
735+
ilike(false, None);
736+
}
737+
738+
// Round-trip test for the `Expr::SimilarTo` logical expression.
// NOTE(review): `roundtrip_expr_test!` is defined outside this hunk; presumably it
// serializes the expression, deserializes it again and asserts equality — confirm
// against the macro definition.
#[test]
739+
fn roundtrip_similar_to() {
740+
// Helper: builds one `Expr::SimilarTo` from the given `negated` flag and optional
// escape character, then hands it to the round-trip macro.
fn similar_to(negated: bool, escape_char: Option<char>) {
741+
let test_expr = Expr::SimilarTo {
742+
negated,
743+
expr: Box::new(col("col")),
744+
pattern: Box::new(lit("[0-9]+")),
745+
escape_char,
746+
};
747+
let ctx = SessionContext::new();
748+
roundtrip_expr_test!(test_expr, ctx);
749+
}
750+
// Exercise both `negated` values, each with and without an escape character
// ('X' and a backslash are the two escape characters used).
similar_to(true, Some('X'));
751+
similar_to(false, Some('\\'));
752+
similar_to(true, None);
753+
similar_to(false, None);
754+
}
755+
702756
#[test]
703757
fn roundtrip_approx_percentile_cont() {
704758
let test_expr = Expr::AggregateFunction {

0 commit comments

Comments
 (0)