Skip to content

Commit 1a575a0

Browse files
authored
feat(spark): implement Spark string function like/ilike (#16962)
* feat: spark like/ilike * update
1 parent 9d6f923 commit 1a575a0

File tree

5 files changed

+474
-5
lines changed

5 files changed

+474
-5
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::ArrayRef;
19+
use arrow::compute::ilike;
20+
use arrow::datatypes::DataType;
21+
use datafusion_common::{exec_err, Result};
22+
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
24+
use datafusion_functions::utils::make_scalar_function;
25+
use std::any::Any;
26+
use std::sync::Arc;
27+
28+
/// ILIKE function for case-insensitive pattern matching
29+
/// <https://spark.apache.org/docs/latest/api/sql/index.html#ilike>
30+
#[derive(Debug)]
31+
pub struct SparkILike {
32+
signature: Signature,
33+
}
34+
35+
impl Default for SparkILike {
36+
fn default() -> Self {
37+
Self::new()
38+
}
39+
}
40+
41+
impl SparkILike {
42+
pub fn new() -> Self {
43+
Self {
44+
signature: Signature::string(2, Volatility::Immutable),
45+
}
46+
}
47+
}
48+
49+
impl ScalarUDFImpl for SparkILike {
50+
fn as_any(&self) -> &dyn Any {
51+
self
52+
}
53+
54+
fn name(&self) -> &str {
55+
"ilike"
56+
}
57+
58+
fn signature(&self) -> &Signature {
59+
&self.signature
60+
}
61+
62+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
63+
Ok(DataType::Boolean)
64+
}
65+
66+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
67+
make_scalar_function(spark_ilike, vec![])(&args.args)
68+
}
69+
}
70+
71+
/// Returns true if str matches pattern (case insensitive).
72+
pub fn spark_ilike(args: &[ArrayRef]) -> Result<ArrayRef> {
73+
if args.len() != 2 {
74+
return exec_err!("ilike function requires exactly 2 arguments");
75+
}
76+
77+
let result = ilike(&args[0], &args[1])?;
78+
Ok(Arc::new(result))
79+
}
80+
81+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::DataType::Boolean;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Checks one `ilike(str, pattern)` case with scalar inputs, once for each
    /// of the `Utf8`, `LargeUtf8` and `Utf8View` scalar variants.
    macro_rules! test_ilike_string_invoke {
        // Internal rule: a single check using the given `ScalarValue` variant.
        (@variant $variant:ident, $text:expr, $pattern:expr, $expected:expr) => {
            test_scalar_function!(
                SparkILike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$variant($text)),
                    ColumnarValue::Scalar(ScalarValue::$variant($pattern))
                ],
                $expected,
                bool,
                Boolean,
                BooleanArray
            );
        };
        ($text:expr, $pattern:expr, $expected:expr) => {
            test_ilike_string_invoke!(@variant Utf8, $text, $pattern, $expected);
            test_ilike_string_invoke!(@variant LargeUtf8, $text, $pattern, $expected);
            test_ilike_string_invoke!(@variant Utf8View, $text, $pattern, $expected);
        };
    }

    #[test]
    fn test_ilike_invoke() -> Result<()> {
        // Shorthand for a non-null string argument.
        let text = |value: &str| Some(String::from(value));

        // `_` wildcard, with every mix of upper and lower case.
        test_ilike_string_invoke!(text("Spark"), text("_park"), Ok(Some(true)));
        test_ilike_string_invoke!(text("Spark"), text("_PARK"), Ok(Some(true)));
        test_ilike_string_invoke!(text("SPARK"), text("_park"), Ok(Some(true)));

        // `%` wildcard as a suffix and as a prefix.
        test_ilike_string_invoke!(text("Spark"), text("sp%"), Ok(Some(true)));
        test_ilike_string_invoke!(text("Spark"), text("SP%"), Ok(Some(true)));
        test_ilike_string_invoke!(text("Spark"), text("%ARK"), Ok(Some(true)));

        // A pattern that does not match.
        test_ilike_string_invoke!(text("Spark"), text("xyz"), Ok(Some(false)));

        // A NULL on either side yields NULL.
        test_ilike_string_invoke!(None, text("_park"), Ok(None));
        test_ilike_string_invoke!(text("Spark"), None, Ok(None));
        test_ilike_string_invoke!(None, None, Ok(None));

        Ok(())
    }
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::ArrayRef;
19+
use arrow::compute::like;
20+
use arrow::datatypes::DataType;
21+
use datafusion_common::{exec_err, Result};
22+
use datafusion_expr::ColumnarValue;
23+
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
24+
use datafusion_functions::utils::make_scalar_function;
25+
use std::any::Any;
26+
use std::sync::Arc;
27+
28+
/// LIKE function for case-sensitive pattern matching
29+
/// <https://spark.apache.org/docs/latest/api/sql/index.html#like>
30+
#[derive(Debug)]
31+
pub struct SparkLike {
32+
signature: Signature,
33+
}
34+
35+
impl Default for SparkLike {
36+
fn default() -> Self {
37+
Self::new()
38+
}
39+
}
40+
41+
impl SparkLike {
42+
pub fn new() -> Self {
43+
Self {
44+
signature: Signature::string(2, Volatility::Immutable),
45+
}
46+
}
47+
}
48+
49+
impl ScalarUDFImpl for SparkLike {
50+
fn as_any(&self) -> &dyn Any {
51+
self
52+
}
53+
54+
fn name(&self) -> &str {
55+
"like"
56+
}
57+
58+
fn signature(&self) -> &Signature {
59+
&self.signature
60+
}
61+
62+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
63+
Ok(DataType::Boolean)
64+
}
65+
66+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
67+
make_scalar_function(spark_like, vec![])(&args.args)
68+
}
69+
}
70+
71+
/// Returns true if str matches pattern (case sensitive).
72+
pub fn spark_like(args: &[ArrayRef]) -> Result<ArrayRef> {
73+
if args.len() != 2 {
74+
return exec_err!("like function requires exactly 2 arguments");
75+
}
76+
77+
let result = like(&args[0], &args[1])?;
78+
Ok(Arc::new(result))
79+
}
80+
81+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::function::utils::test::test_scalar_function;
    use arrow::array::{Array, BooleanArray};
    use arrow::datatypes::DataType::Boolean;
    use datafusion_common::{Result, ScalarValue};
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Checks one `like(str, pattern)` case with scalar inputs, once for each
    /// of the `Utf8`, `LargeUtf8` and `Utf8View` scalar variants.
    macro_rules! test_like_string_invoke {
        // Internal rule: a single check using the given `ScalarValue` variant.
        (@variant $variant:ident, $text:expr, $pattern:expr, $expected:expr) => {
            test_scalar_function!(
                SparkLike::new(),
                vec![
                    ColumnarValue::Scalar(ScalarValue::$variant($text)),
                    ColumnarValue::Scalar(ScalarValue::$variant($pattern))
                ],
                $expected,
                bool,
                Boolean,
                BooleanArray
            );
        };
        ($text:expr, $pattern:expr, $expected:expr) => {
            test_like_string_invoke!(@variant Utf8, $text, $pattern, $expected);
            test_like_string_invoke!(@variant LargeUtf8, $text, $pattern, $expected);
            test_like_string_invoke!(@variant Utf8View, $text, $pattern, $expected);
        };
    }

    #[test]
    fn test_like_invoke() -> Result<()> {
        // Shorthand for a non-null string argument.
        let text = |value: &str| Some(String::from(value));

        // `_` wildcard.
        test_like_string_invoke!(text("Spark"), text("_park"), Ok(Some(true)));
        // case-sensitive
        test_like_string_invoke!(text("Spark"), text("_PARK"), Ok(Some(false)));
        // case-sensitive
        test_like_string_invoke!(text("SPARK"), text("_park"), Ok(Some(false)));

        // `%` wildcard as a suffix.
        test_like_string_invoke!(text("Spark"), text("Sp%"), Ok(Some(true)));
        // case-sensitive
        test_like_string_invoke!(text("Spark"), text("SP%"), Ok(Some(false)));

        // `%` wildcard as a prefix.
        test_like_string_invoke!(text("Spark"), text("%ark"), Ok(Some(true)));
        // case-sensitive
        test_like_string_invoke!(text("Spark"), text("%ARK"), Ok(Some(false)));

        // A pattern that does not match.
        test_like_string_invoke!(text("Spark"), text("xyz"), Ok(Some(false)));

        // A NULL on either side yields NULL.
        test_like_string_invoke!(None, text("_park"), Ok(None));
        test_like_string_invoke!(text("Spark"), None, Ok(None));
        test_like_string_invoke!(None, None, Ok(None));

        Ok(())
    }
}

datafusion/spark/src/function/string/mod.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
pub mod ascii;
1919
pub mod char;
20+
pub mod ilike;
21+
pub mod like;
2022
pub mod luhn_check;
2123

2224
use datafusion_expr::ScalarUDF;
@@ -25,6 +27,8 @@ use std::sync::Arc;
2527

2628
make_udf_function!(ascii::SparkAscii, ascii);
2729
make_udf_function!(char::SparkChar, char);
30+
make_udf_function!(ilike::SparkILike, ilike);
31+
make_udf_function!(like::SparkLike, like);
2832
make_udf_function!(luhn_check::SparkLuhnCheck, luhn_check);
2933

3034
pub mod expr_fn {
@@ -40,6 +44,16 @@ pub mod expr_fn {
4044
"Returns the ASCII character having the binary equivalent to col. If col is larger than 256 the result is equivalent to char(col % 256).",
4145
arg1
4246
));
47+
export_functions!((
48+
ilike,
49+
"Returns true if str matches pattern (case insensitive).",
50+
str pattern
51+
));
52+
export_functions!((
53+
like,
54+
"Returns true if str matches pattern (case sensitive).",
55+
str pattern
56+
));
4357
export_functions!((
4458
luhn_check,
4559
"Returns whether the input string of digits is valid according to the Luhn algorithm.",
@@ -48,5 +62,5 @@ pub mod expr_fn {
4862
}
4963

5064
pub fn functions() -> Vec<Arc<ScalarUDF>> {
51-
vec![ascii(), char(), luhn_check()]
65+
vec![ascii(), char(), ilike(), like(), luhn_check()]
5266
}

0 commit comments

Comments
 (0)