Skip to content

Commit 1ce4b51

Browse files
authored
Various refactors to string functions (#19402)
## Which issue does this PR close?

N/A

## Rationale for this change

<!--
Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes.
-->

Clean up some signatures & unnecessary code in string functions

## What changes are included in this PR?

<!--
There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR.
-->

Various refactors, see comments.

## Are these changes tested?

<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example, are they covered by existing tests)?
-->

Existing tests.

## Are there any user-facing changes?

<!--
If there are user-facing changes then we may require documentation to be updated before approving the PR.
-->

No.

<!--
If there are any breaking changes to public APIs, please add the `api change` label.
-->
1 parent f1e5c94 commit 1ce4b51

File tree

8 files changed

+56
-73
lines changed

8 files changed

+56
-73
lines changed

datafusion/functions/src/string/ends_with.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use arrow::compute::kernels::comparison::ends_with as arrow_ends_with;
2323
use arrow::datatypes::DataType;
2424

2525
use datafusion_common::types::logical_string;
26+
use datafusion_common::utils::take_function_args;
2627
use datafusion_common::{Result, ScalarValue, exec_err};
2728
use datafusion_expr::binary::{binary_to_string_coercion, string_coercion};
2829
use datafusion_expr::{
@@ -95,12 +96,7 @@ impl ScalarUDFImpl for EndsWithFunc {
9596
}
9697

9798
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
98-
let [str_arg, suffix_arg] = args.args.as_slice() else {
99-
return exec_err!(
100-
"ends_with was called with {} arguments, expected 2",
101-
args.args.len()
102-
);
103-
};
99+
let [str_arg, suffix_arg] = take_function_args(self.name(), &args.args)?;
104100

105101
// Determine the common type for coercion
106102
let coercion_type = string_coercion(

datafusion/functions/src/string/ltrim.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::any::Any;
2121
use std::sync::Arc;
2222

2323
use crate::string::common::*;
24-
use crate::utils::{make_scalar_function, utf8_to_str_type};
24+
use crate::utils::make_scalar_function;
2525
use datafusion_common::types::logical_string;
2626
use datafusion_common::{Result, exec_err};
2727
use datafusion_expr::function::Hint;
@@ -115,11 +115,7 @@ impl ScalarUDFImpl for LtrimFunc {
115115
}
116116

117117
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
118-
if arg_types[0] == DataType::Utf8View {
119-
Ok(DataType::Utf8View)
120-
} else {
121-
utf8_to_str_type(&arg_types[0], "ltrim")
122-
}
118+
Ok(arg_types[0].clone())
123119
}
124120

125121
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {

datafusion/functions/src/string/rtrim.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::any::Any;
2121
use std::sync::Arc;
2222

2323
use crate::string::common::*;
24-
use crate::utils::{make_scalar_function, utf8_to_str_type};
24+
use crate::utils::make_scalar_function;
2525
use datafusion_common::types::logical_string;
2626
use datafusion_common::{Result, exec_err};
2727
use datafusion_expr::function::Hint;
@@ -115,11 +115,7 @@ impl ScalarUDFImpl for RtrimFunc {
115115
}
116116

117117
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
118-
if arg_types[0] == DataType::Utf8View {
119-
Ok(DataType::Utf8View)
120-
} else {
121-
utf8_to_str_type(&arg_types[0], "rtrim")
122-
}
118+
Ok(arg_types[0].clone())
123119
}
124120

125121
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {

datafusion/functions/src/string/split_part.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@ use arrow::array::{AsArray, GenericStringBuilder};
2424
use arrow::datatypes::DataType;
2525
use datafusion_common::ScalarValue;
2626
use datafusion_common::cast::as_int64_array;
27+
use datafusion_common::types::{NativeType, logical_int64, logical_string};
2728
use datafusion_common::{DataFusionError, Result, exec_err};
28-
use datafusion_expr::{ColumnarValue, Documentation, TypeSignature, Volatility};
29+
use datafusion_expr::{
30+
Coercion, ColumnarValue, Documentation, TypeSignatureClass, Volatility,
31+
};
2932
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature};
3033
use datafusion_macros::user_doc;
3134
use std::any::Any;
@@ -60,19 +63,16 @@ impl Default for SplitPartFunc {
6063

6164
impl SplitPartFunc {
6265
pub fn new() -> Self {
63-
use DataType::*;
6466
Self {
65-
signature: Signature::one_of(
67+
signature: Signature::coercible(
6668
vec![
67-
TypeSignature::Exact(vec![Utf8View, Utf8View, Int64]),
68-
TypeSignature::Exact(vec![Utf8View, Utf8, Int64]),
69-
TypeSignature::Exact(vec![Utf8View, LargeUtf8, Int64]),
70-
TypeSignature::Exact(vec![Utf8, Utf8View, Int64]),
71-
TypeSignature::Exact(vec![Utf8, Utf8, Int64]),
72-
TypeSignature::Exact(vec![LargeUtf8, Utf8View, Int64]),
73-
TypeSignature::Exact(vec![LargeUtf8, Utf8, Int64]),
74-
TypeSignature::Exact(vec![Utf8, LargeUtf8, Int64]),
75-
TypeSignature::Exact(vec![LargeUtf8, LargeUtf8, Int64]),
69+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
70+
Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
71+
Coercion::new_implicit(
72+
TypeSignatureClass::Native(logical_int64()),
73+
vec![TypeSignatureClass::Integer],
74+
NativeType::Int64,
75+
),
7676
],
7777
Volatility::Immutable,
7878
),

datafusion/functions/src/string/starts_with.rs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::sync::Arc;
2121
use arrow::array::{ArrayRef, Scalar};
2222
use arrow::compute::kernels::comparison::starts_with as arrow_starts_with;
2323
use arrow::datatypes::DataType;
24+
use datafusion_common::utils::take_function_args;
2425
use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo};
2526
use datafusion_expr::type_coercion::binary::{
2627
binary_to_string_coercion, string_coercion,
@@ -92,12 +93,7 @@ impl ScalarUDFImpl for StartsWithFunc {
9293
}
9394

9495
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
95-
let [str_arg, prefix_arg] = args.args.as_slice() else {
96-
return exec_err!(
97-
"starts_with was called with {} arguments, expected 2",
98-
args.args.len()
99-
);
100-
};
96+
let [str_arg, prefix_arg] = take_function_args(self.name(), &args.args)?;
10197

10298
// Determine the common type for coercion
10399
let coercion_type = string_coercion(

datafusion/functions/src/string/to_hex.rs

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,16 @@ use std::sync::Arc;
2121
use crate::utils::make_scalar_function;
2222
use arrow::array::{Array, ArrayRef, StringArray};
2323
use arrow::buffer::{Buffer, OffsetBuffer};
24-
use arrow::datatypes::DataType::{
25-
Int8, Int16, Int32, Int64, UInt8, UInt16, UInt32, UInt64, Utf8,
26-
};
2724
use arrow::datatypes::{
2825
ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
2926
Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
3027
};
31-
use datafusion_common::Result;
3228
use datafusion_common::cast::as_primitive_array;
33-
use datafusion_common::{exec_err, plan_err};
34-
35-
use datafusion_expr::{ColumnarValue, Documentation};
36-
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility};
37-
use datafusion_expr_common::signature::TypeSignature::Exact;
29+
use datafusion_common::{Result, ScalarValue, exec_err};
30+
use datafusion_expr::{
31+
Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
32+
TypeSignatureClass, Volatility,
33+
};
3834
use datafusion_macros::user_doc;
3935

4036
/// Hex lookup table for fast conversion
@@ -201,17 +197,8 @@ impl Default for ToHexFunc {
201197
impl ToHexFunc {
202198
pub fn new() -> Self {
203199
Self {
204-
signature: Signature::one_of(
205-
vec![
206-
Exact(vec![Int8]),
207-
Exact(vec![Int16]),
208-
Exact(vec![Int32]),
209-
Exact(vec![Int64]),
210-
Exact(vec![UInt8]),
211-
Exact(vec![UInt16]),
212-
Exact(vec![UInt32]),
213-
Exact(vec![UInt64]),
214-
],
200+
signature: Signature::coercible(
201+
vec![Coercion::new_exact(TypeSignatureClass::Integer)],
215202
Volatility::Immutable,
216203
),
217204
}
@@ -231,25 +218,37 @@ impl ScalarUDFImpl for ToHexFunc {
231218
&self.signature
232219
}
233220

234-
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
235-
Ok(match arg_types[0] {
236-
Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => Utf8,
237-
_ => {
238-
return plan_err!("The to_hex function can only accept integers.");
239-
}
240-
})
221+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
222+
Ok(DataType::Utf8)
241223
}
242224

243225
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
244226
match args.args[0].data_type() {
245-
Int64 => make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args),
246-
UInt64 => make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args),
247-
Int32 => make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args),
248-
UInt32 => make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args),
249-
Int16 => make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args),
250-
UInt16 => make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args),
251-
Int8 => make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args),
252-
UInt8 => make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args),
227+
DataType::Null => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
228+
DataType::Int64 => {
229+
make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args)
230+
}
231+
DataType::UInt64 => {
232+
make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args)
233+
}
234+
DataType::Int32 => {
235+
make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args)
236+
}
237+
DataType::UInt32 => {
238+
make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args)
239+
}
240+
DataType::Int16 => {
241+
make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args)
242+
}
243+
DataType::UInt16 => {
244+
make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args)
245+
}
246+
DataType::Int8 => {
247+
make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args)
248+
}
249+
DataType::UInt8 => {
250+
make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args)
251+
}
253252
other => exec_err!("Unsupported data type {other:?} for function to_hex"),
254253
}
255254
}

datafusion/functions/src/string/uuid.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ impl Default for UuidFunc {
5656
impl UuidFunc {
5757
pub fn new() -> Self {
5858
Self {
59-
signature: Signature::exact(vec![], Volatility::Volatile),
59+
signature: Signature::nullary(Volatility::Volatile),
6060
}
6161
}
6262
}

datafusion/sqllogictest/test_files/encoding.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ select decode('', null) from test;
7373
query error DataFusion error: This feature is not implemented: Encoding must be a scalar; array specified encoding is not yet supported
7474
select decode('', hex_field) from test;
7575

76-
query error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'to_hex' function
76+
query error DataFusion error: Error during planning: Internal error: Expect TypeSignatureClass::Integer but received NativeType::String, DataType: Utf8View
7777
select to_hex(hex_field) from test;
7878

7979
query error DataFusion error: Execution error: Failed to decode value using base64

0 commit comments

Comments
 (0)