Skip to content

Commit df153c2

Browse files
Optimize char expression (#16076)
1 parent 20bb7e6 commit df153c2

File tree

2 files changed

+31
-29
lines changed

2 files changed

+31
-29
lines changed

datafusion/spark/src/function/string/char.rs

Lines changed: 30 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -15,16 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use arrow::array::ArrayRef;
19+
use arrow::array::GenericStringBuilder;
20+
use arrow::datatypes::DataType;
21+
use arrow::datatypes::DataType::Int64;
22+
use arrow::datatypes::DataType::Utf8;
1823
use std::{any::Any, sync::Arc};
1924

20-
use arrow::{
21-
array::{ArrayRef, StringArray},
22-
datatypes::{
23-
DataType,
24-
DataType::{Int64, Utf8},
25-
},
26-
};
27-
2825
use datafusion_common::{cast::as_int64_array, exec_err, Result, ScalarValue};
2926
use datafusion_expr::{
3027
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
@@ -33,25 +30,25 @@ use datafusion_expr::{
3330
/// Spark-compatible `char` expression
3431
/// <https://spark.apache.org/docs/latest/api/sql/index.html#char>
3532
#[derive(Debug)]
36-
pub struct SparkChar {
33+
pub struct CharFunc {
3734
signature: Signature,
3835
}
3936

40-
impl Default for SparkChar {
37+
impl Default for CharFunc {
4138
fn default() -> Self {
4239
Self::new()
4340
}
4441
}
4542

46-
impl SparkChar {
43+
impl CharFunc {
4744
pub fn new() -> Self {
4845
Self {
4946
signature: Signature::uniform(1, vec![Int64], Volatility::Immutable),
5047
}
5148
}
5249
}
5350

54-
impl ScalarUDFImpl for SparkChar {
51+
impl ScalarUDFImpl for CharFunc {
5552
fn as_any(&self) -> &dyn Any {
5653
self
5754
}
@@ -106,25 +103,30 @@ fn spark_chr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
106103
fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
107104
let integer_array = as_int64_array(&args[0])?;
108105

109-
// first map is the iterator, second is for the `Option<_>`
110-
let result = integer_array
111-
.iter()
112-
.map(|integer: Option<i64>| {
113-
integer
114-
.map(|integer| {
115-
if integer < 0 {
116-
return Ok("".to_string()); // Return empty string for negative integers
117-
}
106+
let mut builder = GenericStringBuilder::<i32>::with_capacity(
107+
integer_array.len(),
108+
integer_array.len(),
109+
);
110+
111+
for integer_opt in integer_array {
112+
match integer_opt {
113+
Some(integer) => {
114+
if integer < 0 {
115+
builder.append_value(""); // empty string for negative numbers.
116+
} else {
118117
match core::char::from_u32((integer % 256) as u32) {
119-
Some(ch) => Ok(ch.to_string()),
118+
Some(ch) => builder.append_value(ch.to_string()),
120119
None => {
121-
exec_err!("requested character not compatible for encoding.")
120+
return exec_err!(
121+
"requested character not compatible for encoding."
122+
)
122123
}
123124
}
124-
})
125-
.transpose()
126-
})
127-
.collect::<Result<StringArray>>()?;
125+
}
126+
}
127+
None => builder.append_null(),
128+
}
129+
}
128130

129-
Ok(Arc::new(result) as ArrayRef)
131+
Ok(Arc::new(builder.finish()) as ArrayRef)
130132
}

datafusion/spark/src/function/string/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ use datafusion_functions::make_udf_function;
2626
use std::sync::Arc;
2727

2828
make_udf_function!(ascii::SparkAscii, ascii);
29-
make_udf_function!(char::SparkChar, char);
29+
make_udf_function!(char::CharFunc, char);
3030
make_udf_function!(ilike::SparkILike, ilike);
3131
make_udf_function!(like::SparkLike, like);
3232
make_udf_function!(luhn_check::SparkLuhnCheck, luhn_check);

0 commit comments

Comments
 (0)