Skip to content

Commit 8458946

Browse files
authored
added custom nullability for char (#19268)
## Which issue does this PR close? - Closes #19170. ## What changes are included in this PR? - Adds custom nullability for `char`: the output field is nullable only when an input field is nullable.
1 parent 49cfee0 commit 8458946

File tree

1 file changed

+56
-3
lines changed
  • datafusion/spark/src/function/string

1 file changed

+56
-3
lines changed

datafusion/spark/src/function/string/char.rs

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717

1818
use arrow::array::ArrayRef;
1919
use arrow::array::GenericStringBuilder;
20-
use arrow::datatypes::DataType;
2120
use arrow::datatypes::DataType::Int64;
2221
use arrow::datatypes::DataType::Utf8;
22+
use arrow::datatypes::{DataType, Field, FieldRef};
2323
use std::{any::Any, sync::Arc};
2424

2525
use datafusion_common::{cast::as_int64_array, exec_err, Result, ScalarValue};
2626
use datafusion_expr::{
27-
ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
27+
ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28+
Volatility,
2829
};
2930

3031
/// Spark-compatible `char` expression
@@ -62,12 +63,19 @@ impl ScalarUDFImpl for CharFunc {
6263
}
6364

6465
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
65-
Ok(Utf8)
66+
datafusion_common::internal_err!(
67+
"return_type should not be called, use return_field_from_args instead"
68+
)
6669
}
6770

6871
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
6972
spark_chr(&args.args)
7073
}
74+
75+
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
76+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
77+
Ok(Arc::new(Field::new(self.name(), Utf8, nullable)))
78+
}
7179
}
7280

7381
/// Returns the ASCII character having the binary equivalent to the input expression.
@@ -130,3 +138,48 @@ fn chr(args: &[ArrayRef]) -> Result<ArrayRef> {
130138

131139
Ok(Arc::new(builder.finish()) as ArrayRef)
132140
}
141+
142+
/// Checks that the output field of `char` mirrors the nullability of its
/// single `Int64` argument and always reports `Utf8` as its data type.
#[test]
fn test_char_nullability() -> Result<()> {
    use arrow::datatypes::{DataType::Utf8, Field, FieldRef};
    use datafusion_expr::ReturnFieldArgs;
    use std::sync::Arc;

    let char_udf = CharFunc::new();

    // Resolves the output field for one `Int64` argument named "col" with the
    // requested nullability and no known scalar value.
    let resolve_output = |input_nullable: bool| -> Result<FieldRef> {
        let input: FieldRef = Arc::new(Field::new("col", Int64, input_nullable));
        char_udf.return_field_from_args(ReturnFieldArgs {
            arg_fields: &[input],
            scalar_arguments: &[None],
        })
    };

    // Case 1: nullable input => nullable output.
    let from_nullable = resolve_output(true)?;
    assert!(
        from_nullable.is_nullable(),
        "char(col) should be nullable when input column is nullable"
    );
    assert_eq!(
        from_nullable.data_type(),
        &Utf8,
        "char always returns Utf8 regardless of input type"
    );

    // Case 2: non-nullable input => non-nullable output.
    let from_non_nullable = resolve_output(false)?;
    assert!(
        !from_non_nullable.is_nullable(),
        "char(col) should NOT be nullable when input column is NOT nullable"
    );
    assert_eq!(
        from_non_nullable.data_type(),
        &Utf8,
        "char always returns Utf8 regardless of input type"
    );

    Ok(())
}

0 commit comments

Comments
 (0)