Skip to content

Commit 3aa0ab7

Browse files
Fix: SparkAscii nullability to depend on input nullability (#19531)
Closes #19169 ## Rationale for this change: The current implementation of `SparkAscii` UDF uses the default `is_nullable` which always returns true. This is incorrect because the output should only be nullable if the input argument is nullable. This change implements proper null propagation behavior by using `return_field_from_args` . ## Changes in PR: - Implemented return_field_from_args for SparkAscii to properly compute output nullability based on input argument nullability - Changed `return_type` to `return internal_err!` since `return_field_from_args` is now used (following the pattern used by other Spark functions like ilike, concat, elt) - Added unit tests verifying the nullability behavior: - Output is nullable when input is nullable - Output is non-nullable when input is non-nullable ## Test Coverage: Yes, tests are included to verify the change. ## User-facing Changes: No user-facing changes. Co-authored-by: Jefffrey <[email protected]>
1 parent 36df145 commit 3aa0ab7

File tree

1 file changed

+58
-6
lines changed
  • datafusion/spark/src/function/string

1 file changed

+58
-6
lines changed

datafusion/spark/src/function/string/ascii.rs

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::datatypes::DataType;
19-
use datafusion_common::Result;
18+
use std::sync::Arc;
19+
20+
use arrow::datatypes::{DataType, Field, FieldRef};
2021
use datafusion_common::types::{NativeType, logical_string};
21-
use datafusion_expr::ColumnarValue;
22+
use datafusion_common::{Result, internal_err};
2223
use datafusion_expr::{
23-
Coercion, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignatureClass,
24-
Volatility,
24+
Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
25+
Signature, TypeSignatureClass, Volatility,
2526
};
2627
use datafusion_functions::string::ascii::ascii;
2728
use datafusion_functions::utils::make_scalar_function;
@@ -75,10 +76,61 @@ impl ScalarUDFImpl for SparkAscii {
7576
}
7677

7778
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
78-
Ok(DataType::Int32)
79+
internal_err!("return_field_from_args should be used instead")
80+
}
81+
82+
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
83+
// ascii returns an Int32 value
84+
// The result is nullable only if any of the input arguments is nullable
85+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
86+
Ok(Arc::new(Field::new("ascii", DataType::Int32, nullable)))
7987
}
8088

8189
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
8290
make_scalar_function(ascii, vec![])(&args.args)
8391
}
8492
}
93+
94+
#[cfg(test)]
95+
mod tests {
96+
use super::*;
97+
use datafusion_expr::ReturnFieldArgs;
98+
99+
#[test]
100+
fn test_return_field_nullable_input() {
101+
let ascii_func = SparkAscii::new();
102+
let nullable_field = Arc::new(Field::new("input", DataType::Utf8, true));
103+
104+
let result = ascii_func
105+
.return_field_from_args(ReturnFieldArgs {
106+
arg_fields: &[nullable_field],
107+
scalar_arguments: &[],
108+
})
109+
.unwrap();
110+
111+
assert_eq!(result.data_type(), &DataType::Int32);
112+
assert!(
113+
result.is_nullable(),
114+
"Output should be nullable when input is nullable"
115+
);
116+
}
117+
118+
#[test]
119+
fn test_return_field_non_nullable_input() {
120+
let ascii_func = SparkAscii::new();
121+
let non_nullable_field = Arc::new(Field::new("input", DataType::Utf8, false));
122+
123+
let result = ascii_func
124+
.return_field_from_args(ReturnFieldArgs {
125+
arg_fields: &[non_nullable_field],
126+
scalar_arguments: &[],
127+
})
128+
.unwrap();
129+
130+
assert_eq!(result.data_type(), &DataType::Int32);
131+
assert!(
132+
!result.is_nullable(),
133+
"Output should not be nullable when input is not nullable"
134+
);
135+
}
136+
}

0 commit comments

Comments
 (0)