Skip to content

Commit 3879b24

Browse files
fix: spark sha1 nullability reporting (#19242)
## Which issue does this PR close? - Closes #19158 ## Rationale for this change The Spark `sha1` UDF was always marked nullable because it relied on the default nullability; Spark semantics require the output nullability to follow the input (or null scalar) instead. ## What changes are included in this PR? - Implement `return_field_from_args` for `SparkSha1` to report `Utf8` with nullability derived from argument fields or null scalar args, and guard `return_type`. - Add a nullability unit test covering non-nullable input, nullable input, and null scalar cases. ## Are these changes tested? Yes, ## Are there any user-facing changes? No behavior change beyond correct nullability reporting for `sha1` results.
1 parent fe11ad6 commit 3879b24

File tree

1 file changed

+54
-4
lines changed
  • datafusion/spark/src/function/hash

1 file changed

+54
-4
lines changed

datafusion/spark/src/function/hash/sha1.rs

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::fmt::Write;
2020
use std::sync::Arc;
2121

2222
use arrow::array::{ArrayRef, StringArray};
23-
use arrow::datatypes::DataType;
23+
use arrow::datatypes::{DataType, Field, FieldRef};
2424
use datafusion_common::cast::{
2525
as_binary_array, as_binary_view_array, as_fixed_size_binary_array,
2626
as_large_binary_array,
@@ -29,8 +29,8 @@ use datafusion_common::types::{NativeType, logical_string};
2929
use datafusion_common::utils::take_function_args;
3030
use datafusion_common::{Result, internal_err};
3131
use datafusion_expr::{
32-
Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature,
33-
TypeSignatureClass, Volatility,
32+
Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
33+
Signature, TypeSignatureClass, Volatility,
3434
};
3535
use datafusion_functions::utils::make_scalar_function;
3636
use sha1::{Digest, Sha1};
@@ -82,7 +82,17 @@ impl ScalarUDFImpl for SparkSha1 {
8282
}
8383

8484
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
85-
Ok(DataType::Utf8)
85+
internal_err!("return_field_from_args should be used instead")
86+
}
87+
88+
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
89+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable())
90+
|| args
91+
.scalar_arguments
92+
.iter()
93+
.any(|scalar| scalar.is_some_and(|s| s.is_null()));
94+
95+
Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable)))
8696
}
8797

8898
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
@@ -132,3 +142,43 @@ fn spark_sha1(args: &[ArrayRef]) -> Result<ArrayRef> {
132142
}
133143
}
134144
}
145+
146+
#[cfg(test)]
147+
mod tests {
148+
use super::*;
149+
use datafusion_common::ScalarValue;
150+
151+
#[test]
152+
fn test_sha1_nullability() -> Result<()> {
153+
let func = SparkSha1::new();
154+
155+
// Non-nullable input keeps output non-nullable
156+
let non_nullable: FieldRef = Arc::new(Field::new("col", DataType::Binary, false));
157+
let out = func.return_field_from_args(ReturnFieldArgs {
158+
arg_fields: &[Arc::clone(&non_nullable)],
159+
scalar_arguments: &[None],
160+
})?;
161+
assert!(!out.is_nullable());
162+
assert_eq!(out.data_type(), &DataType::Utf8);
163+
164+
// Nullable input makes output nullable
165+
let nullable: FieldRef = Arc::new(Field::new("col", DataType::Binary, true));
166+
let out = func.return_field_from_args(ReturnFieldArgs {
167+
arg_fields: &[Arc::clone(&nullable)],
168+
scalar_arguments: &[None],
169+
})?;
170+
assert!(out.is_nullable());
171+
assert_eq!(out.data_type(), &DataType::Utf8);
172+
173+
// Null scalar argument also makes output nullable
174+
let null_scalar = ScalarValue::Binary(None);
175+
let out = func.return_field_from_args(ReturnFieldArgs {
176+
arg_fields: &[Arc::clone(&non_nullable)],
177+
scalar_arguments: &[Some(&null_scalar)],
178+
})?;
179+
assert!(out.is_nullable());
180+
assert_eq!(out.data_type(), &DataType::Utf8);
181+
182+
Ok(())
183+
}
184+
}

0 commit comments

Comments
 (0)