Skip to content

Commit 67b526a

Browse files
authored
Refactor Spark crc32 & sha1 to remove unnecessary scalar argument check (#19466)
If we have a scalar argument that is null, the datatype it comes from is already nullable, so there's no need to check both; we only need to check the nullability of the datatype.
1 parent 902d3b3 commit 67b526a

File tree

2 files changed

+2
-31
lines changed

2 files changed

+2
-31
lines changed

datafusion/spark/src/function/hash/crc32.rs

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,7 @@ impl ScalarUDFImpl for SparkCrc32 {
7979
}
8080

8181
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
82-
let nullable = args.arg_fields.iter().any(|f| f.is_nullable())
83-
|| args
84-
.scalar_arguments
85-
.iter()
86-
.any(|scalar| scalar.is_some_and(|s| s.is_null()));
82+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
8783
Ok(Arc::new(Field::new(self.name(), DataType::Int64, nullable)))
8884
}
8985

@@ -135,7 +131,6 @@ fn spark_crc32(args: &[ArrayRef]) -> Result<ArrayRef> {
135131
#[cfg(test)]
136132
mod tests {
137133
use super::*;
138-
use datafusion_common::ScalarValue;
139134

140135
#[test]
141136
fn test_crc32_nullability() -> Result<()> {
@@ -159,15 +154,6 @@ mod tests {
159154
assert!(result.is_nullable());
160155
assert_eq!(result.data_type(), &DataType::Int64);
161156

162-
// null scalar value - user input literal NULL
163-
let scalar_null = ScalarValue::Binary(None);
164-
let result = crc32_func.return_field_from_args(ReturnFieldArgs {
165-
arg_fields: &[field_not_null],
166-
scalar_arguments: &[Some(&scalar_null)],
167-
})?;
168-
assert!(result.is_nullable());
169-
assert_eq!(result.data_type(), &DataType::Int64);
170-
171157
Ok(())
172158
}
173159
}

datafusion/spark/src/function/hash/sha1.rs

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,7 @@ impl ScalarUDFImpl for SparkSha1 {
8686
}
8787

8888
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
89-
let nullable = args.arg_fields.iter().any(|f| f.is_nullable())
90-
|| args
91-
.scalar_arguments
92-
.iter()
93-
.any(|scalar| scalar.is_some_and(|s| s.is_null()));
94-
89+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
9590
Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable)))
9691
}
9792

@@ -146,7 +141,6 @@ fn spark_sha1(args: &[ArrayRef]) -> Result<ArrayRef> {
146141
#[cfg(test)]
147142
mod tests {
148143
use super::*;
149-
use datafusion_common::ScalarValue;
150144

151145
#[test]
152146
fn test_sha1_nullability() -> Result<()> {
@@ -170,15 +164,6 @@ mod tests {
170164
assert!(out.is_nullable());
171165
assert_eq!(out.data_type(), &DataType::Utf8);
172166

173-
// Null scalar argument also makes output nullable
174-
let null_scalar = ScalarValue::Binary(None);
175-
let out = func.return_field_from_args(ReturnFieldArgs {
176-
arg_fields: &[Arc::clone(&non_nullable)],
177-
scalar_arguments: &[Some(&null_scalar)],
178-
})?;
179-
assert!(out.is_nullable());
180-
assert_eq!(out.data_type(), &DataType::Utf8);
181-
182167
Ok(())
183168
}
184169
}

0 commit comments

Comments
 (0)