Skip to content

Commit f635278

Browse files
committed
revert
1 parent 4204337 commit f635278

File tree

1 file changed

+20
-72
lines changed

1 file changed

+20
-72
lines changed

datafusion/functions/src/crypto/basic.rs

Lines changed: 20 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,17 @@
1818
//! "crypto" DataFusion functions
1919
2020
use arrow::array::{
21-
Array, ArrayRef, AsArray, BinaryArray, BinaryArrayType,
22-
StringViewBuilder,
21+
Array, ArrayRef, AsArray, BinaryArray, BinaryArrayType, StringViewArray,
2322
};
24-
use arrow::compute::StringArrayType;
2523
use arrow::datatypes::DataType;
2624
use blake2::{Blake2b512, Blake2s256, Digest};
2725
use blake3::Hasher as Blake3;
26+
use datafusion_common::cast::as_binary_array;
27+
28+
use arrow::compute::StringArrayType;
2829
use datafusion_common::{
29-
DataFusionError, Result, ScalarValue, exec_err, plan_err, utils::take_function_args,
30+
DataFusionError, Result, ScalarValue, exec_err, internal_err, plan_err,
31+
utils::take_function_args,
3032
};
3133
use datafusion_expr::ColumnarValue;
3234
use md5::Md5;
@@ -136,77 +138,23 @@ impl fmt::Display for DigestAlgorithm {
136138
/// Computes the MD5 hash digest of the given input, returned as a hex-encoded `Utf8View` string
137139
pub fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
138140
let [data] = take_function_args("md5", args)?;
141+
let value = digest_process(data, DigestAlgorithm::Md5)?;
139142

140-
// MD5 returns Utf8View (hex-encoded), so we use optimized fused digest+hex functions
141-
// that avoid creating an intermediate BinaryArray
142-
match data {
143-
ColumnarValue::Array(a) => {
144-
let array = match a.data_type() {
145-
DataType::Utf8View => md5_hex_string_array(&a.as_string_view()),
146-
DataType::Utf8 => md5_hex_string_array(&a.as_string::<i32>()),
147-
DataType::LargeUtf8 => md5_hex_string_array(&a.as_string::<i64>()),
148-
DataType::Binary => md5_hex_binary_array(&a.as_binary::<i32>()),
149-
DataType::LargeBinary => md5_hex_binary_array(&a.as_binary::<i64>()),
150-
DataType::BinaryView => md5_hex_binary_array(&a.as_binary_view()),
151-
other => {
152-
return exec_err!("Unsupported data type {other:?} for function md5");
153-
}
154-
};
155-
Ok(ColumnarValue::Array(array))
156-
}
157-
ColumnarValue::Scalar(scalar) => {
158-
let hex_string = match scalar {
159-
ScalarValue::Utf8View(a)
160-
| ScalarValue::Utf8(a)
161-
| ScalarValue::LargeUtf8(a) => {
162-
a.as_ref().map(|s| hex_encode(Md5::digest(s.as_bytes())))
163-
}
164-
ScalarValue::Binary(a)
165-
| ScalarValue::LargeBinary(a)
166-
| ScalarValue::BinaryView(a) => {
167-
a.as_ref().map(|v| hex_encode(Md5::digest(v.as_slice())))
168-
}
169-
other => {
170-
return exec_err!("Unsupported data type {other:?} for function md5");
171-
}
172-
};
173-
Ok(ColumnarValue::Scalar(ScalarValue::Utf8View(hex_string)))
174-
}
175-
}
176-
}
177-
178-
/// Computes MD5 hash and hex-encodes in a single pass for string arrays.
179-
/// Uses StringViewBuilder for efficient StringViewArray construction.
180-
#[inline]
181-
fn md5_hex_string_array<'a, T: StringArrayType<'a>>(input: &T) -> ArrayRef {
182-
let mut builder = StringViewBuilder::with_capacity(input.len());
183-
for val in input.iter() {
184-
match val {
185-
Some(s) => {
186-
let hash = Md5::digest(s.as_bytes());
187-
builder.append_value(hex_encode(hash));
188-
}
189-
None => builder.append_null(),
143+
// md5 needs special handling: `digest_process` yields Binary, but md5 must return the digest hex-encoded as Utf8View
144+
Ok(match value {
145+
ColumnarValue::Array(array) => {
146+
let binary_array = as_binary_array(&array)?;
147+
let string_array: StringViewArray = binary_array
148+
.iter()
149+
.map(|opt| opt.map(hex_encode::<_>))
150+
.collect();
151+
ColumnarValue::Array(Arc::new(string_array))
190152
}
191-
}
192-
Arc::new(builder.finish())
193-
}
194-
195-
/// Computes MD5 hash and hex-encodes in a single pass for binary arrays.
196-
/// Uses StringViewBuilder for efficient StringViewArray construction.
197-
#[inline]
198-
fn md5_hex_binary_array<'a, T: BinaryArrayType<'a>>(input: &T) -> ArrayRef {
199-
let mut builder = StringViewBuilder::with_capacity(input.len());
200-
for val in input.iter() {
201-
match val {
202-
Some(bytes) => {
203-
let hash = Md5::digest(bytes);
204-
builder.append_value(hex_encode(hash));
205-
}
206-
None => builder.append_null(),
153+
ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
154+
ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode::<_>)))
207155
}
208-
}
209-
Arc::new(builder.finish())
156+
_ => return internal_err!("Impossibly got invalid results from digest"),
157+
})
210158
}
211159

212160
/// Hex encoding lookup table for fast byte-to-hex conversion

0 commit comments

Comments
 (0)