Skip to content

Commit a614716

Browse files
authored
Support min/max aggregates for FixedSizeBinary type (#16765)
* Support min/max for FixedSizeBinary
* Remove leftover FileTypeExt trait for tests
* Add sqllogic test for min/max on FixedSizeBinary
* Change combinator to calm clippy
* Revert "Remove leftover FileTypeExt trait for tests". This reverts commit f47547d.
1 parent 37ee8fa commit a614716

File tree

3 files changed

+86
-5
lines changed

3 files changed

+86
-5
lines changed

datafusion/functions-aggregate-common/src/min_max.rs

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
use arrow::array::{
2121
ArrayRef, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, Date32Array,
2222
Date64Array, Decimal128Array, Decimal256Array, DurationMicrosecondArray,
23-
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
24-
Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array,
25-
IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
26-
LargeBinaryArray, LargeStringArray, StringArray, StringViewArray,
27-
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
23+
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray,
24+
FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, Int16Array,
25+
Int32Array, Int64Array, Int8Array, IntervalDayTimeArray, IntervalMonthDayNanoArray,
26+
IntervalYearMonthArray, LargeBinaryArray, LargeStringArray, StringArray,
27+
StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray,
2828
Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
2929
TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array,
3030
UInt64Array, UInt8Array,
@@ -254,6 +254,12 @@ pub fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
254254
min_binary
255255
)
256256
}
257+
DataType::FixedSizeBinary(size) => {
258+
let array = downcast_value!(&values, FixedSizeBinaryArray);
259+
let value = compute::min_fixed_size_binary(array);
260+
let value = value.map(|e| e.to_vec());
261+
ScalarValue::FixedSizeBinary(*size, value)
262+
}
257263
DataType::BinaryView => {
258264
typed_min_max_batch_binary!(
259265
&values,
@@ -339,6 +345,12 @@ pub fn max_batch(values: &ArrayRef) -> Result<ScalarValue> {
339345
max_binary
340346
)
341347
}
348+
DataType::FixedSizeBinary(size) => {
349+
let array = downcast_value!(&values, FixedSizeBinaryArray);
350+
let value = compute::max_fixed_size_binary(array);
351+
let value = value.map(|e| e.to_vec());
352+
ScalarValue::FixedSizeBinary(*size, value)
353+
}
342354
DataType::Struct(_) => min_max_batch_generic(values, Ordering::Less)?,
343355
DataType::List(_) => min_max_batch_generic(values, Ordering::Less)?,
344356
DataType::LargeList(_) => min_max_batch_generic(values, Ordering::Less)?,

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,21 @@ macro_rules! typed_min_max_string {
443443
}};
444444
}
445445

446+
// min/max of two optional scalar values whose `ScalarValue` variant takes an
// extra leading argument (`$ARG`), e.g. the byte width of `FixedSizeBinary`.
// Builds `ScalarValue::$SCALAR($ARG, result)`: `None` when both inputs are
// `None`, a clone of the only present input when one is `None`, and otherwise
// a clone of `a.$OP(b)` (`$OP` is the method name invoked on the first value).
447+
macro_rules! typed_min_max_string_arg {
448+
($VALUE:expr, $DELTA:expr, $SCALAR:ident, $OP:ident, $ARG:expr) => {{
449+
ScalarValue::$SCALAR(
450+
$ARG,
451+
match ($VALUE, $DELTA) {
452+
(None, None) => None,
453+
(Some(a), None) => Some(a.clone()),
454+
(None, Some(b)) => Some(b.clone()),
455+
(Some(a), Some(b)) => Some((a).$OP(b).clone()),
456+
},
457+
)
458+
}};
459+
}
460+
446461
macro_rules! choose_min_max {
447462
(min) => {
448463
std::cmp::Ordering::Greater
@@ -546,6 +561,16 @@ macro_rules! min_max {
546561
(ScalarValue::LargeBinary(lhs), ScalarValue::LargeBinary(rhs)) => {
547562
typed_min_max_string!(lhs, rhs, LargeBinary, $OP)
548563
}
564+
(ScalarValue::FixedSizeBinary(lsize, lhs), ScalarValue::FixedSizeBinary(rsize, rhs)) => {
565+
if lsize == rsize {
566+
typed_min_max_string_arg!(lhs, rhs, FixedSizeBinary, $OP, *lsize)
567+
}
568+
else {
569+
return internal_err!(
570+
"MIN/MAX is not expected to receive FixedSizeBinary of incompatible sizes {:?}",
571+
(lsize, rsize))
572+
}
573+
}
549574
(ScalarValue::BinaryView(lhs), ScalarValue::BinaryView(rhs)) => {
550575
typed_min_max_string!(lhs, rhs, BinaryView, $OP)
551576
}

datafusion/sqllogictest/test_files/aggregate.slt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4351,6 +4351,50 @@ DROP VIEW binary_views
43514351
statement ok
43524352
DROP TABLE strings;
43534353

4354+
############ FixedSizeBinary ############
4355+
4356+
statement ok
4357+
CREATE TABLE binaries
4358+
AS VALUES
4359+
(X'000103', 1),
4360+
(X'000104', 1),
4361+
(X'000101', 3),
4362+
(X'000103', 1),
4363+
(X'000102', 1),
4364+
(NULL, 1),
4365+
(NULL, 4),
4366+
(X'000104', 1),
4367+
(X'000109', 2),
4368+
(X'000103', 1),
4369+
(X'000101', 2);
4370+
4371+
statement ok
4372+
CREATE VIEW fixed_size_binary_views
4373+
AS SELECT arrow_cast(column1, 'FixedSizeBinary(3)') as value, column2 as id FROM binaries;
4374+
4375+
query I?
4376+
SELECT id, MIN(value) FROM fixed_size_binary_views GROUP BY id ORDER BY id;
4377+
----
4378+
1 000102
4379+
2 000101
4380+
3 000101
4381+
4 NULL
4382+
4383+
query I?
4384+
SELECT id, MAX(value) FROM fixed_size_binary_views GROUP BY id ORDER BY id;
4385+
----
4386+
1 000104
4387+
2 000109
4388+
3 000101
4389+
4 NULL
4390+
4391+
statement ok
4392+
DROP VIEW fixed_size_binary_views;
4393+
4394+
statement ok
4395+
DROP TABLE binaries;
4396+
4397+
43544398
#################
43554399
# End min_max on strings/binary with null values and groups
43564400
#################

0 commit comments

Comments
 (0)