Skip to content

Commit 3839736

Browse files
authored
feat: add array_min scalar function and associated tests (#16574)
* feat: add `array_min` scalar function and associated tests
* update docs
* nit
* refactor: merge `array_min` and `array_max` into `min_max` module for better code reuse
1 parent fffcd1f commit 3839736

File tree

5 files changed

+207
-11
lines changed

5 files changed

+207
-11
lines changed

datafusion/functions-aggregate/src/min_max.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ macro_rules! min_max_batch {
578578
}
579579

580580
/// dynamically-typed min(array) -> ScalarValue
581-
fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
581+
pub fn min_batch(values: &ArrayRef) -> Result<ScalarValue> {
582582
Ok(match values.data_type() {
583583
DataType::Utf8 => {
584584
typed_min_max_batch_string!(values, StringArray, Utf8, min_string)

datafusion/functions-nested/src/lib.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub mod map_entries;
5454
pub mod map_extract;
5555
pub mod map_keys;
5656
pub mod map_values;
57-
pub mod max;
57+
pub mod min_max;
5858
pub mod planner;
5959
pub mod position;
6060
pub mod range;
@@ -100,6 +100,8 @@ pub mod expr_fn {
100100
pub use super::map_extract::map_extract;
101101
pub use super::map_keys::map_keys;
102102
pub use super::map_values::map_values;
103+
pub use super::min_max::array_max;
104+
pub use super::min_max::array_min;
103105
pub use super::position::array_position;
104106
pub use super::position::array_positions;
105107
pub use super::range::gen_series;
@@ -148,7 +150,8 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
148150
length::array_length_udf(),
149151
distance::array_distance_udf(),
150152
flatten::flatten_udf(),
151-
max::array_max_udf(),
153+
min_max::array_max_udf(),
154+
min_max::array_min_udf(),
152155
sort::array_sort_udf(),
153156
repeat::array_repeat_udf(),
154157
resize::array_resize_udf(),

datafusion/functions-nested/src/max.rs renamed to datafusion/functions-nested/src/min_max.rs

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,22 +123,106 @@ impl ScalarUDFImpl for ArrayMax {
123123
pub fn array_max_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
124124
let [array] = take_function_args("array_max", args)?;
125125
match array.data_type() {
126-
List(_) => general_array_max(as_list_array(array)?),
127-
LargeList(_) => general_array_max(as_large_list_array(array)?),
126+
List(_) => array_min_max_helper(as_list_array(array)?, min_max::max_batch),
127+
LargeList(_) => {
128+
array_min_max_helper(as_large_list_array(array)?, min_max::max_batch)
129+
}
128130
arg_type => exec_err!("array_max does not support type: {arg_type}"),
129131
}
130132
}
131133

132-
fn general_array_max<O: OffsetSizeTrait>(
134+
make_udf_expr_and_func!(
135+
ArrayMin,
136+
array_min,
137+
array,
138+
"returns the minimum value in the array",
139+
array_min_udf
140+
);
141+
#[user_doc(
142+
doc_section(label = "Array Functions"),
143+
description = "Returns the minimum value in the array.",
144+
syntax_example = "array_min(array)",
145+
sql_example = r#"```sql
146+
> select array_min([3,1,4,2]);
147+
+-----------------------------------------+
148+
| array_min(List([3,1,4,2])) |
149+
+-----------------------------------------+
150+
| 1 |
151+
+-----------------------------------------+
152+
```"#,
153+
argument(
154+
name = "array",
155+
description = "Array expression. Can be a constant, column, or function, and any combination of array operators."
156+
)
157+
)]
158+
#[derive(Debug)]
159+
struct ArrayMin {
160+
signature: Signature,
161+
}
162+
163+
impl Default for ArrayMin {
164+
fn default() -> Self {
165+
Self::new()
166+
}
167+
}
168+
169+
impl ArrayMin {
170+
fn new() -> Self {
171+
Self {
172+
signature: Signature::array(Volatility::Immutable),
173+
}
174+
}
175+
}
176+
177+
impl ScalarUDFImpl for ArrayMin {
178+
fn as_any(&self) -> &dyn Any {
179+
self
180+
}
181+
182+
fn name(&self) -> &str {
183+
"array_min"
184+
}
185+
186+
fn signature(&self) -> &Signature {
187+
&self.signature
188+
}
189+
190+
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
191+
let [array] = take_function_args(self.name(), arg_types)?;
192+
match array {
193+
List(field) | LargeList(field) => Ok(field.data_type().clone()),
194+
arg_type => plan_err!("{} does not support type {}", self.name(), arg_type),
195+
}
196+
}
197+
198+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
199+
make_scalar_function(array_min_inner)(&args.args)
200+
}
201+
202+
fn documentation(&self) -> Option<&Documentation> {
203+
self.doc()
204+
}
205+
}
206+
207+
pub fn array_min_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
208+
let [array] = take_function_args("array_min", args)?;
209+
match array.data_type() {
210+
List(_) => array_min_max_helper(as_list_array(array)?, min_max::min_batch),
211+
LargeList(_) => {
212+
array_min_max_helper(as_large_list_array(array)?, min_max::min_batch)
213+
}
214+
arg_type => exec_err!("array_min does not support type: {arg_type}"),
215+
}
216+
}
217+
218+
fn array_min_max_helper<O: OffsetSizeTrait>(
133219
array: &GenericListArray<O>,
220+
agg_fn: fn(&ArrayRef) -> Result<ScalarValue>,
134221
) -> Result<ArrayRef> {
135222
let null_value = ScalarValue::try_from(array.value_type())?;
136223
let result_vec: Vec<ScalarValue> = array
137224
.iter()
138-
.map(|arr| {
139-
arr.as_ref()
140-
.map_or_else(|| Ok(null_value.clone()), min_max::max_batch)
141-
})
225+
.map(|arr| arr.as_ref().map_or_else(|| Ok(null_value.clone()), agg_fn))
142226
.try_collect()?;
143227
ScalarValue::iter_to_array(result_vec)
144228
}

datafusion/sqllogictest/test_files/array.slt

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1535,6 +1535,91 @@ NULL
15351535
query error DataFusion error: Error during planning: 'array_max' does not support zero arguments
15361536
select array_max();
15371537

1538+
## array_min
1539+
1540+
query I
1541+
select array_min(make_array(5, 3, 6, 4));
1542+
----
1543+
3
1544+
1545+
query I
1546+
select array_min(make_array(5, 3, 4, NULL, 6, NULL));
1547+
----
1548+
3
1549+
1550+
query ?
1551+
select array_min(make_array(NULL, NULL));
1552+
----
1553+
NULL
1554+
1555+
query T
1556+
select array_min(make_array('h', 'e', 'o', 'l', 'l'));
1557+
----
1558+
e
1559+
1560+
query T
1561+
select array_min(make_array('h', 'e', 'l', NULL, 'l', 'o', NULL));
1562+
----
1563+
e
1564+
1565+
query B
1566+
select array_min(make_array(false, true, false, true));
1567+
----
1568+
false
1569+
1570+
query B
1571+
select array_min(make_array(false, true, NULL, false, true));
1572+
----
1573+
false
1574+
1575+
query D
1576+
select array_min(make_array(DATE '1992-09-01', DATE '1993-03-01', DATE '1999-05-01', DATE '1985-11-01'));
1577+
----
1578+
1985-11-01
1579+
1580+
query D
1581+
select array_min(make_array(DATE '1995-09-01', DATE '1999-05-01', DATE '1993-03-01', NULL));
1582+
----
1583+
1993-03-01
1584+
1585+
query P
1586+
select array_min(make_array(TIMESTAMP '1992-09-01', TIMESTAMP '1995-06-01', TIMESTAMP '1984-10-01'));
1587+
----
1588+
1984-10-01T00:00:00
1589+
1590+
query P
1591+
select array_min(make_array(NULL, TIMESTAMP '1996-10-01', TIMESTAMP '1995-06-01'));
1592+
----
1593+
1995-06-01T00:00:00
1594+
1595+
query R
1596+
select array_min(make_array(5.1, -3.2, 6.3, 4.9));
1597+
----
1598+
-3.2
1599+
1600+
query ?I
1601+
select input, array_min(input) from (select make_array(d - 1, d, d + 1) input from (values (0), (10), (20), (30), (NULL)) t(d))
1602+
----
1603+
[-1, 0, 1] -1
1604+
[9, 10, 11] 9
1605+
[19, 20, 21] 19
1606+
[29, 30, 31] 29
1607+
[NULL, NULL, NULL] NULL
1608+
1609+
query II
1610+
select array_min(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_min(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'));
1611+
----
1612+
1 1
1613+
1614+
query ?
1615+
select array_min(make_array());
1616+
----
1617+
NULL
1618+
1619+
# Testing with empty arguments should result in an error
1620+
query error DataFusion error: Error during planning: 'array_min' does not support zero arguments
1621+
select array_min();
1622+
15381623

15391624
## array_pop_back (aliases: `list_pop_back`)
15401625

@@ -6009,7 +6094,7 @@ false false NULL false
60096094
false false false NULL
60106095

60116096
# Row 1: [[NULL,2],[3,NULL]], [1.1,2.2,3.3], ['L','o','r','e','m']
6012-
# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m']
6097+
# Row 2: [[3,4],[5,6]], [NULL,5.5,6.6], ['i','p',NULL,'u','m']
60136098
# Row 3: [[5,6],[7,8]], [7.7,8.8,9.9], ['d',NULL,'l','o','r']
60146099
# Row 4: [[7,NULL],[9,10]], [10.1,NULL,12.2], ['s','i','t','a','b']
60156100
# Row 5: NULL, [13.3,14.4,15.5], ['a','m','e','t','x']

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,6 +2552,7 @@ _Alias of [current_date](#current_date)._
25522552
- [array_join](#array_join)
25532553
- [array_length](#array_length)
25542554
- [array_max](#array_max)
2555+
- [array_min](#array_min)
25552556
- [array_ndims](#array_ndims)
25562557
- [array_pop_back](#array_pop_back)
25572558
- [array_pop_front](#array_pop_front)
@@ -3058,6 +3059,29 @@ array_max(array)
30583059

30593060
- list_max
30603061

3062+
### `array_min`
3063+
3064+
Returns the minimum value in the array.
3065+
3066+
```sql
3067+
array_min(array)
3068+
```
3069+
3070+
#### Arguments
3071+
3072+
- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators.
3073+
3074+
#### Example
3075+
3076+
```sql
3077+
> select array_min([3,1,4,2]);
3078+
+-----------------------------------------+
3079+
| array_min(List([3,1,4,2]))              |
3080+
+-----------------------------------------+
3081+
| 1                                       |
3082+
+-----------------------------------------+
3083+
```
3084+
30613085
### `array_ndims`
30623086

30633087
Returns the number of dimensions of the array.

0 commit comments

Comments
 (0)