Skip to content

Commit 9885f4b

Browse files
hsiang-c and martin-g authored
fix: arrays_zip/list_zip allow single array argument (apache#21047)
## Which issue does this PR close?

<!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes apache#123` indicates that this PR will close issue apache#123. -->

- Closes apache#21046

## Rationale for this change

<!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

Align DataFusion's `arrays_zip` implementation with DuckDB and Spark

## What changes are included in this PR?

<!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. -->

- Allow single array argument
- Update user doc and fix a few examples

## Are these changes tested?

<!-- We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->

Yes, by sqllogictest

## Are there any user-facing changes?

<!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. -->

Yes, I modified user doc.

<!-- If there are any breaking changes to public APIs, please add the `api change` label. -->

---------

Co-authored-by: Martin Grigorov <martin-g@users.noreply.github.com>
1 parent 6ef4cef commit 9885f4b

File tree

3 files changed

+89
-22
lines changed

3 files changed

+89
-22
lines changed

datafusion/functions-nested/src/arrays_zip.rs

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,31 +50,33 @@ struct ListColumnView {
5050
make_udf_expr_and_func!(
5151
ArraysZip,
5252
arrays_zip,
53-
"combines multiple arrays into a single array of structs.",
53+
"combines one or multiple arrays into a single array of structs.",
5454
arrays_zip_udf
5555
);
5656

5757
#[user_doc(
5858
doc_section(label = "Array Functions"),
5959
description = "Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs.",
60-
syntax_example = "arrays_zip(array1, array2[, ..., array_n])",
60+
syntax_example = "arrays_zip(array1[, ..., array_n])",
6161
sql_example = r#"```sql
62-
> select arrays_zip([1, 2, 3], ['a', 'b', 'c']);
62+
> select arrays_zip([1, 2, 3]);
6363
+---------------------------------------------------+
64-
| arrays_zip([1, 2, 3], ['a', 'b', 'c']) |
64+
| arrays_zip([1, 2, 3]) |
6565
+---------------------------------------------------+
66-
| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] |
66+
| [{1: 1}, {1: 2}, {1: 3}] |
6767
+---------------------------------------------------+
6868
> select arrays_zip([1, 2], [3, 4, 5]);
6969
+---------------------------------------------------+
70-
| arrays_zip([1, 2], [3, 4, 5]) |
70+
| arrays_zip([1, 2], [3, 4, 5]) |
7171
+---------------------------------------------------+
72-
| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] |
72+
| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] |
7373
+---------------------------------------------------+
7474
```"#,
7575
argument(name = "array1", description = "First array expression."),
76-
argument(name = "array2", description = "Second array expression."),
77-
argument(name = "array_n", description = "Subsequent array expressions.")
76+
argument(
77+
name = "array_n",
78+
description = "Optional additional array expressions."
79+
)
7880
)]
7981
#[derive(Debug, PartialEq, Eq, Hash)]
8082
pub struct ArraysZip {
@@ -112,7 +114,7 @@ impl ScalarUDFImpl for ArraysZip {
112114

113115
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
114116
if arg_types.is_empty() {
115-
return exec_err!("arrays_zip requires at least two arguments");
117+
return exec_err!("arrays_zip requires at least one argument");
116118
}
117119

118120
let mut fields = Vec::with_capacity(arg_types.len());
@@ -155,8 +157,8 @@ impl ScalarUDFImpl for ArraysZip {
155157
/// lengths, shorter arrays are padded with NULLs.
156158
/// Supports List, LargeList, and Null input types.
157159
fn arrays_zip_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
158-
if args.len() < 2 {
159-
return exec_err!("arrays_zip requires at least two arguments");
160+
if args.is_empty() {
161+
return exec_err!("arrays_zip requires at least one argument");
160162
}
161163

162164
let num_rows = args[0].len();

datafusion/sqllogictest/test_files/array.slt

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7324,6 +7324,58 @@ select arrays_zip(a, b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5,
73247324
[{1: NULL, 2: 30}, {1: NULL, 2: 40}]
73257325
[{1: 5, 2: NULL}, {1: 6, 2: NULL}]
73267326

7327+
# column-level test with single argument
7328+
query ?
7329+
select arrays_zip(a) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b);
7330+
----
7331+
[{1: 1}, {1: 2}]
7332+
NULL
7333+
[{1: 5}, {1: 6}]
7334+
7335+
query ?
7336+
select arrays_zip(b) from (values ([1, 2], [10, 20]), (null, [30, 40]), ([5, 6], null)) as t(a, b);
7337+
----
7338+
[{1: 10}, {1: 20}]
7339+
[{1: 30}, {1: 40}]
7340+
NULL
7341+
7342+
# No input
7343+
query error Error during planning: 'arrays_zip' does not support zero arguments
7344+
select arrays_zip();
7345+
7346+
# Non-array input
7347+
query error DataFusion error: Execution error: arrays_zip expects array arguments, got Int64
7348+
select arrays_zip(1, 2);
7349+
7350+
# null input
7351+
query ?
7352+
select arrays_zip(null)
7353+
----
7354+
NULL
7355+
7356+
# single empty array
7357+
query ?
7358+
select arrays_zip([])
7359+
----
7360+
[]
7361+
7362+
7363+
# single array of null
7364+
query ?
7365+
select arrays_zip([null])
7366+
----
7367+
[{1: NULL}]
7368+
7369+
query ?
7370+
select arrays_zip([NULL::int])
7371+
----
7372+
[{1: NULL}]
7373+
7374+
query ?
7375+
select arrays_zip([NULL::int[]])
7376+
----
7377+
[{1: NULL}]
7378+
73277379
# alias: list_zip
73287380
query ?
73297381
select list_zip([1, 2], [3, 4]);
@@ -7346,9 +7398,11 @@ select arrays_zip([42], ['hello']);
73467398
----
73477399
[{1: 42, 2: hello}]
73487400

7349-
# error: too few arguments
7350-
statement error
7401+
# single argument
7402+
query ?
73517403
select arrays_zip([1, 2, 3]);
7404+
----
7405+
[{1: 1}, {1: 2}, {1: 3}]
73527406

73537407
# arrays_zip with LargeList inputs
73547408
query ?
@@ -7368,6 +7422,12 @@ select arrays_zip(
73687422
----
73697423
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: NULL, 2: 30}]
73707424

7425+
# single argument from LargeList
7426+
query ?
7427+
select arrays_zip(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'));
7428+
----
7429+
[{1: 1}, {1: 2}, {1: 3}]
7430+
73717431
# arrays_zip with FixedSizeList inputs
73727432
query ?
73737433
select arrays_zip(
@@ -7377,6 +7437,12 @@ select arrays_zip(
73777437
----
73787438
[{1: 1, 2: 10}, {1: 2, 2: 20}, {1: 3, 2: 30}]
73797439

7440+
# single argument from FixedSizeList
7441+
query ?
7442+
select arrays_zip(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'));
7443+
----
7444+
[{1: 1}, {1: 2}, {1: 3}]
7445+
73807446
# arrays_zip mixing List and LargeList
73817447
query ?
73827448
select arrays_zip(

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4274,29 +4274,28 @@ _Alias of [array_has_any](#array_has_any)._
42744274
Returns an array of structs created by combining the elements of each input array at the same index. If the arrays have different lengths, shorter arrays are padded with NULLs.
42754275

42764276
```sql
4277-
arrays_zip(array1, array2[, ..., array_n])
4277+
arrays_zip(array1[, ..., array_n])
42784278
```
42794279

42804280
#### Arguments
42814281

42824282
- **array1**: First array expression.
4283-
- **array2**: Second array expression.
4284-
- **array_n**: Subsequent array expressions.
4283+
- **array_n**: Optional additional array expressions.
42854284

42864285
#### Example
42874286

42884287
```sql
4289-
> select arrays_zip([1, 2, 3], ['a', 'b', 'c']);
4288+
> select arrays_zip([1, 2, 3]);
42904289
+---------------------------------------------------+
4291-
| arrays_zip([1, 2, 3], ['a', 'b', 'c']) |
4290+
| arrays_zip([1, 2, 3]) |
42924291
+---------------------------------------------------+
4293-
| [{c0: 1, c1: a}, {c0: 2, c1: b}, {c0: 3, c1: c}] |
4292+
| [{1: 1}, {1: 2}, {1: 3}] |
42944293
+---------------------------------------------------+
42954294
> select arrays_zip([1, 2], [3, 4, 5]);
42964295
+---------------------------------------------------+
4297-
| arrays_zip([1, 2], [3, 4, 5]) |
4296+
| arrays_zip([1, 2], [3, 4, 5]) |
42984297
+---------------------------------------------------+
4299-
| [{c0: 1, c1: 3}, {c0: 2, c1: 4}, {c0: , c1: 5}] |
4298+
| [{1: 1, 2: 3}, {1: 2, 2: 4}, {1: NULL, 2: 5}] |
43004299
+---------------------------------------------------+
43014300
```
43024301

0 commit comments

Comments
 (0)