Skip to content

Commit 0143b20

Browse files
comphead and mbutrovich authored
feat: support map_entries builtin function (#16557)
Co-authored-by: Matt Butrovich <[email protected]>
1 parent 3649dc8 commit 0143b20

File tree

5 files changed

+226
-1
lines changed

5 files changed

+226
-1
lines changed

datafusion/common/src/utils/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ pub fn get_available_parallelism() -> usize {
950950
.get()
951951
}
952952

953-
/// Converts a collection of function arguments into an fixed-size array of length N
953+
/// Converts a collection of function arguments into a fixed-size array of length N
954954
/// producing a reasonable error message in case of unexpected number of arguments.
955955
///
956956
/// # Example

datafusion/functions-nested/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ pub mod flatten;
5050
pub mod length;
5151
pub mod make_array;
5252
pub mod map;
53+
pub mod map_entries;
5354
pub mod map_extract;
5455
pub mod map_keys;
5556
pub mod map_values;
@@ -95,6 +96,7 @@ pub mod expr_fn {
9596
pub use super::flatten::flatten;
9697
pub use super::length::array_length;
9798
pub use super::make_array::make_array;
99+
pub use super::map_entries::map_entries;
98100
pub use super::map_extract::map_extract;
99101
pub use super::map_keys::map_keys;
100102
pub use super::map_values::map_values;
@@ -163,6 +165,7 @@ pub fn all_default_nested_functions() -> Vec<Arc<ScalarUDF>> {
163165
replace::array_replace_all_udf(),
164166
replace::array_replace_udf(),
165167
map::map_udf(),
168+
map_entries::map_entries_udf(),
166169
map_extract::map_extract_udf(),
167170
map_keys::map_keys_udf(),
168171
map_values::map_values_udf(),
datafusion/functions-nested/src/map_entries.rs

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! [`ScalarUDFImpl`] definitions for map_entries function.
19+
20+
use crate::utils::{get_map_entry_field, make_scalar_function};
21+
use arrow::array::{Array, ArrayRef, ListArray};
22+
use arrow::datatypes::{DataType, Field, Fields};
23+
use datafusion_common::utils::take_function_args;
24+
use datafusion_common::{cast::as_map_array, exec_err, Result};
25+
use datafusion_expr::{
26+
ArrayFunctionSignature, ColumnarValue, Documentation, ScalarUDFImpl, Signature,
27+
TypeSignature, Volatility,
28+
};
29+
use datafusion_macros::user_doc;
30+
use std::any::Any;
31+
use std::sync::Arc;
32+
33+
// Registers `MapEntriesFunc` through the crate's UDF helper macro. The
// `map_entries` and `map_entries_udf` names passed here are the ones lib.rs
// re-exports in `expr_fn` and lists in `all_default_nested_functions()`.
// NOTE(review): the exact items generated depend on the macro definition,
// which is not visible here.
make_udf_expr_and_func!(
34+
MapEntriesFunc,
35+
map_entries,
36+
map,
37+
"Return a list of all entries in the map.",
38+
map_entries_udf
39+
);
40+
41+
/// Scalar UDF type backing the `map_entries` SQL function.
///
/// The `user_doc` attribute below carries the user-facing documentation
/// (section, description, syntax, SQL example and argument description).
/// NOTE(review): it presumably also generates the `doc()` method used by
/// `ScalarUDFImpl::documentation` -- confirm against `datafusion_macros`.
#[user_doc(
42+
doc_section(label = "Map Functions"),
43+
description = "Returns a list of all entries in the map.",
44+
syntax_example = "map_entries(map)",
45+
sql_example = r#"```sql
46+
SELECT map_entries(MAP {'a': 1, 'b': NULL, 'c': 3});
47+
----
48+
[{'key': a, 'value': 1}, {'key': b, 'value': NULL}, {'key': c, 'value': 3}]
49+
50+
SELECT map_entries(map([100, 5], [42, 43]));
51+
----
52+
[{'key': 100, 'value': 42}, {'key': 5, 'value': 43}]
53+
```"#,
54+
argument(
55+
name = "map",
56+
description = "Map expression. Can be a constant, column, or function, and any combination of map operators."
57+
)
58+
)]
59+
#[derive(Debug)]
60+
pub struct MapEntriesFunc {
61+
// Type signature handed out by `ScalarUDFImpl::signature`; built in `new`.
signature: Signature,
62+
}
63+
64+
impl Default for MapEntriesFunc {
65+
fn default() -> Self {
66+
Self::new()
67+
}
68+
}
69+
70+
impl MapEntriesFunc {
71+
pub fn new() -> Self {
72+
Self {
73+
signature: Signature::new(
74+
TypeSignature::ArraySignature(ArrayFunctionSignature::MapArray),
75+
Volatility::Immutable,
76+
),
77+
}
78+
}
79+
}
80+
81+
impl ScalarUDFImpl for MapEntriesFunc {
82+
fn as_any(&self) -> &dyn Any {
83+
self
84+
}
85+
86+
fn name(&self) -> &str {
87+
"map_entries"
88+
}
89+
90+
fn signature(&self) -> &Signature {
91+
&self.signature
92+
}
93+
94+
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
95+
let [map_type] = take_function_args(self.name(), arg_types)?;
96+
let map_fields = get_map_entry_field(map_type)?;
97+
Ok(DataType::List(Arc::new(Field::new_list_field(
98+
DataType::Struct(Fields::from(vec![
99+
Field::new(
100+
"key",
101+
map_fields.first().unwrap().data_type().clone(),
102+
false,
103+
),
104+
Field::new(
105+
"value",
106+
map_fields.get(1).unwrap().data_type().clone(),
107+
true,
108+
),
109+
])),
110+
false,
111+
))))
112+
}
113+
114+
fn invoke_with_args(
115+
&self,
116+
args: datafusion_expr::ScalarFunctionArgs,
117+
) -> Result<ColumnarValue> {
118+
make_scalar_function(map_entries_inner)(&args.args)
119+
}
120+
121+
fn documentation(&self) -> Option<&Documentation> {
122+
self.doc()
123+
}
124+
}
125+
126+
fn map_entries_inner(args: &[ArrayRef]) -> Result<ArrayRef> {
127+
let [map_arg] = take_function_args("map_entries", args)?;
128+
129+
let map_array = match map_arg.data_type() {
130+
DataType::Map(_, _) => as_map_array(&map_arg)?,
131+
_ => return exec_err!("Argument for map_entries should be a map"),
132+
};
133+
134+
Ok(Arc::new(ListArray::new(
135+
Arc::new(Field::new_list_field(
136+
DataType::Struct(Fields::from(vec![
137+
Field::new("key", map_array.key_type().clone(), false),
138+
Field::new("value", map_array.value_type().clone(), true),
139+
])),
140+
false,
141+
)),
142+
map_array.offsets().clone(),
143+
Arc::new(map_array.entries().clone()),
144+
map_array.nulls().cloned(),
145+
)))
146+
}

datafusion/sqllogictest/test_files/map.slt

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,57 @@ select map_extract(column1, 1), map_extract(column1, 5), map_extract(column1, 7)
651651
[NULL] [[4, NULL, 6]] [NULL]
652652
[NULL] [NULL] [[1, NULL, 3]]
653653

654+
# Tests for map_entries
655+
656+
query ?
657+
SELECT map_entries(MAP { 'a': 1, 'b': 3 });
658+
----
659+
[{key: a, value: 1}, {key: b, value: 3}]
660+
661+
query error DataFusion error: Arrow error: Cast error: Cannot cast string 'a' to value of Int64 type
662+
SELECT map_entries(MAP { 'a': 1, 2: 3 });
663+
664+
query ?
665+
SELECT map_entries(MAP {'a':1, 'b':2, 'c':3 }) FROM t;
666+
----
667+
[{key: a, value: 1}, {key: b, value: 2}, {key: c, value: 3}]
668+
[{key: a, value: 1}, {key: b, value: 2}, {key: c, value: 3}]
669+
[{key: a, value: 1}, {key: b, value: 2}, {key: c, value: 3}]
670+
671+
query ?
672+
SELECT map_entries(Map{column1: column2, column3: column4}) FROM t;
673+
----
674+
[{key: a, value: 1}, {key: k1, value: 10}]
675+
[{key: b, value: 2}, {key: k3, value: 30}]
676+
[{key: d, value: 4}, {key: k5, value: 50}]
677+
678+
query ?
679+
SELECT map_entries(map(column5, column6)) FROM t;
680+
----
681+
[{key: k1, value: 1}, {key: k2, value: 2}]
682+
[{key: k3, value: 3}]
683+
[{key: k5, value: 5}]
684+
685+
query ?
686+
SELECT map_entries(map(column8, column9)) FROM t;
687+
----
688+
[{key: [1, 2, 3], value: a}]
689+
[{key: [4], value: b}]
690+
[{key: [1, 2], value: c}]
691+
692+
query ?
693+
SELECT map_entries(Map{});
694+
----
695+
[]
696+
697+
query ?
698+
SELECT map_entries(column1) from map_array_table_1;
699+
----
700+
[{key: 1, value: [1, NULL, 3]}, {key: 2, value: [4, NULL, 6]}, {key: 3, value: [7, 8, 9]}]
701+
[{key: 4, value: [1, NULL, 3]}, {key: 5, value: [4, NULL, 6]}, {key: 6, value: [7, 8, 9]}]
702+
[{key: 7, value: [1, NULL, 3]}, {key: 8, value: [9, NULL, 6]}, {key: 9, value: [7, 8, 9]}]
703+
NULL
704+
654705
# Tests for map_keys
655706

656707
query ?

docs/source/user-guide/sql/scalar_functions.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4105,6 +4105,7 @@ select struct(a as field_a, b) from t;
41054105

41064106
- [element_at](#element_at)
41074107
- [map](#map)
4108+
- [map_entries](#map_entries)
41084109
- [map_extract](#map_extract)
41094110
- [map_keys](#map_keys)
41104111
- [map_values](#map_values)
@@ -4162,6 +4163,30 @@ SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]);
41624163
{key1: value1, key2: }
41634164
```
41644165

4166+
### `map_entries`
4167+
4168+
Returns a list of all entries in the map.
4169+
4170+
```sql
4171+
map_entries(map)
4172+
```
4173+
4174+
#### Arguments
4175+
4176+
- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators.
4177+
4178+
#### Example
4179+
4180+
```sql
4181+
SELECT map_entries(MAP {'a': 1, 'b': NULL, 'c': 3});
4182+
----
4183+
[{'key': a, 'value': 1}, {'key': b, 'value': NULL}, {'key': c, 'value': 3}]
4184+
4185+
SELECT map_entries(map([100, 5], [42, 43]));
4186+
----
4187+
[{'key': 100, 'value': 42}, {'key': 5, 'value': 43}]
4188+
```
4189+
41654190
### `map_extract`
41664191

41674192
Returns a list containing the value for the given key or an empty list if the key is not present in the map.

0 commit comments

Comments
 (0)