Skip to content

Commit 3c40b21

Browse files
SNOW-2335255: Add support for semi-structured and structured data functions ( part 1) (#3811)
Co-authored-by: Jamison Rose <Jamison.Rose@snowflake.com>
1 parent 2e44cc0 commit 3c40b21

File tree

3 files changed

+206
-57
lines changed

3 files changed

+206
-57
lines changed

CHANGELOG.md

Lines changed: 74 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,63 +19,80 @@
1919
- `get_cloud_provider_token`
2020

2121
- Added support for the following scalar functions in `functions.py`:
22-
- `array_remove_at`
23-
- `as_boolean`
24-
- `booland`
25-
- `boolnot`
26-
- `boolor`
27-
- `boolor_agg`
28-
- `boolxor`
29-
- `chr`
30-
- `decode`
31-
- `div0null`
32-
- `dp_interval_high`
33-
- `dp_interval_low`
34-
- `greatest_ignore_nulls`
35-
- `h3_cell_to_boundary`
36-
- `h3_cell_to_children`
37-
- `h3_cell_to_children_string`
38-
- `h3_cell_to_parent`
39-
- `h3_cell_to_point`
40-
- `h3_compact_cells`
41-
- `h3_compact_cells_strings`
42-
- `h3_coverage`
43-
- `h3_coverage_strings`
44-
- `h3_get_resolution`
45-
- `h3_grid_disk`
46-
- `h3_grid_distance`
47-
- `h3_int_to_string`
48-
- `h3_polygon_to_cells`
49-
- `h3_polygon_to_cells_strings`
50-
- `h3_string_to_int`
51-
- `h3_try_grid_path`
52-
- `h3_try_polygon_to_cells`
53-
- `h3_try_polygon_to_cells_strings`
54-
- `h3_uncompact_cells`
55-
- `h3_uncompact_cells_strings`
56-
- `haversine`
57-
- `h3_grid_path`
58-
- `h3_is_pentagon`
59-
- `h3_is_valid_cell`
60-
- `h3_latlng_to_cell`
61-
- `h3_latlng_to_cell_string`
62-
- `h3_point_to_cell`
63-
- `h3_point_to_cell_string`
64-
- `h3_try_coverage`
65-
- `h3_try_coverage_strings`
66-
- `h3_try_grid_distance`
67-
- `hex_decode_binary`
68-
- `last_query_id`
69-
- `last_transaction`
70-
- `least_ignore_nulls`
71-
- `nullif`
72-
- `nvl2`
73-
- `regr_valx`
74-
- `st_area`
75-
- `st_asewkb`
76-
- `st_asewkt`
77-
- `st_asgeojson`
78-
- `st_aswkb`
22+
- Conditional expression functions:
23+
- `booland`
24+
- `boolnot`
25+
- `boolor`
26+
- `boolxor`
27+
- `boolor_agg`
28+
- `decode`
29+
- `greatest_ignore_nulls`
30+
- `least_ignore_nulls`
31+
- `nullif`
32+
- `nvl2`
33+
- `regr_valx`
34+
35+
- Semi-structured and structured data functions:
36+
- `array_remove_at`
37+
- `as_boolean`
38+
- `map_delete`
39+
- `map_insert`
40+
- `map_pick`
41+
- `map_size`
42+
43+
- String & binary functions:
44+
- `chr`
45+
- `hex_decode_binary`
46+
47+
- Numeric functions:
48+
- `div0null`
49+
50+
- Differential privacy functions:
51+
- `dp_interval_high`
52+
- `dp_interval_low`
53+
54+
- Context functions:
55+
- `last_query_id`
56+
- `last_transaction`
57+
58+
- Geospatial functions:
59+
- `h3_cell_to_boundary`
60+
- `h3_cell_to_children`
61+
- `h3_cell_to_children_string`
62+
- `h3_cell_to_parent`
63+
- `h3_cell_to_point`
64+
- `h3_compact_cells`
65+
- `h3_compact_cells_strings`
66+
- `h3_coverage`
67+
- `h3_coverage_strings`
68+
- `h3_get_resolution`
69+
- `h3_grid_disk`
70+
- `h3_grid_distance`
71+
- `h3_int_to_string`
72+
- `h3_polygon_to_cells`
73+
- `h3_polygon_to_cells_strings`
74+
- `h3_string_to_int`
75+
- `h3_try_grid_path`
76+
- `h3_try_polygon_to_cells`
77+
- `h3_try_polygon_to_cells_strings`
78+
- `h3_uncompact_cells`
79+
- `h3_uncompact_cells_strings`
80+
- `haversine`
81+
- `h3_grid_path`
82+
- `h3_is_pentagon`
83+
- `h3_is_valid_cell`
84+
- `h3_latlng_to_cell`
85+
- `h3_latlng_to_cell_string`
86+
- `h3_point_to_cell`
87+
- `h3_point_to_cell_string`
88+
- `h3_try_coverage`
89+
- `h3_try_coverage_strings`
90+
- `h3_try_grid_distance`
91+
- `st_area`
92+
- `st_asewkb`
93+
- `st_asewkt`
94+
- `st_asgeojson`
95+
- `st_aswkb`
7996

8097
#### Bug Fixes
8198

docs/source/snowpark/functions.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,10 @@ Functions
336336
map_concat
337337
map_contains_key
338338
map_keys
339+
map_delete
340+
map_insert
341+
map_pick
342+
map_size
339343
max
340344
max_by
341345
md5

src/snowflake/snowpark/_functions/scalar_functions.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2064,3 +2064,131 @@ def h3_try_grid_distance(
20642064
cell_id_1 = _to_col_if_str(cell_id_1, "h3_try_grid_distance")
20652065
cell_id_2 = _to_col_if_str(cell_id_2, "h3_try_grid_distance")
20662066
return builtin("h3_try_grid_distance", _emit_ast=_emit_ast)(cell_id_1, cell_id_2)
2067+
2068+
2069+
@publicapi
def map_delete(
    map_col: ColumnOrName, *keys: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """Removes one or more keys from a map and returns the resulting map.

    Args:
        map_col (ColumnOrName): The map from which keys are removed.
        *keys (ColumnOrName): The keys to remove from the map.

    Returns:
        Column: The input map without the specified keys.

    Example::

        >>> from snowflake.snowpark.functions import col, lit, to_variant
        >>> df = session.sql(\"""
        ...     SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col
        ...     union all
        ...     SELECT {'c':3,'d':4,'e':5}::MAP(VARCHAR,NUMBER) as map_col
        ... \""")
        >>> df.select(to_variant(map_delete(col("map_col"), lit("c"), lit("d"))).alias("result")).collect()
        [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}'), Row(RESULT='{\\n "e": 5\\n}')]

    """
    func_name = "map_delete"
    # Column names given as strings are converted to Column objects first.
    source_map = _to_col_if_str(map_col, func_name)
    key_columns = tuple(_to_col_if_str(key, func_name) for key in keys)
    map_delete_builtin = builtin(func_name, _emit_ast=_emit_ast)
    return map_delete_builtin(source_map, *key_columns)
2097+
2098+
2099+
@publicapi
def map_insert(
    map_col: ColumnOrName,
    key: ColumnOrName,
    value: ColumnOrName,
    update_flag: Optional[ColumnOrName] = None,
    _emit_ast: bool = True,
) -> Column:
    """
    Returns a map containing all key-value pairs from the source map as well as the new key-value pair.

    The ``update_flag`` argument is forwarded to Snowflake's ``MAP_INSERT`` only when it is
    provided. Per the Snowflake ``MAP_INSERT`` reference, the flag defaults to FALSE on the
    server side: an existing key is overwritten only when the flag is TRUE, and inserting a
    key that already exists without the flag set to TRUE is an error.

    Args:
        map_col (ColumnOrName): The source map
        key (ColumnOrName): The key to insert or update
        value (ColumnOrName): The value to associate with the key
        update_flag (Optional[ColumnOrName]): A boolean column indicating whether an existing
            key may be overwritten. If None, the argument is omitted from the call and the
            server-side default applies.

    Returns:
        Column: A new map with the key-value pair inserted or updated

    Examples:
        >>> from snowflake.snowpark.functions import lit, to_variant, col
        >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL")
        >>> df.select(to_variant(map_insert(col("MAP_COL"), lit("c"), lit(3))).alias("RESULT")).collect()
        [Row(RESULT='{\\n "a": 1,\\n "b": 2,\\n "c": 3\\n}')]

        # Example using update flag
        >>> from snowflake.snowpark.functions import lit, to_variant, col
        >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL")
        >>> df.select(to_variant(map_insert(col("MAP_COL"), lit("a"), lit(20), lit(True))).alias("RESULT")).collect()
        [Row(RESULT='{\\n "a": 20,\\n "b": 2\\n}')]
    """
    func_name = "map_insert"
    args = [_to_col_if_str(arg, func_name) for arg in (map_col, key, value)]
    # Only pass the flag when the caller supplied one, so the SQL call keeps
    # its three-argument form (and server-side default) otherwise.
    if update_flag is not None:
        args.append(_to_col_if_str(update_flag, func_name))
    return builtin(func_name, _emit_ast=_emit_ast)(*args)
2140+
2141+
2142+
@publicapi
def map_pick(
    map_col: ColumnOrName, *keys: ColumnOrName, _emit_ast: bool = True
) -> Column:
    """
    Builds a new map from a subset of the key-value pairs of an existing map.

    The keys passed as arguments select which key-value pairs are copied into the new map.
    A key that does not exist in the input map is ignored.

    Args:
        map_col (ColumnOrName): The map column to pick from
        *keys (ColumnOrName): The keys to pick

    Returns:
        Column: A new map containing the selected key-value pairs

    Examples:
        >>> from snowflake.snowpark.functions import lit, to_variant, col
        >>> df = session.sql("SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col")
        >>> df.select(to_variant(map_pick(df["map_col"], lit("a"), lit("b"))).alias("result")).collect()
        [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}')]

        # Example sending an array of keys
        >>> from snowflake.snowpark.functions import map_pick, to_variant, col
        >>> df = session.sql("SELECT {'a':1,'b':2,'c':3}::MAP(VARCHAR,NUMBER) as map_col, ARRAY_CONSTRUCT('a','b') as keys_arr")
        >>> df.select(to_variant(map_pick(col("map_col"), col("keys_arr"))).alias("RESULT")).collect()
        [Row(RESULT='{\\n "a": 1,\\n "b": 2\\n}')]
    """
    func_name = "map_pick"
    # Column names given as strings are converted to Column objects first.
    source_map = _to_col_if_str(map_col, func_name)
    picked_keys = tuple(_to_col_if_str(key, func_name) for key in keys)
    map_pick_builtin = builtin(func_name, _emit_ast=_emit_ast)
    return map_pick_builtin(source_map, *picked_keys)
2174+
2175+
2176+
@publicapi
def map_size(map_col: ColumnOrName, _emit_ast: bool = True) -> Column:
    """
    Returns the size of the input MAP.

    As stated in the original description of this function, the result is None if the
    input column is not a MAP type.

    Args:
        map_col (ColumnOrName): The map values.

    Returns:
        Column: The size of the map.

    Examples:
        >>> from snowflake.snowpark.functions import col
        >>> df = session.sql("SELECT {'a': 1, 'b': 2}::MAP(VARCHAR, NUMBER) as MAP_COL")
        >>> df.select(map_size(col("MAP_COL")).alias("MAP_SIZE")).collect()
        [Row(MAP_SIZE=2)]
    """
    func_name = "map_size"
    map_size_builtin = builtin(func_name, _emit_ast=_emit_ast)
    return map_size_builtin(_to_col_if_str(map_col, func_name))

0 commit comments

Comments
 (0)