From d9bf2cbc022ef5f7fdc19868f58222a8396be39f Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 5 Dec 2024 09:44:36 -0800 Subject: [PATCH 01/19] SNOW-1829870: Allow structured types to be enabled by default --- .../_internal/analyzer/datatype_mapper.py | 10 +++- .../snowpark/_internal/type_utils.py | 17 ++++-- src/snowflake/snowpark/types.py | 59 +++++++++++++++---- src/snowflake/snowpark/udaf.py | 8 ++- src/snowflake/snowpark/udtf.py | 4 ++ tests/integ/conftest.py | 10 ++++ tests/integ/test_column_names.py | 2 +- tests/integ/test_dataframe.py | 2 +- tests/integ/test_df_to_pandas.py | 2 +- tests/integ/test_function.py | 4 +- 10 files changed, 91 insertions(+), 27 deletions(-) diff --git a/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py b/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py index 21a33c99af..ea75178c7f 100644 --- a/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py +++ b/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py @@ -156,10 +156,16 @@ def to_sql(value: Any, datatype: DataType, from_values_statement: bool = False) return f"'{binascii.hexlify(bytes(value)).decode()}' :: BINARY" if isinstance(value, (list, tuple, array)) and isinstance(datatype, ArrayType): - return f"PARSE_JSON({str_to_sql(json.dumps(value, cls=PythonObjJSONEncoder))}) :: ARRAY" + type_str = "ARRAY" + if datatype.structured: + type_str = convert_sp_to_sf_type(datatype) + return f"PARSE_JSON({str_to_sql(json.dumps(value, cls=PythonObjJSONEncoder))}) :: {type_str}" if isinstance(value, dict) and isinstance(datatype, MapType): - return f"PARSE_JSON({str_to_sql(json.dumps(value, cls=PythonObjJSONEncoder))}) :: OBJECT" + type_str = "OBJECT" + if datatype.structured: + type_str = convert_sp_to_sf_type(datatype) + return f"PARSE_JSON({str_to_sql(json.dumps(value, cls=PythonObjJSONEncoder))}) :: {type_str}" if isinstance(datatype, VariantType): # PARSE_JSON returns VARIANT, so no need to append :: VARIANT here explicitly. diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 55fe27c9f8..dc8dc7b17d 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -70,6 +70,7 @@ _FractionalType, _IntegralType, _NumericType, + STRUCTURED_TYPES_ENABLED, ) # Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable @@ -183,12 +184,13 @@ def convert_sf_to_sp_type( max_string_size: int, ) -> DataType: """Convert the Snowflake logical type to the Snowpark type.""" + semi_structured_fill = None if STRUCTURED_TYPES_ENABLED else StringType() if column_type_name == "ARRAY": - return ArrayType(StringType()) + return ArrayType(semi_structured_fill) if column_type_name == "VARIANT": return VariantType() if column_type_name in {"OBJECT", "MAP"}: - return MapType(StringType(), StringType()) + return MapType(semi_structured_fill, semi_structured_fill) if column_type_name == "GEOGRAPHY": return GeographyType() if column_type_name == "GEOMETRY": @@ -530,7 +532,10 @@ def merge_type(a: DataType, b: DataType, name: Optional[str] = None) -> DataType return a -def python_value_str_to_object(value, tp: DataType) -> Any: +def python_value_str_to_object(value, tp: Optional[DataType]) -> Any: + if tp is None: + return None + if isinstance(tp, StringType): return value @@ -639,7 +644,7 @@ def python_type_to_snow_type( element_type = ( python_type_to_snow_type(tp_args[0], is_return_type_of_sproc)[0] if tp_args - else StringType() + else None ) return ArrayType(element_type), False @@ -649,12 +654,12 @@ def python_type_to_snow_type( key_type = ( python_type_to_snow_type(tp_args[0], is_return_type_of_sproc)[0] if tp_args - else StringType() + else None ) value_type = ( python_type_to_snow_type(tp_args[1], is_return_type_of_sproc)[0] if tp_args - else StringType() + else None ) return MapType(key_type, value_type), False diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 06bcc8969b..06ea7fd4d5 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -31,6 +31,9 @@ from collections.abc import Iterable +STRUCTURED_TYPES_ENABLED = False + + class DataType: """The base class of Snowpark data types.""" @@ -333,10 +336,16 @@ class ArrayType(DataType): def __init__( self, element_type: Optional[DataType] = None, - structured: bool = False, + structured: Optional[bool] = None, ) -> None: - self.structured = structured - self.element_type = element_type if element_type else StringType() + if STRUCTURED_TYPES_ENABLED: + self.structured = ( + structured if structured is not None else element_type is not None + ) + self.element_type = element_type + else: + self.structured = structured or False + self.element_type = element_type if element_type else StringType() def __repr__(self) -> str: return f"ArrayType({repr(self.element_type) if self.element_type else ''})" @@ -379,14 +388,30 @@ def __init__( self, key_type: Optional[DataType] = None, value_type: Optional[DataType] = None, - structured: bool = False, + structured: Optional[bool] = None, ) -> None: - self.structured = structured - self.key_type = key_type if key_type else StringType() - self.value_type = value_type if value_type else StringType() + if STRUCTURED_TYPES_ENABLED: + if (key_type is None and value_type is not None) or ( + key_type is not None and value_type is None + ): + raise ValueError( + "Must either set both key_type and value_type or leave both unset." + ) + self.structured = ( + structured if structured is not None else key_type is not None + ) + self.key_type = key_type + self.value_type = value_type + else: + self.structured = structured or False + self.key_type = key_type if key_type else StringType() + self.value_type = value_type if value_type else StringType() def __repr__(self) -> str: - return f"MapType({repr(self.key_type) if self.key_type else ''}, {repr(self.value_type) if self.value_type else ''})" + type_str = "" + if self.key_type and self.value_type: + type_str = f"{repr(self.key_type)}, {repr(self.value_type)}" + return f"MapType({type_str})" def is_primitive(self): return False @@ -617,12 +642,20 @@ class StructType(DataType): """Represents a table schema or structured column. Contains :class:`StructField` for each field.""" def __init__( - self, fields: Optional[List["StructField"]] = None, structured=False + self, + fields: Optional[List["StructField"]] = None, + structured: Optional[bool] = False, ) -> None: - self.structured = structured - if fields is None: - fields = [] - self.fields = fields + if STRUCTURED_TYPES_ENABLED: + self.structured = ( + structured if structured is not None else fields is not None + ) + self.fields = fields or [] + else: + self.structured = structured or False + if fields is None: + fields = [] + self.fields = fields def add( self, diff --git a/src/snowflake/snowpark/udaf.py b/src/snowflake/snowpark/udaf.py index 4d5069e222..3b75a4fddf 100644 --- a/src/snowflake/snowpark/udaf.py +++ b/src/snowflake/snowpark/udaf.py @@ -6,6 +6,7 @@ """User-defined aggregate functions (UDAFs) in Snowpark. Refer to :class:`~snowflake.snowpark.udaf.UDAFRegistration` for details and sample code.""" import sys +import warnings from types import ModuleType from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union @@ -39,7 +40,7 @@ publicapi, ) from snowflake.snowpark.column import Column -from snowflake.snowpark.types import DataType +from snowflake.snowpark.types import DataType, MapType # Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable # Python 3.9 can use both @@ -710,6 +711,11 @@ def _do_register_udaf( name, ) + if isinstance(return_type, MapType): + if return_type.structured: + warnings.warn("Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object.") + return_type = MapType() + # Capture original parameters. if _emit_ast: stmt = self._session._ast_batch.assign() diff --git a/src/snowflake/snowpark/udtf.py b/src/snowflake/snowpark/udtf.py index 69908890c3..5b687696d6 100644 --- a/src/snowflake/snowpark/udtf.py +++ b/src/snowflake/snowpark/udtf.py @@ -969,6 +969,10 @@ def _do_register_udtf( output_schema=output_schema, ) + # Structured Struct is interpreted as Object by function registration + # Force unstructured to ensure Table return type. + output_schema.structured = False + # Capture original parameters. if _emit_ast: stmt = self._session._ast_batch.assign() diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index e173eb52b8..6b68a6f0de 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -11,6 +11,7 @@ from snowflake.snowpark import Session from snowflake.snowpark.exceptions import SnowparkSQLException from snowflake.snowpark.mock._connection import MockServerConnection +from snowflake.snowpark.types import STRUCTURED_TYPES_ENABLED from tests.ast.ast_test_utils import ( close_full_ast_validation_mode, setup_full_ast_validation_mode, @@ -244,6 +245,15 @@ def session( session._cte_optimization_enabled = cte_optimization_enabled session.ast_enabled = ast_enabled + if STRUCTURED_TYPES_ENABLED: + queries = [ + "alter session set ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE=true", + "alter session set IGNORE_CLIENT_VESRION_IN_STRUCTURED_TYPES_RESPONSE=true", + "alter session set FORCE_ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT=true", + ] + for q in queries: + session.sql(q).collect() + if os.getenv("GITHUB_ACTIONS") == "true" and not local_testing_mode: set_up_external_access_integration_resources( session, rule1, rule2, key1, key2, integration1, integration2 diff --git a/tests/integ/test_column_names.py b/tests/integ/test_column_names.py index 724c35fede..1859831501 100644 --- a/tests/integ/test_column_names.py +++ b/tests/integ/test_column_names.py @@ -361,7 +361,7 @@ def test_literal(session, local_testing_mode): BooleanType(), # snowflake doesn't enforce the inner type of ArrayType, so it is expected that # it returns StringType() as inner type. - ArrayType(LongType()) if local_testing_mode else ArrayType(StringType()), + ArrayType(LongType()) if local_testing_mode else ArrayType(), ] verify_column_result( session, diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 75d1754a76..7c91222181 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -2797,7 +2797,7 @@ def test_save_as_table_with_table_sproc_output(session, save_mode, table_type): lambda session_: session_.sql("SELECT 1 as A"), packages=["snowflake-snowpark-python"], name=temp_sp_name, - return_type=StructType([StructField("A", IntegerType())]), + return_type=StructType([StructField("A", IntegerType())], structured=False), input_types=[], replace=True, ) diff --git a/tests/integ/test_df_to_pandas.py b/tests/integ/test_df_to_pandas.py index c3e6f55cc7..a5e069af00 100644 --- a/tests/integ/test_df_to_pandas.py +++ b/tests/integ/test_df_to_pandas.py @@ -359,7 +359,7 @@ def test_df_to_pandas_df(session): StructField("n", TimestampType()), StructField("o", TimeType()), StructField("p", VariantType()), - StructField("q", MapType(StringType(), StringType())), + StructField("q", MapType()), ] ), ) diff --git a/tests/integ/test_function.py b/tests/integ/test_function.py index 2b22fe692d..597938de45 100644 --- a/tests/integ/test_function.py +++ b/tests/integ/test_function.py @@ -1290,9 +1290,9 @@ def test_to_date_to_array_to_variant_to_object(session, local_testing_mode): Utils.assert_rows(res1, expected) assert df1.schema.fields[0].datatype == DateType() - assert df1.schema.fields[1].datatype == ArrayType(StringType()) + assert df1.schema.fields[1].datatype == ArrayType() assert df1.schema.fields[2].datatype == VariantType() - assert df1.schema.fields[3].datatype == MapType(StringType(), StringType()) + assert df1.schema.fields[3].datatype == MapType() def test_to_binary(session): From ec43e1af34bcfcdf2a356b4bd2ef5dee580148f1 Mon Sep 17 00:00:00 2001 From: Jamison Date: Fri, 6 Dec 2024 13:57:08 -0800 Subject: [PATCH 02/19] type checking --- src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py b/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py index ea75178c7f..db18f91dc2 100644 --- a/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py +++ b/src/snowflake/snowpark/_internal/analyzer/datatype_mapper.py @@ -220,11 +220,14 @@ def schema_expression(data_type: DataType, is_nullable: bool) -> str: return "to_timestamp('2020-09-16 06:30:00')" if isinstance(data_type, ArrayType): if data_type.structured: + assert isinstance(data_type.element_type, DataType) element = schema_expression(data_type.element_type, is_nullable) return f"to_array({element}) :: {convert_sp_to_sf_type(data_type)}" return "to_array(0)" if isinstance(data_type, MapType): if data_type.structured: + assert isinstance(data_type.key_type, DataType) + assert isinstance(data_type.value_type, DataType) key = schema_expression(data_type.key_type, is_nullable) value = schema_expression(data_type.value_type, is_nullable) return f"object_construct_keep_null({key}, {value}) :: {convert_sp_to_sf_type(data_type)}" From 7f3a5fd51313a3e3fec4d89e3834a0fb23f7ac42 Mon Sep 17 00:00:00 2001 From: Jamison Date: Fri, 6 Dec 2024 14:22:56 -0800 Subject: [PATCH 03/19] lint --- src/snowflake/snowpark/udaf.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/udaf.py b/src/snowflake/snowpark/udaf.py index 3b75a4fddf..dceaeee5be 100644 --- a/src/snowflake/snowpark/udaf.py +++ b/src/snowflake/snowpark/udaf.py @@ -713,7 +713,10 @@ def _do_register_udaf( if isinstance(return_type, MapType): if return_type.structured: - warnings.warn("Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object.") + warnings.warn( + "Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object.", + stacklevel=3, + ) return_type = MapType() # Capture original parameters. From ed232de4ff0daea87ab8fc14d27218f8cc2a8a9f Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 16 Dec 2024 10:47:54 -0800 Subject: [PATCH 04/19] Move flag to context --- src/snowflake/snowpark/_internal/type_utils.py | 6 ++++-- src/snowflake/snowpark/context.py | 4 ++++ src/snowflake/snowpark/types.py | 10 ++++------ src/snowflake/snowpark/udaf.py | 6 +++--- tests/integ/conftest.py | 10 ---------- 5 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index dc8dc7b17d..918f0c7d64 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -30,6 +30,7 @@ get_origin, ) +import snowflake.snowpark.context as context import snowflake.snowpark.types # type: ignore from snowflake.connector.constants import FIELD_ID_TO_NAME from snowflake.connector.cursor import ResultMetadata @@ -70,7 +71,6 @@ _FractionalType, _IntegralType, _NumericType, - STRUCTURED_TYPES_ENABLED, ) # Python 3.8 needs to use typing.Iterable because collections.abc.Iterable is not subscriptable @@ -184,7 +184,9 @@ def convert_sf_to_sp_type( max_string_size: int, ) -> DataType: """Convert the Snowflake logical type to the Snowpark type.""" - semi_structured_fill = None if STRUCTURED_TYPES_ENABLED else StringType() + semi_structured_fill = ( + None if context._should_use_structured_type_semanticselse else StringType() + ) if column_type_name == "ARRAY": return ArrayType(semi_structured_fill) if column_type_name == "VARIANT": diff --git a/src/snowflake/snowpark/context.py b/src/snowflake/snowpark/context.py index c8f6888c5b..8bc86f928a 100644 --- a/src/snowflake/snowpark/context.py +++ b/src/snowflake/snowpark/context.py @@ -21,6 +21,10 @@ _should_continue_registration: Optional[Callable[..., bool]] = None +# Global flag that determines if structured type semantics should be used +_should_use_structured_type_semantics = False + + def get_active_session() -> "snowflake.snowpark.Session": """Returns the current active Snowpark session. diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 06ea7fd4d5..2b79b5e33c 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -11,6 +11,7 @@ from enum import Enum from typing import Generic, List, Optional, Type, TypeVar, Union, Dict, Any +import snowflake.snowpark.context as context import snowflake.snowpark._internal.analyzer.expression as expression import snowflake.snowpark._internal.proto.generated.ast_pb2 as proto @@ -31,9 +32,6 @@ from collections.abc import Iterable -STRUCTURED_TYPES_ENABLED = False - - class DataType: """The base class of Snowpark data types.""" @@ -338,7 +336,7 @@ def __init__( element_type: Optional[DataType] = None, structured: Optional[bool] = None, ) -> None: - if STRUCTURED_TYPES_ENABLED: + if context._should_use_structured_type_semantics: self.structured = ( structured if structured is not None else element_type is not None ) @@ -390,7 +388,7 @@ def __init__( value_type: Optional[DataType] = None, structured: Optional[bool] = None, ) -> None: - if STRUCTURED_TYPES_ENABLED: + if context._should_use_structured_type_semantics: if (key_type is None and value_type is not None) or ( key_type is not None and value_type is None ): @@ -646,7 +644,7 @@ def __init__( fields: Optional[List["StructField"]] = None, structured: Optional[bool] = False, ) -> None: - if STRUCTURED_TYPES_ENABLED: + if context._should_use_structured_type_semantics: self.structured = ( structured if structured is not None else fields is not None ) diff --git a/src/snowflake/snowpark/udaf.py b/src/snowflake/snowpark/udaf.py index dceaeee5be..84d2e905ac 100644 --- a/src/snowflake/snowpark/udaf.py +++ b/src/snowflake/snowpark/udaf.py @@ -6,7 +6,6 @@ """User-defined aggregate functions (UDAFs) in Snowpark. Refer to :class:`~snowflake.snowpark.udaf.UDAFRegistration` for details and sample code.""" import sys -import warnings from types import ModuleType from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union @@ -38,6 +37,7 @@ TempObjectType, parse_positional_args_to_list, publicapi, + warning, ) from snowflake.snowpark.column import Column from snowflake.snowpark.types import DataType, MapType @@ -713,9 +713,9 @@ def _do_register_udaf( if isinstance(return_type, MapType): if return_type.structured: - warnings.warn( + warning( + "_do_register_udaf", "Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object.", - stacklevel=3, ) return_type = MapType() diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 6b68a6f0de..e173eb52b8 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -11,7 +11,6 @@ from snowflake.snowpark import Session from snowflake.snowpark.exceptions import SnowparkSQLException from snowflake.snowpark.mock._connection import MockServerConnection -from snowflake.snowpark.types import STRUCTURED_TYPES_ENABLED from tests.ast.ast_test_utils import ( close_full_ast_validation_mode, setup_full_ast_validation_mode, @@ -245,15 +244,6 @@ def session( session._cte_optimization_enabled = cte_optimization_enabled session.ast_enabled = ast_enabled - if STRUCTURED_TYPES_ENABLED: - queries = [ - "alter session set ENABLE_STRUCTURED_TYPES_IN_CLIENT_RESPONSE=true", - "alter session set IGNORE_CLIENT_VESRION_IN_STRUCTURED_TYPES_RESPONSE=true", - "alter session set FORCE_ENABLE_STRUCTURED_TYPES_NATIVE_ARROW_FORMAT=true", - ] - for q in queries: - session.sql(q).collect() - if os.getenv("GITHUB_ACTIONS") == "true" and not local_testing_mode: set_up_external_access_integration_resources( session, rule1, rule2, key1, key2, integration1, integration2 From 0dd7b91f1ee47d556fa5ba432c4f207c86eb0084 Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 16 Dec 2024 10:51:48 -0800 Subject: [PATCH 05/19] typo --- src/snowflake/snowpark/_internal/type_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 918f0c7d64..361277da6c 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -185,7 +185,7 @@ def convert_sf_to_sp_type( ) -> DataType: """Convert the Snowflake logical type to the Snowpark type.""" semi_structured_fill = ( - None if context._should_use_structured_type_semanticselse else StringType() + None if context._should_use_structured_type_semantics else StringType() ) if column_type_name == "ARRAY": return ArrayType(semi_structured_fill) From 13c142464026a2e7cfdf5b043729cd91da13db84 Mon Sep 17 00:00:00 2001 From: Varnika Budati Date: Mon, 16 Dec 2024 11:10:48 -0800 Subject: [PATCH 06/19] SNOW-1852779 Fix AST encoding for Column `in_`, `asc`, and `desc` (#2756) 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. Fixes SNOW-1852779 2. Fill out the following pre-review checklist: - [ ] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. - [x] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development) 3. Please describe how your code solves the related issue. Fixed AST encoding for Column `in_`, `asc`, and `desc`. Removed an unused entity and renamed `sp_column_in__seq` to `sp_column_in`. Changed the `nulls_first` optional boolean param to be an enum. --- .../snowpark/_internal/proto/ast.proto | 641 +++++++++--------- src/snowflake/snowpark/column.py | 14 +- tests/ast/data/col_asc.test | 10 +- tests/ast/data/col_desc.test | 10 +- tests/ast/data/col_in_.test | 6 +- tests/ast/data/windows.test | 3 + 6 files changed, 348 insertions(+), 336 deletions(-) diff --git a/src/snowflake/snowpark/_internal/proto/ast.proto b/src/snowflake/snowpark/_internal/proto/ast.proto index f8e612cf34..80bbf52c79 100644 --- a/src/snowflake/snowpark/_internal/proto/ast.proto +++ b/src/snowflake/snowpark/_internal/proto/ast.proto @@ -1,4 +1,4 @@ -// N.B. This file is generated by `ir-dsl-c`. DO NOT EDIT! +// N.B. This file is generated by `//Snowpark/ir-dsl-c`. DO NOT EDIT! // Generated from `{git@github.com:snowflakedb/snowflake.git}/Snowpark/ast`. syntax = "proto3"; @@ -283,6 +283,15 @@ message SpJoinType { } } +// sp-col-expr.ir:70 +message SpNullOrder { + oneof variant { + bool sp_null_order_default = 1; + bool sp_null_order_nulls_first = 2; + bool sp_null_order_nulls_last = 3; + } +} + // sp-type.ir:88 message SpPivotValue { oneof sealed_value { @@ -673,142 +682,141 @@ message Expr { SpColumnDesc sp_column_desc = 63; SpColumnEqualNan sp_column_equal_nan = 64; SpColumnEqualNull sp_column_equal_null = 65; - SpColumnIn_Dataframe sp_column_in__dataframe = 66; - SpColumnIn_Seq sp_column_in__seq = 67; - SpColumnIsNotNull sp_column_is_not_null = 68; - SpColumnIsNull sp_column_is_null = 69; - SpColumnOver sp_column_over = 70; - SpColumnRef sp_column_ref = 71; - SpColumnSqlExpr sp_column_sql_expr = 72; - SpColumnStringCollate sp_column_string_collate = 73; - SpColumnStringContains sp_column_string_contains = 74; - SpColumnStringEndsWith sp_column_string_ends_with = 75; - SpColumnStringLike sp_column_string_like = 76; - SpColumnStringRegexp sp_column_string_regexp = 77; - SpColumnStringStartsWith sp_column_string_starts_with = 78; - SpColumnStringSubstr sp_column_string_substr = 79; - SpColumnTryCast sp_column_try_cast = 80; - SpColumnWithinGroup sp_column_within_group = 81; - SpCreateDataframe sp_create_dataframe = 82; - SpDataframeAgg sp_dataframe_agg = 83; - SpDataframeAlias sp_dataframe_alias = 84; - SpDataframeAnalyticsComputeLag sp_dataframe_analytics_compute_lag = 85; - SpDataframeAnalyticsComputeLead sp_dataframe_analytics_compute_lead = 86; - SpDataframeAnalyticsCumulativeAgg sp_dataframe_analytics_cumulative_agg = 87; - SpDataframeAnalyticsMovingAgg sp_dataframe_analytics_moving_agg = 88; - SpDataframeAnalyticsTimeSeriesAgg sp_dataframe_analytics_time_series_agg = 89; - SpDataframeApply sp_dataframe_apply = 90; - SpDataframeCacheResult sp_dataframe_cache_result = 91; - SpDataframeCol sp_dataframe_col = 92; - SpDataframeCollect sp_dataframe_collect = 93; - SpDataframeCopyIntoTable sp_dataframe_copy_into_table = 94; - SpDataframeCount sp_dataframe_count = 95; - SpDataframeCreateOrReplaceDynamicTable sp_dataframe_create_or_replace_dynamic_table = 96; - SpDataframeCreateOrReplaceView sp_dataframe_create_or_replace_view = 97; - SpDataframeCrossJoin sp_dataframe_cross_join = 98; - SpDataframeCube sp_dataframe_cube = 99; - SpDataframeDescribe sp_dataframe_describe = 100; - SpDataframeDistinct sp_dataframe_distinct = 101; - SpDataframeDrop sp_dataframe_drop = 102; - SpDataframeDropDuplicates sp_dataframe_drop_duplicates = 103; - SpDataframeExcept sp_dataframe_except = 104; - SpDataframeFilter sp_dataframe_filter = 105; - SpDataframeFirst sp_dataframe_first = 106; - SpDataframeFlatten sp_dataframe_flatten = 107; - SpDataframeGroupBy sp_dataframe_group_by = 108; - SpDataframeGroupByGroupingSets sp_dataframe_group_by_grouping_sets = 109; - SpDataframeGroupBy_Columns sp_dataframe_group_by__columns = 110; - SpDataframeGroupBy_Strings sp_dataframe_group_by__strings = 111; - SpDataframeIntersect sp_dataframe_intersect = 112; - SpDataframeJoin sp_dataframe_join = 113; - SpDataframeJoinTableFunction sp_dataframe_join_table_function = 114; - SpDataframeJoin_Dataframe_JoinExprs sp_dataframe_join__dataframe__join_exprs = 115; - SpDataframeJoin_Dataframe_UsingColumns sp_dataframe_join__dataframe__using_columns = 116; - SpDataframeLimit sp_dataframe_limit = 117; - SpDataframeNaDrop_Python sp_dataframe_na_drop__python = 118; - SpDataframeNaDrop_Scala sp_dataframe_na_drop__scala = 119; - SpDataframeNaFill sp_dataframe_na_fill = 120; - SpDataframeNaReplace sp_dataframe_na_replace = 121; - SpDataframeNaturalJoin sp_dataframe_natural_join = 122; - SpDataframePivot sp_dataframe_pivot = 123; - SpDataframeRandomSplit sp_dataframe_random_split = 124; - SpDataframeRef sp_dataframe_ref = 125; - SpDataframeRename sp_dataframe_rename = 126; - SpDataframeRollup sp_dataframe_rollup = 127; - SpDataframeRollup_Columns sp_dataframe_rollup__columns = 128; - SpDataframeRollup_Strings sp_dataframe_rollup__strings = 129; - SpDataframeSample sp_dataframe_sample = 130; - SpDataframeSelect_Columns sp_dataframe_select__columns = 131; - SpDataframeSelect_Exprs sp_dataframe_select__exprs = 132; - SpDataframeShow sp_dataframe_show = 133; - SpDataframeSort sp_dataframe_sort = 134; - SpDataframeStatApproxQuantile sp_dataframe_stat_approx_quantile = 135; - SpDataframeStatCorr sp_dataframe_stat_corr = 136; - SpDataframeStatCov sp_dataframe_stat_cov = 137; - SpDataframeStatCrossTab sp_dataframe_stat_cross_tab = 138; - SpDataframeStatSampleBy sp_dataframe_stat_sample_by = 139; - SpDataframeToDf sp_dataframe_to_df = 140; - SpDataframeToLocalIterator sp_dataframe_to_local_iterator = 141; - SpDataframeToPandas sp_dataframe_to_pandas = 142; - SpDataframeToPandasBatches sp_dataframe_to_pandas_batches = 143; - SpDataframeUnion sp_dataframe_union = 144; - SpDataframeUnionAll sp_dataframe_union_all = 145; - SpDataframeUnionAllByName sp_dataframe_union_all_by_name = 146; - SpDataframeUnionByName sp_dataframe_union_by_name = 147; - SpDataframeUnpivot sp_dataframe_unpivot = 148; - SpDataframeWhere sp_dataframe_where = 149; - SpDataframeWithColumn sp_dataframe_with_column = 150; - SpDataframeWithColumnRenamed sp_dataframe_with_column_renamed = 151; - SpDataframeWithColumns sp_dataframe_with_columns = 152; - SpDataframeWrite sp_dataframe_write = 153; - SpDatatypeVal sp_datatype_val = 154; - SpFlatten sp_flatten = 155; - SpFnRef sp_fn_ref = 156; - SpGenerator sp_generator = 157; - SpGroupingSets sp_grouping_sets = 158; - SpMergeDeleteWhenMatchedClause sp_merge_delete_when_matched_clause = 159; - SpMergeInsertWhenNotMatchedClause sp_merge_insert_when_not_matched_clause = 160; - SpMergeUpdateWhenMatchedClause sp_merge_update_when_matched_clause = 161; - SpRange sp_range = 162; - SpReadAvro sp_read_avro = 163; - SpReadCsv sp_read_csv = 164; - SpReadJson sp_read_json = 165; - SpReadOrc sp_read_orc = 166; - SpReadParquet sp_read_parquet = 167; - SpReadTable sp_read_table = 168; - SpReadXml sp_read_xml = 169; - SpRelationalGroupedDataframeAgg sp_relational_grouped_dataframe_agg = 170; - SpRelationalGroupedDataframeApplyInPandas sp_relational_grouped_dataframe_apply_in_pandas = 171; - SpRelationalGroupedDataframeBuiltin sp_relational_grouped_dataframe_builtin = 172; - SpRelationalGroupedDataframePivot sp_relational_grouped_dataframe_pivot = 173; - SpRelationalGroupedDataframeRef sp_relational_grouped_dataframe_ref = 174; - SpRow sp_row = 175; - SpSessionTableFunction sp_session_table_function = 176; - SpSql sp_sql = 177; - SpTable sp_table = 178; - SpTableDelete sp_table_delete = 179; - SpTableDropTable sp_table_drop_table = 180; - SpTableFnCallAlias sp_table_fn_call_alias = 181; - SpTableFnCallOver sp_table_fn_call_over = 182; - SpTableMerge sp_table_merge = 183; - SpTableSample sp_table_sample = 184; - SpTableUpdate sp_table_update = 185; - SpToSnowparkPandas sp_to_snowpark_pandas = 186; - SpWriteCopyIntoLocation sp_write_copy_into_location = 187; - SpWriteCsv sp_write_csv = 188; - SpWriteJson sp_write_json = 189; - SpWritePandas sp_write_pandas = 190; - SpWriteParquet sp_write_parquet = 191; - SpWriteTable sp_write_table = 192; - StoredProcedure stored_procedure = 193; - StringVal string_val = 194; - Sub sub = 195; - TimeVal time_val = 196; - TimestampVal timestamp_val = 197; - TupleVal tuple_val = 198; - Udaf udaf = 199; - Udf udf = 200; - Udtf udtf = 201; + SpColumnIn sp_column_in = 66; + SpColumnIsNotNull sp_column_is_not_null = 67; + SpColumnIsNull sp_column_is_null = 68; + SpColumnOver sp_column_over = 69; + SpColumnRef sp_column_ref = 70; + SpColumnSqlExpr sp_column_sql_expr = 71; + SpColumnStringCollate sp_column_string_collate = 72; + SpColumnStringContains sp_column_string_contains = 73; + SpColumnStringEndsWith sp_column_string_ends_with = 74; + SpColumnStringLike sp_column_string_like = 75; + SpColumnStringRegexp sp_column_string_regexp = 76; + SpColumnStringStartsWith sp_column_string_starts_with = 77; + SpColumnStringSubstr sp_column_string_substr = 78; + SpColumnTryCast sp_column_try_cast = 79; + SpColumnWithinGroup sp_column_within_group = 80; + SpCreateDataframe sp_create_dataframe = 81; + SpDataframeAgg sp_dataframe_agg = 82; + SpDataframeAlias sp_dataframe_alias = 83; + SpDataframeAnalyticsComputeLag sp_dataframe_analytics_compute_lag = 84; + SpDataframeAnalyticsComputeLead sp_dataframe_analytics_compute_lead = 85; + SpDataframeAnalyticsCumulativeAgg sp_dataframe_analytics_cumulative_agg = 86; + SpDataframeAnalyticsMovingAgg sp_dataframe_analytics_moving_agg = 87; + SpDataframeAnalyticsTimeSeriesAgg sp_dataframe_analytics_time_series_agg = 88; + SpDataframeApply sp_dataframe_apply = 89; + SpDataframeCacheResult sp_dataframe_cache_result = 90; + SpDataframeCol sp_dataframe_col = 91; + SpDataframeCollect sp_dataframe_collect = 92; + SpDataframeCopyIntoTable sp_dataframe_copy_into_table = 93; + SpDataframeCount sp_dataframe_count = 94; + SpDataframeCreateOrReplaceDynamicTable sp_dataframe_create_or_replace_dynamic_table = 95; + SpDataframeCreateOrReplaceView sp_dataframe_create_or_replace_view = 96; + SpDataframeCrossJoin sp_dataframe_cross_join = 97; + SpDataframeCube sp_dataframe_cube = 98; + SpDataframeDescribe sp_dataframe_describe = 99; + SpDataframeDistinct sp_dataframe_distinct = 100; + SpDataframeDrop sp_dataframe_drop = 101; + SpDataframeDropDuplicates sp_dataframe_drop_duplicates = 102; + SpDataframeExcept sp_dataframe_except = 103; + SpDataframeFilter sp_dataframe_filter = 104; + SpDataframeFirst sp_dataframe_first = 105; + SpDataframeFlatten sp_dataframe_flatten = 106; + SpDataframeGroupBy sp_dataframe_group_by = 107; + SpDataframeGroupByGroupingSets sp_dataframe_group_by_grouping_sets = 108; + SpDataframeGroupBy_Columns sp_dataframe_group_by__columns = 109; + SpDataframeGroupBy_Strings sp_dataframe_group_by__strings = 110; + SpDataframeIntersect sp_dataframe_intersect = 111; + SpDataframeJoin sp_dataframe_join = 112; + SpDataframeJoinTableFunction sp_dataframe_join_table_function = 113; + SpDataframeJoin_Dataframe_JoinExprs sp_dataframe_join__dataframe__join_exprs = 114; + SpDataframeJoin_Dataframe_UsingColumns sp_dataframe_join__dataframe__using_columns = 115; + SpDataframeLimit sp_dataframe_limit = 116; + SpDataframeNaDrop_Python sp_dataframe_na_drop__python = 117; + SpDataframeNaDrop_Scala sp_dataframe_na_drop__scala = 118; + SpDataframeNaFill sp_dataframe_na_fill = 119; + SpDataframeNaReplace sp_dataframe_na_replace = 120; + SpDataframeNaturalJoin sp_dataframe_natural_join = 121; + SpDataframePivot sp_dataframe_pivot = 122; + SpDataframeRandomSplit sp_dataframe_random_split = 123; + SpDataframeRef sp_dataframe_ref = 124; + SpDataframeRename sp_dataframe_rename = 125; + SpDataframeRollup sp_dataframe_rollup = 126; + SpDataframeRollup_Columns sp_dataframe_rollup__columns = 127; + SpDataframeRollup_Strings sp_dataframe_rollup__strings = 128; + SpDataframeSample sp_dataframe_sample = 129; + SpDataframeSelect_Columns sp_dataframe_select__columns = 130; + SpDataframeSelect_Exprs sp_dataframe_select__exprs = 131; + SpDataframeShow sp_dataframe_show = 132; + SpDataframeSort sp_dataframe_sort = 133; + SpDataframeStatApproxQuantile sp_dataframe_stat_approx_quantile = 134; + SpDataframeStatCorr sp_dataframe_stat_corr = 135; + SpDataframeStatCov sp_dataframe_stat_cov = 136; + SpDataframeStatCrossTab sp_dataframe_stat_cross_tab = 137; + SpDataframeStatSampleBy sp_dataframe_stat_sample_by = 138; + SpDataframeToDf sp_dataframe_to_df = 139; + SpDataframeToLocalIterator sp_dataframe_to_local_iterator = 140; + SpDataframeToPandas sp_dataframe_to_pandas = 141; + SpDataframeToPandasBatches sp_dataframe_to_pandas_batches = 142; + SpDataframeUnion sp_dataframe_union = 143; + SpDataframeUnionAll sp_dataframe_union_all = 144; + SpDataframeUnionAllByName sp_dataframe_union_all_by_name = 145; + SpDataframeUnionByName sp_dataframe_union_by_name = 146; + SpDataframeUnpivot sp_dataframe_unpivot = 147; + SpDataframeWhere sp_dataframe_where = 148; + SpDataframeWithColumn sp_dataframe_with_column = 149; + SpDataframeWithColumnRenamed sp_dataframe_with_column_renamed = 150; + SpDataframeWithColumns sp_dataframe_with_columns = 151; + SpDataframeWrite sp_dataframe_write = 152; + SpDatatypeVal sp_datatype_val = 153; + SpFlatten sp_flatten = 154; + SpFnRef sp_fn_ref = 155; + SpGenerator sp_generator = 156; + SpGroupingSets sp_grouping_sets = 157; + SpMergeDeleteWhenMatchedClause sp_merge_delete_when_matched_clause = 158; + SpMergeInsertWhenNotMatchedClause sp_merge_insert_when_not_matched_clause = 159; + SpMergeUpdateWhenMatchedClause sp_merge_update_when_matched_clause = 160; + SpRange sp_range = 161; + SpReadAvro sp_read_avro = 162; + SpReadCsv sp_read_csv = 163; + SpReadJson sp_read_json = 164; + SpReadOrc sp_read_orc = 165; + SpReadParquet sp_read_parquet = 166; + SpReadTable sp_read_table = 167; + SpReadXml sp_read_xml = 168; + SpRelationalGroupedDataframeAgg sp_relational_grouped_dataframe_agg = 169; + SpRelationalGroupedDataframeApplyInPandas sp_relational_grouped_dataframe_apply_in_pandas = 170; + SpRelationalGroupedDataframeBuiltin sp_relational_grouped_dataframe_builtin = 171; + SpRelationalGroupedDataframePivot sp_relational_grouped_dataframe_pivot = 172; + SpRelationalGroupedDataframeRef sp_relational_grouped_dataframe_ref = 173; + SpRow sp_row = 174; + SpSessionTableFunction sp_session_table_function = 175; + SpSql sp_sql = 176; + SpTable sp_table = 177; + SpTableDelete sp_table_delete = 178; + SpTableDropTable sp_table_drop_table = 179; + SpTableFnCallAlias sp_table_fn_call_alias = 180; + SpTableFnCallOver sp_table_fn_call_over = 181; + SpTableMerge sp_table_merge = 182; + SpTableSample sp_table_sample = 183; + SpTableUpdate sp_table_update = 184; + SpToSnowparkPandas sp_to_snowpark_pandas = 185; + SpWriteCopyIntoLocation sp_write_copy_into_location = 186; + SpWriteCsv sp_write_csv = 187; + SpWriteJson sp_write_json = 188; + SpWritePandas sp_write_pandas = 189; + SpWriteParquet sp_write_parquet = 190; + SpWriteTable sp_write_table = 191; + StoredProcedure stored_procedure = 192; + StringVal string_val = 193; + Sub sub = 194; + TimeVal time_val = 195; + TimestampVal timestamp_val = 196; + TupleVal tuple_val = 197; + Udaf udaf = 198; + Udf udf = 199; + Udtf udtf = 200; } } @@ -951,152 +959,151 @@ message HasSrcPosition { SpColumnDesc sp_column_desc = 67; SpColumnEqualNan sp_column_equal_nan = 68; SpColumnEqualNull sp_column_equal_null = 69; - SpColumnIn_Dataframe sp_column_in__dataframe = 70; - SpColumnIn_Seq sp_column_in__seq = 71; - SpColumnIsNotNull sp_column_is_not_null = 72; - SpColumnIsNull sp_column_is_null = 73; - SpColumnOver sp_column_over = 74; - SpColumnRef sp_column_ref = 75; - SpColumnSqlExpr sp_column_sql_expr = 76; - SpColumnStringCollate sp_column_string_collate = 77; - SpColumnStringContains sp_column_string_contains = 78; - SpColumnStringEndsWith sp_column_string_ends_with = 79; - SpColumnStringLike sp_column_string_like = 80; - SpColumnStringRegexp sp_column_string_regexp = 81; - SpColumnStringStartsWith sp_column_string_starts_with = 82; - SpColumnStringSubstr sp_column_string_substr = 83; - SpColumnTryCast sp_column_try_cast = 84; - SpColumnWithinGroup sp_column_within_group = 85; - SpCreateDataframe sp_create_dataframe = 86; - SpDataframeAgg sp_dataframe_agg = 87; - SpDataframeAlias sp_dataframe_alias = 88; - SpDataframeAnalyticsComputeLag sp_dataframe_analytics_compute_lag = 89; - SpDataframeAnalyticsComputeLead sp_dataframe_analytics_compute_lead = 90; - SpDataframeAnalyticsCumulativeAgg sp_dataframe_analytics_cumulative_agg = 91; - SpDataframeAnalyticsMovingAgg sp_dataframe_analytics_moving_agg = 92; - SpDataframeAnalyticsTimeSeriesAgg sp_dataframe_analytics_time_series_agg = 93; - SpDataframeApply sp_dataframe_apply = 94; - SpDataframeCacheResult sp_dataframe_cache_result = 95; - SpDataframeCol sp_dataframe_col = 96; - SpDataframeCollect sp_dataframe_collect = 97; - SpDataframeCopyIntoTable sp_dataframe_copy_into_table = 98; - SpDataframeCount sp_dataframe_count = 99; - SpDataframeCreateOrReplaceDynamicTable sp_dataframe_create_or_replace_dynamic_table = 100; - SpDataframeCreateOrReplaceView sp_dataframe_create_or_replace_view = 101; - SpDataframeCrossJoin sp_dataframe_cross_join = 102; - SpDataframeCube sp_dataframe_cube = 103; - SpDataframeDescribe sp_dataframe_describe = 104; - SpDataframeDistinct sp_dataframe_distinct = 105; - SpDataframeDrop sp_dataframe_drop = 106; - SpDataframeDropDuplicates sp_dataframe_drop_duplicates = 107; - SpDataframeExcept sp_dataframe_except = 108; - SpDataframeFilter sp_dataframe_filter = 109; - SpDataframeFirst sp_dataframe_first = 110; - SpDataframeFlatten sp_dataframe_flatten = 111; - SpDataframeGroupBy sp_dataframe_group_by = 112; - SpDataframeGroupByGroupingSets sp_dataframe_group_by_grouping_sets = 113; - SpDataframeGroupBy_Columns sp_dataframe_group_by__columns = 114; - SpDataframeGroupBy_Strings sp_dataframe_group_by__strings = 115; - SpDataframeIntersect sp_dataframe_intersect = 116; - SpDataframeJoin sp_dataframe_join = 117; - SpDataframeJoinTableFunction sp_dataframe_join_table_function = 118; - SpDataframeJoin_Dataframe_JoinExprs sp_dataframe_join__dataframe__join_exprs = 119; - SpDataframeJoin_Dataframe_UsingColumns sp_dataframe_join__dataframe__using_columns = 120; - SpDataframeLimit sp_dataframe_limit = 121; - SpDataframeNaDrop_Python sp_dataframe_na_drop__python = 122; - SpDataframeNaDrop_Scala sp_dataframe_na_drop__scala = 123; - SpDataframeNaFill sp_dataframe_na_fill = 124; - SpDataframeNaReplace sp_dataframe_na_replace = 125; - SpDataframeNaturalJoin sp_dataframe_natural_join = 126; - SpDataframePivot sp_dataframe_pivot = 127; - SpDataframeRandomSplit sp_dataframe_random_split = 128; - SpDataframeReaderInit sp_dataframe_reader_init = 129; - SpDataframeReaderOption sp_dataframe_reader_option = 130; - SpDataframeReaderOptions sp_dataframe_reader_options = 131; - SpDataframeReaderSchema sp_dataframe_reader_schema = 132; - SpDataframeReaderWithMetadata sp_dataframe_reader_with_metadata = 133; - SpDataframeRef sp_dataframe_ref = 134; - SpDataframeRename sp_dataframe_rename = 135; - SpDataframeRollup sp_dataframe_rollup = 136; - SpDataframeRollup_Columns sp_dataframe_rollup__columns = 137; - SpDataframeRollup_Strings sp_dataframe_rollup__strings = 138; - SpDataframeSample sp_dataframe_sample = 139; - SpDataframeSelect_Columns sp_dataframe_select__columns = 140; - SpDataframeSelect_Exprs sp_dataframe_select__exprs = 141; - SpDataframeShow sp_dataframe_show = 142; - SpDataframeSort sp_dataframe_sort = 143; - SpDataframeStatApproxQuantile sp_dataframe_stat_approx_quantile = 144; - SpDataframeStatCorr sp_dataframe_stat_corr = 145; - SpDataframeStatCov sp_dataframe_stat_cov = 146; - SpDataframeStatCrossTab sp_dataframe_stat_cross_tab = 147; - SpDataframeStatSampleBy sp_dataframe_stat_sample_by = 148; - SpDataframeToDf sp_dataframe_to_df = 149; - SpDataframeToLocalIterator sp_dataframe_to_local_iterator = 150; - SpDataframeToPandas sp_dataframe_to_pandas = 151; - SpDataframeToPandasBatches sp_dataframe_to_pandas_batches = 152; - SpDataframeUnion sp_dataframe_union = 153; - SpDataframeUnionAll sp_dataframe_union_all = 154; - SpDataframeUnionAllByName sp_dataframe_union_all_by_name = 155; - SpDataframeUnionByName sp_dataframe_union_by_name = 156; - SpDataframeUnpivot sp_dataframe_unpivot = 157; - SpDataframeWhere sp_dataframe_where = 158; - SpDataframeWithColumn sp_dataframe_with_column = 159; - SpDataframeWithColumnRenamed sp_dataframe_with_column_renamed = 160; - SpDataframeWithColumns sp_dataframe_with_columns = 161; - SpDataframeWrite sp_dataframe_write = 162; - SpDatatypeVal sp_datatype_val = 163; - SpFlatten sp_flatten = 164; - SpFnRef sp_fn_ref = 165; - SpGenerator sp_generator = 166; - SpGroupingSets sp_grouping_sets = 167; - SpMergeDeleteWhenMatchedClause sp_merge_delete_when_matched_clause = 168; - SpMergeInsertWhenNotMatchedClause sp_merge_insert_when_not_matched_clause = 169; - SpMergeUpdateWhenMatchedClause sp_merge_update_when_matched_clause = 170; - SpRange sp_range = 171; - SpReadAvro sp_read_avro = 172; - SpReadCsv sp_read_csv = 173; - SpReadJson sp_read_json = 174; - SpReadOrc sp_read_orc = 175; - SpReadParquet sp_read_parquet = 176; - SpReadTable sp_read_table = 177; - SpReadXml sp_read_xml = 178; - SpRelationalGroupedDataframeAgg sp_relational_grouped_dataframe_agg = 179; - SpRelationalGroupedDataframeApplyInPandas sp_relational_grouped_dataframe_apply_in_pandas = 180; - SpRelationalGroupedDataframeBuiltin sp_relational_grouped_dataframe_builtin = 181; - SpRelationalGroupedDataframePivot sp_relational_grouped_dataframe_pivot = 182; - SpRelationalGroupedDataframeRef sp_relational_grouped_dataframe_ref = 183; - SpRow sp_row = 184; - SpSessionTableFunction sp_session_table_function = 185; - SpSql sp_sql = 186; - SpTable sp_table = 187; - SpTableDelete sp_table_delete = 188; - SpTableDropTable sp_table_drop_table = 189; - SpTableFnCallAlias sp_table_fn_call_alias = 190; - SpTableFnCallOver sp_table_fn_call_over = 191; - SpTableMerge sp_table_merge = 192; - SpTableSample sp_table_sample = 193; - SpTableUpdate sp_table_update = 194; - SpToSnowparkPandas sp_to_snowpark_pandas = 195; - SpWindowSpecEmpty sp_window_spec_empty = 196; - SpWindowSpecOrderBy sp_window_spec_order_by = 197; - SpWindowSpecPartitionBy sp_window_spec_partition_by = 198; - SpWindowSpecRangeBetween sp_window_spec_range_between = 199; - SpWindowSpecRowsBetween sp_window_spec_rows_between = 200; - SpWriteCopyIntoLocation sp_write_copy_into_location = 201; - SpWriteCsv sp_write_csv = 202; - SpWriteJson sp_write_json = 203; - SpWritePandas sp_write_pandas = 204; - SpWriteParquet sp_write_parquet = 205; - SpWriteTable sp_write_table = 206; - StoredProcedure stored_procedure = 207; - StringVal string_val = 208; - Sub sub = 209; - TimeVal time_val = 210; - TimestampVal timestamp_val = 211; - TupleVal tuple_val = 212; - Udaf udaf = 213; - Udf udf = 214; - Udtf udtf = 215; + SpColumnIn sp_column_in = 70; + SpColumnIsNotNull sp_column_is_not_null = 71; + SpColumnIsNull sp_column_is_null = 72; + SpColumnOver sp_column_over = 73; + SpColumnRef sp_column_ref = 74; + SpColumnSqlExpr sp_column_sql_expr = 75; + SpColumnStringCollate sp_column_string_collate = 76; + SpColumnStringContains sp_column_string_contains = 77; + SpColumnStringEndsWith sp_column_string_ends_with = 78; + SpColumnStringLike sp_column_string_like = 79; + SpColumnStringRegexp sp_column_string_regexp = 80; + SpColumnStringStartsWith sp_column_string_starts_with = 81; + SpColumnStringSubstr sp_column_string_substr = 82; + SpColumnTryCast sp_column_try_cast = 83; + SpColumnWithinGroup sp_column_within_group = 84; + SpCreateDataframe sp_create_dataframe = 85; + SpDataframeAgg sp_dataframe_agg = 86; + SpDataframeAlias sp_dataframe_alias = 87; + SpDataframeAnalyticsComputeLag sp_dataframe_analytics_compute_lag = 88; + SpDataframeAnalyticsComputeLead sp_dataframe_analytics_compute_lead = 89; + SpDataframeAnalyticsCumulativeAgg sp_dataframe_analytics_cumulative_agg = 90; + SpDataframeAnalyticsMovingAgg sp_dataframe_analytics_moving_agg = 91; + SpDataframeAnalyticsTimeSeriesAgg sp_dataframe_analytics_time_series_agg = 92; + SpDataframeApply sp_dataframe_apply = 93; + SpDataframeCacheResult sp_dataframe_cache_result = 94; + SpDataframeCol sp_dataframe_col = 95; + SpDataframeCollect sp_dataframe_collect = 96; + SpDataframeCopyIntoTable sp_dataframe_copy_into_table = 97; + SpDataframeCount sp_dataframe_count = 98; + SpDataframeCreateOrReplaceDynamicTable sp_dataframe_create_or_replace_dynamic_table = 99; + SpDataframeCreateOrReplaceView sp_dataframe_create_or_replace_view = 100; + SpDataframeCrossJoin sp_dataframe_cross_join = 101; + SpDataframeCube sp_dataframe_cube = 102; + SpDataframeDescribe sp_dataframe_describe = 103; + SpDataframeDistinct sp_dataframe_distinct = 104; + SpDataframeDrop sp_dataframe_drop = 105; + SpDataframeDropDuplicates sp_dataframe_drop_duplicates = 106; + SpDataframeExcept sp_dataframe_except = 107; + SpDataframeFilter sp_dataframe_filter = 108; + SpDataframeFirst sp_dataframe_first = 109; + SpDataframeFlatten sp_dataframe_flatten = 110; + SpDataframeGroupBy sp_dataframe_group_by = 111; + SpDataframeGroupByGroupingSets sp_dataframe_group_by_grouping_sets = 112; + SpDataframeGroupBy_Columns sp_dataframe_group_by__columns = 113; + SpDataframeGroupBy_Strings sp_dataframe_group_by__strings = 114; + SpDataframeIntersect sp_dataframe_intersect = 115; + SpDataframeJoin sp_dataframe_join = 116; + SpDataframeJoinTableFunction sp_dataframe_join_table_function = 117; + SpDataframeJoin_Dataframe_JoinExprs sp_dataframe_join__dataframe__join_exprs = 118; + SpDataframeJoin_Dataframe_UsingColumns sp_dataframe_join__dataframe__using_columns = 119; + SpDataframeLimit sp_dataframe_limit = 120; + SpDataframeNaDrop_Python sp_dataframe_na_drop__python = 121; + SpDataframeNaDrop_Scala sp_dataframe_na_drop__scala = 122; + SpDataframeNaFill sp_dataframe_na_fill = 123; + SpDataframeNaReplace sp_dataframe_na_replace = 124; + SpDataframeNaturalJoin sp_dataframe_natural_join = 125; + SpDataframePivot sp_dataframe_pivot = 126; + SpDataframeRandomSplit sp_dataframe_random_split = 127; + SpDataframeReaderInit sp_dataframe_reader_init = 128; + SpDataframeReaderOption sp_dataframe_reader_option = 129; + SpDataframeReaderOptions sp_dataframe_reader_options = 130; + SpDataframeReaderSchema sp_dataframe_reader_schema = 131; + SpDataframeReaderWithMetadata sp_dataframe_reader_with_metadata = 132; + SpDataframeRef sp_dataframe_ref = 133; + SpDataframeRename sp_dataframe_rename = 134; + SpDataframeRollup sp_dataframe_rollup = 135; + SpDataframeRollup_Columns sp_dataframe_rollup__columns = 136; + SpDataframeRollup_Strings sp_dataframe_rollup__strings = 137; + SpDataframeSample sp_dataframe_sample = 138; + SpDataframeSelect_Columns sp_dataframe_select__columns = 139; + SpDataframeSelect_Exprs sp_dataframe_select__exprs = 140; + SpDataframeShow sp_dataframe_show = 141; + SpDataframeSort sp_dataframe_sort = 142; + SpDataframeStatApproxQuantile sp_dataframe_stat_approx_quantile = 143; + SpDataframeStatCorr sp_dataframe_stat_corr = 144; + SpDataframeStatCov sp_dataframe_stat_cov = 145; + SpDataframeStatCrossTab sp_dataframe_stat_cross_tab = 146; + SpDataframeStatSampleBy sp_dataframe_stat_sample_by = 147; + SpDataframeToDf sp_dataframe_to_df = 148; + SpDataframeToLocalIterator sp_dataframe_to_local_iterator = 149; + SpDataframeToPandas sp_dataframe_to_pandas = 150; + SpDataframeToPandasBatches sp_dataframe_to_pandas_batches = 151; + SpDataframeUnion sp_dataframe_union = 152; + SpDataframeUnionAll sp_dataframe_union_all = 153; + SpDataframeUnionAllByName sp_dataframe_union_all_by_name = 154; + SpDataframeUnionByName sp_dataframe_union_by_name = 155; + SpDataframeUnpivot sp_dataframe_unpivot = 156; + SpDataframeWhere sp_dataframe_where = 157; + SpDataframeWithColumn sp_dataframe_with_column = 158; + SpDataframeWithColumnRenamed sp_dataframe_with_column_renamed = 159; + SpDataframeWithColumns sp_dataframe_with_columns = 160; + SpDataframeWrite sp_dataframe_write = 161; + SpDatatypeVal sp_datatype_val = 162; + SpFlatten sp_flatten = 163; + SpFnRef sp_fn_ref = 164; + SpGenerator sp_generator = 165; + SpGroupingSets sp_grouping_sets = 166; + SpMergeDeleteWhenMatchedClause sp_merge_delete_when_matched_clause = 167; + SpMergeInsertWhenNotMatchedClause sp_merge_insert_when_not_matched_clause = 168; + SpMergeUpdateWhenMatchedClause sp_merge_update_when_matched_clause = 169; + SpRange sp_range = 170; + SpReadAvro sp_read_avro = 171; + SpReadCsv sp_read_csv = 172; + SpReadJson sp_read_json = 173; + SpReadOrc sp_read_orc = 174; + SpReadParquet sp_read_parquet = 175; + SpReadTable sp_read_table = 176; + SpReadXml sp_read_xml = 177; + SpRelationalGroupedDataframeAgg sp_relational_grouped_dataframe_agg = 178; + SpRelationalGroupedDataframeApplyInPandas sp_relational_grouped_dataframe_apply_in_pandas = 179; + SpRelationalGroupedDataframeBuiltin sp_relational_grouped_dataframe_builtin = 180; + SpRelationalGroupedDataframePivot sp_relational_grouped_dataframe_pivot = 181; + SpRelationalGroupedDataframeRef sp_relational_grouped_dataframe_ref = 182; + SpRow sp_row = 183; + SpSessionTableFunction sp_session_table_function = 184; + SpSql sp_sql = 185; + SpTable sp_table = 186; + SpTableDelete sp_table_delete = 187; + SpTableDropTable sp_table_drop_table = 188; + SpTableFnCallAlias sp_table_fn_call_alias = 189; + SpTableFnCallOver sp_table_fn_call_over = 190; + SpTableMerge sp_table_merge = 191; + SpTableSample sp_table_sample = 192; + SpTableUpdate sp_table_update = 193; + SpToSnowparkPandas sp_to_snowpark_pandas = 194; + SpWindowSpecEmpty sp_window_spec_empty = 195; + SpWindowSpecOrderBy sp_window_spec_order_by = 196; + SpWindowSpecPartitionBy sp_window_spec_partition_by = 197; + SpWindowSpecRangeBetween sp_window_spec_range_between = 198; + SpWindowSpecRowsBetween sp_window_spec_rows_between = 199; + SpWriteCopyIntoLocation sp_write_copy_into_location = 200; + SpWriteCsv sp_write_csv = 201; + SpWriteJson sp_write_json = 202; + SpWritePandas sp_write_pandas = 203; + SpWriteParquet sp_write_parquet = 204; + SpWriteTable sp_write_table = 205; + StoredProcedure stored_procedure = 206; + StringVal string_val = 207; + Sub sub = 208; + TimeVal time_val = 209; + TimestampVal timestamp_val = 210; + TupleVal tuple_val = 211; + Udaf udaf = 212; + Udf udf = 213; + Udtf udtf = 214; } } @@ -1326,7 +1333,7 @@ message SpColumnApply_String { // sp-col-expr.ir:49 message SpColumnAsc { Expr col = 1; - google.protobuf.BoolValue nulls_first = 2; + SpNullOrder null_order = 2; SrcPosition src = 3; } @@ -1354,17 +1361,17 @@ message SpColumnCast { // sp-col-expr.ir:66 message SpColumnDesc { Expr col = 1; - google.protobuf.BoolValue nulls_first = 2; + SpNullOrder null_order = 2; SrcPosition src = 3; } -// sp-col-expr.ir:70 +// sp-col-expr.ir:72 message SpColumnEqualNan { Expr col = 1; SrcPosition src = 2; } -// sp-col-expr.ir:72 +// sp-col-expr.ir:74 message SpColumnEqualNull { Expr lhs = 1; Expr rhs = 2; @@ -1392,50 +1399,42 @@ message SpColumnFn { SpColumnCast sp_column_cast = 6; SpColumnDesc sp_column_desc = 7; SpColumnEqualNan sp_column_equal_nan = 8; - SpColumnIn_Dataframe sp_column_in__dataframe = 9; - SpColumnIn_Seq sp_column_in__seq = 10; - SpColumnIsNotNull sp_column_is_not_null = 11; - SpColumnIsNull sp_column_is_null = 12; - SpColumnOver sp_column_over = 13; - SpColumnStringCollate sp_column_string_collate = 14; - SpColumnStringContains sp_column_string_contains = 15; - SpColumnStringEndsWith sp_column_string_ends_with = 16; - SpColumnStringLike sp_column_string_like = 17; - SpColumnStringRegexp sp_column_string_regexp = 18; - SpColumnStringStartsWith sp_column_string_starts_with = 19; - SpColumnStringSubstr sp_column_string_substr = 20; - SpColumnTryCast sp_column_try_cast = 21; - SpColumnWithinGroup sp_column_within_group = 22; + SpColumnIn sp_column_in = 9; + SpColumnIsNotNull sp_column_is_not_null = 10; + SpColumnIsNull sp_column_is_null = 11; + SpColumnOver sp_column_over = 12; + SpColumnStringCollate sp_column_string_collate = 13; + SpColumnStringContains sp_column_string_contains = 14; + SpColumnStringEndsWith sp_column_string_ends_with = 15; + SpColumnStringLike sp_column_string_like = 16; + SpColumnStringRegexp sp_column_string_regexp = 17; + SpColumnStringStartsWith sp_column_string_starts_with = 18; + SpColumnStringSubstr sp_column_string_substr = 19; + SpColumnTryCast sp_column_try_cast = 20; + SpColumnWithinGroup sp_column_within_group = 21; } } -// sp-col-expr.ir:77 -message SpColumnIn_Dataframe { - Expr col = 1; - SpDataframeExpr df = 2; - SrcPosition src = 3; -} - -// sp-col-expr.ir:81 -message SpColumnIn_Seq { +// sp-col-expr.ir:79 +message SpColumnIn { Expr col = 1; SrcPosition src = 2; repeated Expr values = 3; } -// sp-col-expr.ir:85 +// sp-col-expr.ir:83 message SpColumnIsNotNull { Expr col = 1; SrcPosition src = 2; } -// sp-col-expr.ir:87 +// sp-col-expr.ir:85 message SpColumnIsNull { Expr col = 1; SrcPosition src = 2; } -// sp-col-expr.ir:89 +// sp-col-expr.ir:87 message SpColumnOver { Expr col = 1; SrcPosition src = 2; @@ -1455,35 +1454,35 @@ message SpColumnSqlExpr { SrcPosition src = 3; } -// sp-col-expr.ir:119 +// sp-col-expr.ir:117 message SpColumnStringCollate { Expr col = 1; Expr collation_spec = 2; SrcPosition src = 3; } -// sp-col-expr.ir:123 +// sp-col-expr.ir:121 message SpColumnStringContains { Expr col = 1; Expr pattern = 2; SrcPosition src = 3; } -// sp-col-expr.ir:110 +// sp-col-expr.ir:108 message SpColumnStringEndsWith { Expr col = 1; SrcPosition src = 2; Expr suffix = 3; } -// sp-col-expr.ir:97 +// sp-col-expr.ir:95 message SpColumnStringLike { Expr col = 1; Expr pattern = 2; SrcPosition src = 3; } -// sp-col-expr.ir:101 +// sp-col-expr.ir:99 message SpColumnStringRegexp { Expr col = 1; Expr parameters = 2; @@ -1491,14 +1490,14 @@ message SpColumnStringRegexp { SrcPosition src = 4; } -// sp-col-expr.ir:106 +// sp-col-expr.ir:104 message SpColumnStringStartsWith { Expr col = 1; Expr prefix = 2; SrcPosition src = 3; } -// sp-col-expr.ir:114 +// sp-col-expr.ir:112 message SpColumnStringSubstr { Expr col = 1; Expr len = 2; @@ -1513,7 +1512,7 @@ message SpColumnTryCast { SpDataType to = 3; } -// sp-col-expr.ir:93 +// sp-col-expr.ir:91 message SpColumnWithinGroup { Expr col = 1; ExprArgList cols = 2; diff --git a/src/snowflake/snowpark/column.py b/src/snowflake/snowpark/column.py index 0b46f7d101..3f32c98120 100644 --- a/src/snowflake/snowpark/column.py +++ b/src/snowflake/snowpark/column.py @@ -644,7 +644,7 @@ def in_( ast = None if _emit_ast: ast = proto.Expr() - proto_ast = ast.sp_column_in__seq + proto_ast = ast.sp_column_in proto_ast.col.CopyFrom(self._ast) return Column(Literal(False), _ast=ast, _emit_ast=_emit_ast) @@ -699,7 +699,7 @@ def validate_value(value_expr: Expression): ast = None if _emit_ast: ast = proto.Expr() - proto_ast = ast.sp_column_in__seq + proto_ast = ast.sp_column_in proto_ast.col.CopyFrom(self._ast) for val in vals: val_ast = proto_ast.values.add() @@ -952,6 +952,7 @@ def desc(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_desc) ast.col.CopyFrom(self._ast) + ast.null_order.sp_null_order_default = True return Column( SortOrder(self._expression, Descending()), _ast=expr, _emit_ast=_emit_ast ) @@ -965,7 +966,7 @@ def desc_nulls_first(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_desc) ast.col.CopyFrom(self._ast) - ast.nulls_first.value = True + ast.null_order.sp_null_order_nulls_first = True return Column( SortOrder(self._expression, Descending(), NullsFirst()), _ast=expr, @@ -981,7 +982,7 @@ def desc_nulls_last(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_desc) ast.col.CopyFrom(self._ast) - ast.nulls_first.value = False + ast.null_order.sp_null_order_nulls_last = True return Column( SortOrder(self._expression, Descending(), NullsLast()), _ast=expr, @@ -996,6 +997,7 @@ def asc(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_asc) ast.col.CopyFrom(self._ast) + ast.null_order.sp_null_order_default = True return Column( SortOrder(self._expression, Ascending()), _ast=expr, _emit_ast=_emit_ast ) @@ -1009,7 +1011,7 @@ def asc_nulls_first(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_asc) ast.col.CopyFrom(self._ast) - ast.nulls_first.value = True + ast.null_order.sp_null_order_nulls_first = True return Column( SortOrder(self._expression, Ascending(), NullsFirst()), _ast=expr, @@ -1025,7 +1027,7 @@ def asc_nulls_last(self, _emit_ast: bool = True) -> "Column": expr = proto.Expr() ast = with_src_position(expr.sp_column_asc) ast.col.CopyFrom(self._ast) - ast.nulls_first.value = False + ast.null_order.sp_null_order_nulls_last = True return Column( SortOrder(self._expression, Ascending(), NullsLast()), _ast=expr, diff --git a/tests/ast/data/col_asc.test b/tests/ast/data/col_asc.test index d938cd948b..90a8d4612e 100644 --- a/tests/ast/data/col_asc.test +++ b/tests/ast/data/col_asc.test @@ -90,6 +90,9 @@ body { } } } + null_order { + sp_null_order_default: true + } src { end_column: 37 end_line: 29 @@ -163,8 +166,8 @@ body { } } } - nulls_first { - value: true + null_order { + sp_null_order_nulls_first: true } src { end_column: 49 @@ -239,7 +242,8 @@ body { } } } - nulls_first { + null_order { + sp_null_order_nulls_last: true } src { end_column: 48 diff --git a/tests/ast/data/col_desc.test b/tests/ast/data/col_desc.test index 4156c2d0c0..97d30879d2 100644 --- a/tests/ast/data/col_desc.test +++ b/tests/ast/data/col_desc.test @@ -88,6 +88,9 @@ body { } } } + null_order { + sp_null_order_default: true + } src { end_column: 38 end_line: 27 @@ -161,8 +164,8 @@ body { } } } - nulls_first { - value: true + null_order { + sp_null_order_nulls_first: true } src { end_column: 50 @@ -237,7 +240,8 @@ body { } } } - nulls_first { + null_order { + sp_null_order_nulls_last: true } src { end_column: 49 diff --git a/tests/ast/data/col_in_.test b/tests/ast/data/col_in_.test index 4d274a9a28..9fee6e2f51 100644 --- a/tests/ast/data/col_in_.test +++ b/tests/ast/data/col_in_.test @@ -55,7 +55,7 @@ body { expr { sp_dataframe_select__columns { cols { - sp_column_in__seq { + sp_column_in { col { apply_expr { fn { @@ -157,7 +157,7 @@ body { expr { sp_dataframe_select__columns { cols { - sp_column_in__seq { + sp_column_in { col { apply_expr { fn { @@ -230,7 +230,7 @@ body { expr { sp_dataframe_select__columns { cols { - sp_column_in__seq { + sp_column_in { col { apply_expr { fn { diff --git a/tests/ast/data/windows.test b/tests/ast/data/windows.test index 272559c87a..be96f9fff2 100644 --- a/tests/ast/data/windows.test +++ b/tests/ast/data/windows.test @@ -557,6 +557,9 @@ body { } } } + null_order { + sp_null_order_default: true + } src { end_column: 51 end_line: 35 From b32806ff104e89f8c488359279cac196a57b7129 Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 16 Dec 2024 16:09:42 -0800 Subject: [PATCH 07/19] merge main and fix test --- src/snowflake/snowpark/types.py | 2 ++ tests/integ/scala/test_datatype_suite.py | 6 +++--- tests/unit/test_datatype_mapper.py | 8 ++++++++ tests/unit/test_types.py | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 59a1a6fdae..f78b35e1f9 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -683,6 +683,8 @@ def __init__( ) else: self.structured = structured or False + + self.fields = [] for field in fields or []: self.add(field) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index a1bd1d48ac..1004190b00 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -856,9 +856,9 @@ def test_structured_dtypes_cast(structured_type_session, structured_type_support pytest.skip("Test requires structured type support.") expected_semi_schema = StructType( [ - StructField("ARR", ArrayType(StringType()), nullable=True), - StructField("MAP", MapType(StringType(), StringType()), nullable=True), - StructField("OBJ", MapType(StringType(), StringType()), nullable=True), + StructField("ARR", ArrayType(), nullable=True), + StructField("MAP", MapType(), nullable=True), + StructField("OBJ", MapType(), nullable=True), ] ) expected_structured_schema = StructType( diff --git a/tests/unit/test_datatype_mapper.py b/tests/unit/test_datatype_mapper.py index af8b9cd3c1..44e84df0a4 100644 --- a/tests/unit/test_datatype_mapper.py +++ b/tests/unit/test_datatype_mapper.py @@ -123,9 +123,17 @@ def test_to_sql(): assert ( to_sql([1, "2", 3.5], ArrayType()) == "PARSE_JSON('[1, \"2\", 3.5]') :: ARRAY" ) + assert ( + to_sql([1, 2, 3], ArrayType(IntegerType(), structured=True)) + == "PARSE_JSON('[1, 2, 3]') :: ARRAY(INT)" + ) assert ( to_sql({"'": '"'}, MapType()) == 'PARSE_JSON(\'{"\'\'": "\\\\""}\') :: OBJECT' ) + assert ( + to_sql({"'": '"'}, MapType(StringType(), structured=True)) + == 'PARSE_JSON(\'{"\'\'": "\\\\""}\') :: MAP(STRING, STRING)' + ) assert to_sql([{1: 2}], ArrayType()) == "PARSE_JSON('[{\"1\": 2}]') :: ARRAY" assert to_sql({1: [2]}, MapType()) == "PARSE_JSON('{\"1\": [2]}') :: OBJECT" diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index d5ffc9757f..db5355d1ce 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -683,6 +683,7 @@ def {func_name}(x, y {datatype_str} = {annotated_value}) -> None: @pytest.mark.parametrize( "value_str,datatype,expected_value", [ + (None, None, None), ("1", IntegerType(), 1), ("True", BooleanType(), True), ("1.0", FloatType(), 1.0), From c3db2239f412880e406451e9e3a2662d4ead0266 Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 16 Dec 2024 18:20:04 -0800 Subject: [PATCH 08/19] make feature flag thread safe --- src/snowflake/snowpark/_internal/type_utils.py | 4 ++-- src/snowflake/snowpark/context.py | 16 ++++++++++++++-- src/snowflake/snowpark/types.py | 16 ++++++++-------- tests/integ/scala/test_datatype_suite.py | 11 ++++++----- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 1a0c17ee3a..0910a2a4aa 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -159,7 +159,7 @@ def convert_metadata_to_sp_type( [ StructField( field.name - if context._should_use_structured_type_semantics + if context._should_use_structured_type_semantics() else quote_name(field.name, keep_case=True), convert_metadata_to_sp_type(field, max_string_size), nullable=field.is_nullable, @@ -188,7 +188,7 @@ def convert_sf_to_sp_type( ) -> DataType: """Convert the Snowflake logical type to the Snowpark type.""" semi_structured_fill = ( - None if context._should_use_structured_type_semantics else StringType() + None if context._should_use_structured_type_semantics() else StringType() ) if column_type_name == "ARRAY": return ArrayType(semi_structured_fill) diff --git a/src/snowflake/snowpark/context.py b/src/snowflake/snowpark/context.py index 8bc86f928a..a975a53a39 100644 --- a/src/snowflake/snowpark/context.py +++ b/src/snowflake/snowpark/context.py @@ -7,6 +7,7 @@ from typing import Callable, Optional import snowflake.snowpark +import threading _use_scoped_temp_objects = True @@ -21,8 +22,19 @@ _should_continue_registration: Optional[Callable[..., bool]] = None -# Global flag that determines if structured type semantics should be used -_should_use_structured_type_semantics = False +# Internal-only global flag that determines if structured type semantics should be used +_use_structured_type_semantics = False +_use_structured_type_semantics_lock = None + + +def _should_use_structured_type_semantics(): + global _use_structured_type_semantics + global _use_structured_type_semantics_lock + if _use_structured_type_semantics_lock is None: + _use_structured_type_semantics_lock = threading.RLock() + + with _use_structured_type_semantics_lock: + return _use_structured_type_semantics def get_active_session() -> "snowflake.snowpark.Session": diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index f78b35e1f9..6cf8075713 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -336,7 +336,7 @@ def __init__( element_type: Optional[DataType] = None, structured: Optional[bool] = None, ) -> None: - if context._should_use_structured_type_semantics: + if context._should_use_structured_type_semantics(): self.structured = ( structured if structured is not None else element_type is not None ) @@ -349,7 +349,7 @@ def __repr__(self) -> str: return f"ArrayType({repr(self.element_type) if self.element_type else ''})" def _as_nested(self) -> "ArrayType": - if not context._should_use_structured_type_semantics: + if not context._should_use_structured_type_semantics(): return self element_type = self.element_type if isinstance(element_type, (ArrayType, MapType, StructType)): @@ -396,7 +396,7 @@ def __init__( value_type: Optional[DataType] = None, structured: Optional[bool] = None, ) -> None: - if context._should_use_structured_type_semantics: + if context._should_use_structured_type_semantics(): if (key_type is None and value_type is not None) or ( key_type is not None and value_type is None ): @@ -423,7 +423,7 @@ def is_primitive(self): return False def _as_nested(self) -> "MapType": - if not context._should_use_structured_type_semantics: + if not context._should_use_structured_type_semantics(): return self value_type = self.value_type if isinstance(value_type, (ArrayType, MapType, StructType)): @@ -600,7 +600,7 @@ def __init__( @property def name(self) -> str: - if self._is_column or not context._should_use_structured_type_semantics: + if self._is_column or not context._should_use_structured_type_semantics(): return self.column_identifier.name else: return self._name @@ -615,7 +615,7 @@ def name(self, n: Union[ColumnIdentifier, str]) -> None: self.column_identifier = ColumnIdentifier(n) def _as_nested(self) -> "StructField": - if not context._should_use_structured_type_semantics: + if not context._should_use_structured_type_semantics(): return self datatype = self.datatype if isinstance(datatype, (ArrayType, MapType, StructType)): @@ -677,7 +677,7 @@ def __init__( fields: Optional[List["StructField"]] = None, structured: Optional[bool] = False, ) -> None: - if context._should_use_structured_type_semantics: + if context._should_use_structured_type_semantics(): self.structured = ( structured if structured is not None else fields is not None ) @@ -713,7 +713,7 @@ def add( return self def _as_nested(self) -> "StructType": - if not context._should_use_structured_type_semantics: + if not context._should_use_structured_type_semantics(): return self return StructType( [field._as_nested() for field in self.fields], self.structured diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 1004190b00..48f64638b7 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -167,10 +167,11 @@ def examples(structured_type_support): def structured_type_session(session, structured_type_support): if structured_type_support: with structured_types_enabled_session(session) as sess: - semantics_enabled = context._should_use_structured_type_semantics - context._should_use_structured_type_semantics = True - yield sess - context._should_use_structured_type_semantics = semantics_enabled + semantics_enabled = context._should_use_structured_type_semantics() + with context._use_structured_type_semantics_lock(): + context._use_structured_type_semantics = True + yield sess + context._use_structured_type_semantics = semantics_enabled else: yield session @@ -399,7 +400,7 @@ def test_structured_dtypes_select( ): query, expected_dtypes, expected_schema = examples df = _create_test_dataframe(structured_type_session, structured_type_support) - nested_field_name = "b" if context._should_use_structured_type_semantics else "B" + nested_field_name = "b" if context._should_use_structured_type_semantics() else "B" flattened_df = df.select( df.map["k1"].alias("value1"), df.obj["A"].alias("a"), From 1c262d7e29a34e575251910c1691b1dd85db02cd Mon Sep 17 00:00:00 2001 From: Jamison Date: Mon, 16 Dec 2024 18:39:39 -0800 Subject: [PATCH 09/19] typo --- tests/integ/scala/test_datatype_suite.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 48f64638b7..8de0f62e05 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -168,7 +168,7 @@ def structured_type_session(session, structured_type_support): if structured_type_support: with structured_types_enabled_session(session) as sess: semantics_enabled = context._should_use_structured_type_semantics() - with context._use_structured_type_semantics_lock(): + with context._use_structured_type_semantics_lock: context._use_structured_type_semantics = True yield sess context._use_structured_type_semantics = semantics_enabled From 0caef5806a36a0631ce6d7a7db37b16e651a7c70 Mon Sep 17 00:00:00 2001 From: Jamison Date: Tue, 17 Dec 2024 12:02:41 -0800 Subject: [PATCH 10/19] Fix ast test --- src/snowflake/snowpark/types.py | 8 ++++++++ tests/integ/scala/test_datatype_suite.py | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 6cf8075713..53473186bf 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -384,6 +384,10 @@ def json_value(self) -> Dict[str, Any]: def _fill_ast(self, ast: proto.SpDataType) -> None: ast.sp_array_type.structured = self.structured + if self.element_type is None: + raise NotImplementedError( + "SNOW-1862700: AST does not support empty element_type." + ) self.element_type._fill_ast(ast.sp_array_type.ty) @@ -469,6 +473,10 @@ def valueType(self): def _fill_ast(self, ast: proto.SpDataType) -> None: ast.sp_map_type.structured = self.structured + if self.key_type is None or self.value_type is None: + raise NotImplementedError( + "SNOW-1862700: AST does not support empty key or value type." + ) self.key_type._fill_ast(ast.sp_map_type.key_ty) self.value_type._fill_ast(ast.sp_map_type.value_ty) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 8de0f62e05..a0fcc61036 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -852,6 +852,10 @@ def test_dtypes_vector(session): "config.getoption('local_testing_mode', default=False)", reason="FEAT: SNOW-1372813 Cast to StructType not supported", ) +@pytest.mark.skipif( + "config.getoption('enable_ast', default=False)", + reason="SNOW-1862700: AST does not support new structured type semantics yet.", +) def test_structured_dtypes_cast(structured_type_session, structured_type_support): if not structured_type_support: pytest.skip("Test requires structured type support.") From 238004029678ed412c74c9341627f001a9461702 Mon Sep 17 00:00:00 2001 From: Jamison Date: Wed, 18 Dec 2024 09:34:31 -0800 Subject: [PATCH 11/19] move lock --- src/snowflake/snowpark/context.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/snowflake/snowpark/context.py b/src/snowflake/snowpark/context.py index a975a53a39..49258b3a97 100644 --- a/src/snowflake/snowpark/context.py +++ b/src/snowflake/snowpark/context.py @@ -24,15 +24,12 @@ # Internal-only global flag that determines if structured type semantics should be used _use_structured_type_semantics = False -_use_structured_type_semantics_lock = None +_use_structured_type_semantics_lock = threading.RLock() def _should_use_structured_type_semantics(): global _use_structured_type_semantics global _use_structured_type_semantics_lock - if _use_structured_type_semantics_lock is None: - _use_structured_type_semantics_lock = threading.RLock() - with _use_structured_type_semantics_lock: return _use_structured_type_semantics From 995e519fcd60c7b3f09d580ae611750c40614e51 Mon Sep 17 00:00:00 2001 From: Jamison Date: Wed, 18 Dec 2024 12:23:49 -0800 Subject: [PATCH 12/19] test coverage --- tests/integ/scala/test_datatype_suite.py | 69 ++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index a0fcc61036..1ea3305111 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -6,7 +6,9 @@ # Many of the tests have been moved to unit/scala/test_datattype_suite.py from decimal import Decimal +from unittest import mock +import logging import pytest import snowflake.snowpark.context as context @@ -21,6 +23,7 @@ lit, object_construct, sum_distinct, + udaf, udf, ) from snowflake.snowpark.types import ( @@ -522,6 +525,72 @@ def test_structured_dtypes_iceberg( Utils.drop_dynamic_table(structured_type_session, dynamic_table_name) +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="local testing does not fully support structured types yet.", +) +def test_structured_dtypes_negative(structured_type_session, structured_type_support): + if not structured_type_support: + pytest.skip("Test requires structured type support.") + + # SNOW-1862700: Array Type and Map Type missing element or value fails to generate AST + with pytest.raises( + NotImplementedError, match="AST does not support empty element_type." + ): + x = ArrayType() + x._fill_ast(mock.Mock()) + + with pytest.raises( + NotImplementedError, match="AST does not support empty key or value type." + ): + x = MapType() + x._fill_ast(mock.Mock()) + + # Maptype requires both key and value type be set if either is set + with pytest.raises( + ValueError, + match="Must either set both key_type and value_type or leave both unset.", + ): + MapType(StringType()) + + +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="local testing does not fully support structured types yet.", +) +def test_udaf_structured_map_downcast( + structured_type_session, structured_type_support, caplog +): + if not structured_type_support: + pytest.skip("Test requires structured type support.") + + with caplog.at_level(logging.WARNING): + + @udaf(return_type=MapType(StringType(), StringType(), structured=True)) + class MapCollector: + def __init__(self) -> None: + self._agg_state = dict() + + @property + def aggregate_state(self) -> dict: + return self._agg_state + + def accumulate(self, int_: int) -> None: + self._agg_state[int_] = self._agg_state.get(int_, 0) + 1 + + def merge(self, other_state: int) -> None: + self._agg_state = {**self._agg_state, **other_state} + + def finish(self) -> dict: + return self._agg_state + + assert ( + "Snowflake does not support structured maps as return type for UDAFs. Downcasting to semi-structured object." + in caplog.text + ) + assert MapCollector._return_type == MapType() + + @pytest.mark.skipif( "config.getoption('local_testing_mode', default=False)", reason="local testing does not fully support structured types yet.", From 1b89027149e04636fd84fd5ed393d4f9baa9db84 Mon Sep 17 00:00:00 2001 From: Jamison Date: Wed, 18 Dec 2024 13:28:40 -0800 Subject: [PATCH 13/19] remove context manager --- tests/integ/scala/test_datatype_suite.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 1ea3305111..429809b02c 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -171,10 +171,11 @@ def structured_type_session(session, structured_type_support): if structured_type_support: with structured_types_enabled_session(session) as sess: semantics_enabled = context._should_use_structured_type_semantics() - with context._use_structured_type_semantics_lock: - context._use_structured_type_semantics = True - yield sess - context._use_structured_type_semantics = semantics_enabled + context._use_structured_type_semantics_lock.acquire() + context._use_structured_type_semantics = True + yield sess + context._use_structured_type_semantics = semantics_enabled + context._use_structured_type_semantics_lock.release() else: yield session From 26fd29e67265e283938b0c8e6ab0fb0d001e0f2e Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 19 Dec 2024 12:55:00 -0800 Subject: [PATCH 14/19] switch to using patch --- tests/integ/scala/test_datatype_suite.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 98dc59b272..152c13c31c 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -170,12 +170,10 @@ def examples(structured_type_support): def structured_type_session(session, structured_type_support): if structured_type_support: with structured_types_enabled_session(session) as sess: - semantics_enabled = context._should_use_structured_type_semantics() - context._use_structured_type_semantics_lock.acquire() - context._use_structured_type_semantics = True - yield sess - context._use_structured_type_semantics = semantics_enabled - context._use_structured_type_semantics_lock.release() + with mock.patch( + "snowflake.snowpark.context._use_structured_type_semantics", True + ): + yield sess else: yield session From 9295e11c917c448484d35aa7f76efe4f32465074 Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 19 Dec 2024 13:44:00 -0800 Subject: [PATCH 15/19] move test to other module --- src/snowflake/snowpark/types.py | 2 +- tests/integ/scala/test_datatype_suite.py | 81 +++++++++++++++++++++++- tests/integ/test_stored_procedure.py | 79 +---------------------- 3 files changed, 82 insertions(+), 80 deletions(-) diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 53473186bf..2be260c553 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -683,7 +683,7 @@ class StructType(DataType): def __init__( self, fields: Optional[List["StructField"]] = None, - structured: Optional[bool] = False, + structured: Optional[bool] = None, ) -> None: if context._should_use_structured_type_semantics(): self.structured = ( diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 152c13c31c..894e1bbde7 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -14,6 +14,7 @@ import snowflake.snowpark.context as context from snowflake.connector.options import installed_pandas from snowflake.snowpark import Row +from snowflake.snowpark.dataframe import DataFrame from snowflake.snowpark.exceptions import SnowparkSQLException from snowflake.snowpark.functions import ( any_value, @@ -26,6 +27,7 @@ udaf, udf, ) +from snowflake.snowpark.session import Session from snowflake.snowpark.types import ( ArrayType, BinaryType, @@ -51,6 +53,8 @@ VectorType, ) from tests.utils import ( + TempObjectType, + TestFiles, Utils, iceberg_supported, structured_types_enabled_session, @@ -649,7 +653,7 @@ def test_iceberg_nested_fields( nullable=True, ) ], - structured=False, + structured=True, ) try: @@ -1199,3 +1203,78 @@ def test_structured_type_schema_expression( Utils.drop_table(structured_type_session, table_name) Utils.drop_table(structured_type_session, non_null_table_name) Utils.drop_table(structured_type_session, nested_table_name) + + +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="Structured types are not supported in Local Testing", +) +def test_stored_procedure_with_structured_returns( + structured_type_session, structured_type_support, local_testing_mode, resources_path +): + if not structured_type_support: + pytest.skip("Structured types not enabled in this account.") + + test_files = TestFiles(resources_path) + tmp_stage_name = Utils.random_stage_name() + if not local_testing_mode: + Utils.create_stage(structured_type_session, tmp_stage_name, is_temporary=True) + structured_type_session.add_packages("snowflake-snowpark-python") + Utils.upload_to_stage( + structured_type_session, + tmp_stage_name, + test_files.test_sp_py_file, + compress=False, + ) + + expected_dtypes = [ + ("VEC", "vector"), + ("MAP", "map"), + ("OBJ", "struct"), + ("ARR", "array"), + ] + expected_schema = StructType( + [ + StructField("VEC", VectorType(int, 5), nullable=True), + StructField( + "MAP", + MapType(StringType(16777216), LongType(), structured=True), + nullable=True, + ), + StructField( + "OBJ", + StructType( + [ + StructField("a", StringType(16777216), nullable=True), + StructField("b", DoubleType(), nullable=True), + ], + structured=True, + ), + nullable=True, + ), + StructField("ARR", ArrayType(DoubleType(), structured=True), nullable=True), + ] + ) + + sproc_name = Utils.random_name_for_temp_object(TempObjectType.PROCEDURE) + + def test_sproc(_session: Session) -> DataFrame: + return _session.sql( + """ + select + [1,2,3,4,5] :: vector(int, 5) as vec, + object_construct('k1', 1) :: map(varchar, int) as map, + object_construct('a', 'foo', 'b', 0.05) :: object(a varchar, b float) as obj, + [1.0, 3.1, 4.5] :: array(float) as arr + ; + """ + ) + + structured_type_session.sproc.register( + test_sproc, + name=sproc_name, + replace=True, + ) + df = structured_type_session.call(sproc_name) + assert df.schema == expected_schema + assert df.dtypes == expected_dtypes diff --git a/tests/integ/test_stored_procedure.py b/tests/integ/test_stored_procedure.py index 9345bca0bb..849a49210b 100644 --- a/tests/integ/test_stored_procedure.py +++ b/tests/integ/test_stored_procedure.py @@ -4,7 +4,6 @@ # import datetime -import decimal import logging import os import re @@ -45,34 +44,20 @@ ) from snowflake.snowpark.row import Row from snowflake.snowpark.types import ( - ArrayType, DateType, DoubleType, - Geography, - Geometry, IntegerType, - LongType, - MapType, StringType, StructField, StructType, - Variant, - VectorType, ) -# flake8: noqa -from tests.integ.scala.test_datatype_suite import ( - structured_type_session, - structured_type_support, -) from tests.utils import ( IS_IN_STORED_PROC, IS_NOT_ON_GITHUB, TempObjectType, TestFiles, Utils, - structured_types_enabled_session, - structured_types_supported, ) pytestmark = [ @@ -361,68 +346,6 @@ def test_call_named_stored_procedure( # restore active session -@pytest.mark.skipif( - "config.getoption('local_testing_mode', default=False)", - reason="Structured types are not supported in Local Testing", -) -def test_stored_procedure_with_structured_returns( - structured_type_session, structured_type_support -): - if not structured_type_support: - pytest.skip("Structured types not enabled in this account.") - expected_dtypes = [ - ("VEC", "vector"), - ("MAP", "map"), - ("OBJ", "struct"), - ("ARR", "array"), - ] - expected_schema = StructType( - [ - StructField("VEC", VectorType(int, 5), nullable=True), - StructField( - "MAP", - MapType(StringType(16777216), LongType(), structured=True), - nullable=True, - ), - StructField( - "OBJ", - StructType( - [ - StructField("a", StringType(16777216), nullable=True), - StructField("b", DoubleType(), nullable=True), - ], - structured=True, - ), - nullable=True, - ), - StructField("ARR", ArrayType(DoubleType(), structured=True), nullable=True), - ] - ) - - sproc_name = Utils.random_name_for_temp_object(TempObjectType.PROCEDURE) - - def test_sproc(_session: Session) -> DataFrame: - return _session.sql( - """ - select - [1,2,3,4,5] :: vector(int, 5) as vec, - object_construct('k1', 1) :: map(varchar, int) as map, - object_construct('a', 'foo', 'b', 0.05) :: object(a varchar, b float) as obj, - [1.0, 3.1, 4.5] :: array(float) as arr - ; - """ - ) - - structured_type_session.sproc.register( - test_sproc, - name=sproc_name, - replace=True, - ) - df = structured_type_session.call(sproc_name) - assert df.schema == expected_schema - assert df.dtypes == expected_dtypes - - @pytest.mark.skipif( "config.getoption('local_testing_mode', default=False)", reason="system functions not supported by local testing", @@ -1979,7 +1902,7 @@ def test_register_sproc_after_switch_schema(session): databases = [] try: - for i in range(2): + for _ in range(2): new_database = f"db_{Utils.random_alphanumeric_str(10)}" databases.append(new_database) new_schema = f"{new_database}.test" From 77a57a63b36c93f7a02c52ebfbdf6a402bdc89b7 Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 19 Dec 2024 13:57:44 -0800 Subject: [PATCH 16/19] fix broken import --- tests/integ/test_stored_procedure.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integ/test_stored_procedure.py b/tests/integ/test_stored_procedure.py index 849a49210b..ff0673f361 100644 --- a/tests/integ/test_stored_procedure.py +++ b/tests/integ/test_stored_procedure.py @@ -4,6 +4,7 @@ # import datetime +import decimal # noqa: F401 import logging import os import re From 4769169d98d9b0dc805fca895309e7e8eba12682 Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 19 Dec 2024 14:47:49 -0800 Subject: [PATCH 17/19] another broken import --- tests/integ/test_stored_procedure.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integ/test_stored_procedure.py b/tests/integ/test_stored_procedure.py index ff0673f361..c470cec8d3 100644 --- a/tests/integ/test_stored_procedure.py +++ b/tests/integ/test_stored_procedure.py @@ -4,7 +4,6 @@ # import datetime -import decimal # noqa: F401 import logging import os import re @@ -830,6 +829,9 @@ def return_datetime(_: Session) -> datetime.datetime: ) @pytest.mark.parametrize("register_from_file", [True, False]) def test_register_sp_with_optional_args(session: Session, tmpdir, register_from_file): + import decimal # noqa: F401 + from snowflake.snowpark.types import Variant, Geometry, Geography # noqa: F401 + import_body = """ import datetime import decimal From af5af8752a1fc8d8ce52de3a03b6ce7088b15c6f Mon Sep 17 00:00:00 2001 From: Jamison Date: Thu, 19 Dec 2024 15:15:15 -0800 Subject: [PATCH 18/19] another test fix --- tests/integ/scala/test_datatype_suite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 894e1bbde7..9f08cedbde 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -561,6 +561,10 @@ def test_structured_dtypes_negative(structured_type_session, structured_type_sup "config.getoption('local_testing_mode', default=False)", reason="local testing does not fully support structured types yet.", ) +@pytest.mark.skipif( + "config.getoption('enable_ast', default=False)", + reason="SNOW-1862700: AST does not support new structured type semantics yet.", +) def test_udaf_structured_map_downcast( structured_type_session, structured_type_support, caplog ): From ee2298077c8ef1839598fb4026ebe42df358980c Mon Sep 17 00:00:00 2001 From: Jamison Date: Fri, 20 Dec 2024 14:47:14 -0800 Subject: [PATCH 19/19] SNOW-1865926: Infer schema for StructType columns from nested Rows --- .../snowpark/_internal/type_utils.py | 3 ++ src/snowflake/snowpark/session.py | 9 ++++ tests/integ/scala/test_datatype_suite.py | 41 +++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 0910a2a4aa..a989e1625f 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -36,6 +36,7 @@ from snowflake.connector.cursor import ResultMetadata from snowflake.connector.options import installed_pandas, pandas from snowflake.snowpark._internal.utils import quote_name +from snowflake.snowpark.row import Row from snowflake.snowpark.types import ( LTZ, NTZ, @@ -441,6 +442,8 @@ def infer_type(obj: Any) -> DataType: if key is not None and value is not None: return MapType(infer_type(key), infer_type(value)) return MapType(NullType(), NullType()) + elif isinstance(obj, Row) and context._should_use_structured_type_semantics(): + return infer_schema(obj) elif isinstance(obj, (list, tuple)): for v in obj: if v is not None: diff --git a/src/snowflake/snowpark/session.py b/src/snowflake/snowpark/session.py index d5ad8448d8..e2a9c5ef8c 100644 --- a/src/snowflake/snowpark/session.py +++ b/src/snowflake/snowpark/session.py @@ -36,6 +36,7 @@ import pkg_resources import snowflake.snowpark._internal.proto.generated.ast_pb2 as proto +import snowflake.snowpark.context as context from snowflake.connector import ProgrammingError, SnowflakeConnection from snowflake.connector.options import installed_pandas, pandas from snowflake.connector.pandas_tools import write_pandas @@ -3294,6 +3295,14 @@ def convert_row_to_list( data_type, (MapType, StructType) ): converted_row.append(json.dumps(value, cls=PythonObjJSONEncoder)) + elif ( + isinstance(value, Row) + and isinstance(data_type, StructType) + and context._should_use_structured_type_semantics() + ): + converted_row.append( + json.dumps(value.as_dict(), cls=PythonObjJSONEncoder) + ) elif isinstance(data_type, VariantType): converted_row.append(json.dumps(value, cls=PythonObjJSONEncoder)) elif isinstance(data_type, GeographyType): diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index 9f08cedbde..935a8c829d 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -598,6 +598,47 @@ def finish(self) -> dict: assert MapCollector._return_type == MapType() +@pytest.mark.skipif( + "config.getoption('local_testing_mode', default=False)", + reason="local testing does not fully support structured types yet.", +) +def test_structured_type_infer(structured_type_session, structured_type_support): + if not structured_type_support: + pytest.skip("Test requires structured type support.") + + struct = Row(f1="v1", f2=2) + df = structured_type_session.create_dataframe( + [ + ({"key": "value"}, [1, 2, 3], struct), + ], + schema=["map", "array", "obj"], + ) + + assert df.schema == StructType( + [ + StructField( + "MAP", + MapType(StringType(), StringType(), structured=True), + nullable=True, + ), + StructField("ARRAY", ArrayType(LongType(), structured=True), nullable=True), + StructField( + "OBJ", + StructType( + [ + StructField("f1", StringType(), nullable=True), + StructField("f2", LongType(), nullable=True), + ], + structured=True, + ), + nullable=True, + ), + ], + structured=True, + ) + df.collect() + + @pytest.mark.skipif( "config.getoption('local_testing_mode', default=False)", reason="local testing does not fully support structured types yet.",