Skip to content

Commit 617e105

Browse files
committed
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
2 parents 7e22397 + 1973988 commit 617e105

31 files changed

+400
-63
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
- Invoking snowflake system procedures does not invoke an additional `describe procedure` call to check the return type of the procedure.
1010
- Added support for `Session.create_dataframe()` with the stage URL and FILE data type.
11-
- Added support for different modes for dealing with corrupt XML records when reading an XML file using `session.read.option('rowTag', <tag_name>).xml(<stage_file_path>)`. Currently `PERMISSIVE`, `DROPMALFORMED` and `FAILFAST` are supported.
11+
- Added support for different modes for dealing with corrupt XML records when reading an XML file using `session.read.option('mode', <mode>).option('rowTag', <tag_name>).xml(<stage_file_path>)`. Currently `PERMISSIVE`, `DROPMALFORMED` and `FAILFAST` are supported.
12+
- Improved the error message of the XML reader when the specified row tag is not found in the file.
1213
- Improved query generation for `Dataframe.drop` to use `SELECT * EXCLUDE ()` to exclude the dropped columns. To enable this feature, set `session.conf.set("use_simplified_query_generation", True)`.
14+
- Added support for `VariantType` to `StructType.from_json`.
1315

1416
#### Bug Fixes
1517

@@ -21,13 +23,16 @@
2123
#### Bug Fixes
2224

2325
- Fixed a bug in `snowflake.snowpark.functions.rank` that would cause sort direction to not be respected.
26+
- Fixed a bug in `snowflake.snowpark.functions.to_timestamp_*` that would cause incorrect results on filtered data.
2427

2528
### Snowpark pandas API Updates
2629

2730
#### New Features
2831

2932
- Added support for dict values in `Series.str.get`, `Series.str.slice`, and `Series.str.__getitem__` (`Series.str[...]`).
3033
- Added support for `DataFrame.to_html`.
34+
- Added support for `DataFrame.to_string` and `Series.to_string`.
35+
- Added support for reading files from S3 buckets using `pd.read_csv`.
3136

3237
#### Improvements
3338

docs/source/modin/dataframe.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,6 @@ DataFrame
234234
:toctree: pandas_api/
235235

236236
DataFrame.to_csv
237-
DataFrame.to_html
237+
DataFrame.to_html
238+
DataFrame.to_string
239+

docs/source/modin/series.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,3 +327,4 @@ Series
327327
:toctree: pandas_api/
328328

329329
Series.to_csv
330+
Series.to_string

docs/source/modin/supported/dataframe_supported.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ Methods
472472
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
473473
| ``to_stata`` | N | | |
474474
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
475-
| ``to_string`` | N | | |
475+
| ``to_string`` | Y | | |
476476
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
477477
| ``to_timestamp`` | N | | |
478478
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

docs/source/modin/supported/series_supported.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ Methods
451451
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
452452
| ``to_sql`` | N | | |
453453
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
454-
| ``to_string`` | N | | |
454+
| ``to_string`` | Y | | |
455455
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
456456
| ``to_timestamp`` | N | | |
457457
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ def wrap(*args, **kwargs):
150150
try:
151151
return func(*args, **kwargs)
152152
except snowflake.connector.errors.ProgrammingError as e:
153+
from snowflake.snowpark._internal.analyzer.select_statement import (
154+
Selectable,
155+
)
156+
153157
query = getattr(e, "query", None)
154158
tb = sys.exc_info()[2]
155159
assert e.msg is not None
@@ -209,10 +213,6 @@ def wrap(*args, **kwargs):
209213
)
210214
raise ne.with_traceback(tb) from None
211215
else:
212-
from snowflake.snowpark._internal.analyzer.select_statement import (
213-
Selectable,
214-
)
215-
216216
# We need the potential double quotes for invalid identifier
217217
match = SnowflakePlan.Decorator.__wrap_exception_regex_match_with_double_quotes.match(
218218
e.msg
@@ -277,11 +277,53 @@ def add_single_quote(string: str) -> str:
277277
e
278278
)
279279
raise ne.with_traceback(tb) from None
280-
else:
281-
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
282-
e
283-
)
284-
raise ne.with_traceback(tb) from None
280+
elif e.sqlstate == "42601" and "SELECT with no columns" in e.msg:
281+
# This is a special case when the select statement has no columns,
282+
# and it's a reading XML query.
283+
284+
def search_read_file_node(
285+
node: Union[SnowflakePlan, Selectable]
286+
) -> Optional[ReadFileNode]:
287+
for child in node.children_plan_nodes:
288+
source_plan = (
289+
child.source_plan
290+
if isinstance(child, SnowflakePlan)
291+
else child.snowflake_plan.source_plan
292+
)
293+
if isinstance(source_plan, ReadFileNode):
294+
return source_plan
295+
result = search_read_file_node(child)
296+
if result:
297+
return result
298+
return None
299+
300+
for arg in args:
301+
if isinstance(arg, SnowflakePlan):
302+
read_file_node = search_read_file_node(arg)
303+
if (
304+
read_file_node
305+
and read_file_node.xml_reader_udtf is not None
306+
):
307+
row_tag = read_file_node.options.get(
308+
XML_ROW_TAG_STRING
309+
)
310+
file_path = read_file_node.path
311+
ne = SnowparkClientExceptionMessages.DF_XML_ROW_TAG_NOT_FOUND(
312+
row_tag, file_path
313+
)
314+
raise ne.with_traceback(tb) from None
315+
# when the describe query fails, the arg is a query string
316+
elif isinstance(arg, str):
317+
if f'"{XML_ROW_DATA_COLUMN_NAME}"' in arg:
318+
ne = (
319+
SnowparkClientExceptionMessages.DF_XML_ROW_TAG_NOT_FOUND()
320+
)
321+
raise ne.with_traceback(tb) from None
322+
323+
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
324+
e
325+
)
326+
raise ne.with_traceback(tb) from None
285327

286328
return wrap
287329

src/snowflake/snowpark/_internal/error_message.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ def DF_COPY_INTO_CANNOT_CREATE_TABLE(
127127
f"Cannot create the target table {table_name} because Snowpark cannot determine the column names to use. You should create the table before calling copy_into_table()."
128128
)
129129

130+
@staticmethod
131+
def DF_XML_ROW_TAG_NOT_FOUND(
132+
row_tag: Optional[str] = None,
133+
file_path: Optional[str] = None,
134+
) -> SnowparkDataframeReaderException:
135+
if row_tag is not None and file_path is not None:
136+
msg = f"Cannot find the row tag '{row_tag}' in the XML file {file_path}."
137+
else:
138+
msg = "Cannot find the row tag in the XML file."
139+
return SnowparkDataframeReaderException(msg)
140+
130141
@staticmethod
131142
def DF_CROSS_TAB_COUNT_TOO_LARGE(
132143
count: int, max_count: int

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ def convert_sf_to_sp_type(
204204
return ArrayType(semi_structured_fill)
205205
if column_type_name == "VARIANT":
206206
return VariantType()
207+
if context._should_use_structured_type_semantics() and column_type_name == "OBJECT":
208+
return StructType()
207209
if column_type_name in {"OBJECT", "MAP"}:
208210
return MapType(semi_structured_fill, semi_structured_fill)
209211
if column_type_name == "GEOGRAPHY":
@@ -690,6 +692,10 @@ def python_type_to_snow_type(
690692
if tp_args
691693
else None
692694
)
695+
if (
696+
key_type is None or value_type is None
697+
) and context._should_use_structured_type_semantics():
698+
return StructType(), False
693699
return MapType(key_type, value_type), False
694700

695701
if installed_pandas:

src/snowflake/snowpark/_internal/udf_utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import typing
1111
import zipfile
1212
from copy import deepcopy
13+
from enum import Enum
1314
from logging import getLogger
1415
from types import ModuleType
1516
from typing import (
@@ -112,6 +113,13 @@ class UDFColumn(NamedTuple):
112113
name: str
113114

114115

116+
class RegistrationType(Enum):
117+
UDF = "UDF"
118+
UDAF = "UDAF"
119+
UDTF = "UDTF"
120+
SPROC = "SPROC"
121+
122+
115123
class ExtensionFunctionProperties:
116124
"""
117125
This is a data class to hold all information, resolved or otherwise, about a UDF/UDTF/UDAF/Sproc object
@@ -1266,6 +1274,7 @@ def create_python_udf_or_sp(
12661274
replace: bool,
12671275
if_not_exists: bool,
12681276
raw_imports: Optional[List[Union[str, Tuple[str, str]]]],
1277+
registration_type: RegistrationType,
12691278
inline_python_code: Optional[str] = None,
12701279
execute_as: Optional[typing.Literal["caller", "owner", "restricted caller"]] = None,
12711280
api_call_source: Optional[str] = None,
@@ -1288,7 +1297,12 @@ def create_python_udf_or_sp(
12881297

12891298
if replace and if_not_exists:
12901299
raise ValueError("options replace and if_not_exists are incompatible")
1291-
if isinstance(return_type, StructType) and not return_type.structured:
1300+
1301+
if (
1302+
isinstance(return_type, StructType)
1303+
and not return_type.structured
1304+
and registration_type in {RegistrationType.UDTF, RegistrationType.SPROC}
1305+
):
12921306
return_sql = f'RETURNS TABLE ({",".join(f"{field.name} {convert_sp_to_sf_type(field.datatype)}" for field in return_type.fields)})'
12931307
elif installed_pandas and isinstance(return_type, PandasDataFrameType):
12941308
return_sql = f'RETURNS TABLE ({",".join(f"{name} {convert_sp_to_sf_type(datatype)}" for name, datatype in zip(return_type.col_names, return_type.col_types))})'

src/snowflake/snowpark/_internal/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@
200200
XML_ROW_TAG_STRING = "ROWTAG"
201201
XML_ROW_DATA_COLUMN_NAME = "ROW_DATA"
202202
XML_READER_FILE_PATH = os.path.join(os.path.dirname(__file__), "xml_reader.py")
203+
XML_READER_API_SIGNATURE = "DataFrameReader.xml[rowTag]"
204+
XML_READER_SQL_COMMENT = f"/* Python:snowflake.snowpark.{XML_READER_API_SIGNATURE} */"
203205

204206
QUERY_TAG_STRING = "QUERY_TAG"
205207
SKIP_LEVELS_TWO = (

0 commit comments

Comments
 (0)