Skip to content

Commit 617e105

Browse files
committed
Merge branch 'main' into aalam-SNOW-2084165-add-error-trace
2 parents 7e22397 + 1973988 commit 617e105

31 files changed

+400
-63
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88

99
- Invoking snowflake system procedures does not invoke an additional `describe procedure` call to check the return type of the procedure.
1010
- Added support for `Session.create_dataframe()` with the stage URL and FILE data type.
11-
- Added support for different modes for dealing with corrupt XML records when reading an XML file using `session.read.option('rowTag', <tag_name>).xml(<stage_file_path>)`. Currently `PERMISSIVE`, `DROPMALFORMED` and `FAILFAST` are supported.
11+
- Added support for different modes for dealing with corrupt XML records when reading an XML file using `session.read.option('mode', <mode>).option('rowTag', <tag_name>).xml(<stage_file_path>)`. Currently `PERMISSIVE`, `DROPMALFORMED` and `FAILFAST` are supported.
12+
- Improved the error message of the XML reader when the specified row tag is not found in the file.
1213
- Improved query generation for `Dataframe.drop` to use `SELECT * EXCLUDE ()` to exclude the dropped columns. To enable this feature, set `session.conf.set("use_simplified_query_generation", True)`.
14+
- Added support for `VariantType` to `StructType.from_json`.
1315

1416
#### Bug Fixes
1517

@@ -21,13 +23,16 @@
2123
#### Bug Fixes
2224

2325
- Fixed a bug in `snowflake.snowpark.functions.rank` that would cause sort direction to not be respected.
26+
- Fixed a bug in `snowflake.snowpark.functions.to_timestamp_*` that would cause incorrect results on filtered data.
2427

2528
### Snowpark pandas API Updates
2629

2730
#### New Features
2831

2932
- Added support for dict values in `Series.str.get`, `Series.str.slice`, and `Series.str.__getitem__` (`Series.str[...]`).
3033
- Added support for `DataFrame.to_html`.
34+
- Added support for `DataFrame.to_string` and `Series.to_string`.
35+
- Added support for reading files from S3 buckets using `pd.read_csv`.
3136

3237
#### Improvements
3338

docs/source/modin/dataframe.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,4 +234,6 @@ DataFrame
234234
:toctree: pandas_api/
235235

236236
DataFrame.to_csv
237-
DataFrame.to_html
237+
DataFrame.to_html
238+
DataFrame.to_string
239+

docs/source/modin/series.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,3 +327,4 @@ Series
327327
:toctree: pandas_api/
328328

329329
Series.to_csv
330+
Series.to_string

docs/source/modin/supported/dataframe_supported.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ Methods
472472
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
473473
| ``to_stata`` | N | | |
474474
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
475-
| ``to_string`` | N | | |
475+
| ``to_string`` | Y | | |
476476
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
477477
| ``to_timestamp`` | N | | |
478478
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

docs/source/modin/supported/series_supported.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ Methods
451451
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
452452
| ``to_sql`` | N | | |
453453
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
454-
| ``to_string`` | N | | |
454+
| ``to_string`` | Y | | |
455455
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+
456456
| ``to_timestamp`` | N | | |
457457
+-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+

src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ def wrap(*args, **kwargs):
150150
try:
151151
return func(*args, **kwargs)
152152
except snowflake.connector.errors.ProgrammingError as e:
153+
from snowflake.snowpark._internal.analyzer.select_statement import (
154+
Selectable,
155+
)
156+
153157
query = getattr(e, "query", None)
154158
tb = sys.exc_info()[2]
155159
assert e.msg is not None
@@ -209,10 +213,6 @@ def wrap(*args, **kwargs):
209213
)
210214
raise ne.with_traceback(tb) from None
211215
else:
212-
from snowflake.snowpark._internal.analyzer.select_statement import (
213-
Selectable,
214-
)
215-
216216
# We need the potential double quotes for invalid identifier
217217
match = SnowflakePlan.Decorator.__wrap_exception_regex_match_with_double_quotes.match(
218218
e.msg
@@ -277,11 +277,53 @@ def add_single_quote(string: str) -> str:
277277
e
278278
)
279279
raise ne.with_traceback(tb) from None
280-
else:
281-
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
282-
e
283-
)
284-
raise ne.with_traceback(tb) from None
280+
elif e.sqlstate == "42601" and "SELECT with no columns" in e.msg:
281+
# This is a special case when the select statement has no columns,
282+
# and it's a reading XML query.
283+
284+
def search_read_file_node(
285+
node: Union[SnowflakePlan, Selectable]
286+
) -> Optional[ReadFileNode]:
287+
for child in node.children_plan_nodes:
288+
source_plan = (
289+
child.source_plan
290+
if isinstance(child, SnowflakePlan)
291+
else child.snowflake_plan.source_plan
292+
)
293+
if isinstance(source_plan, ReadFileNode):
294+
return source_plan
295+
result = search_read_file_node(child)
296+
if result:
297+
return result
298+
return None
299+
300+
for arg in args:
301+
if isinstance(arg, SnowflakePlan):
302+
read_file_node = search_read_file_node(arg)
303+
if (
304+
read_file_node
305+
and read_file_node.xml_reader_udtf is not None
306+
):
307+
row_tag = read_file_node.options.get(
308+
XML_ROW_TAG_STRING
309+
)
310+
file_path = read_file_node.path
311+
ne = SnowparkClientExceptionMessages.DF_XML_ROW_TAG_NOT_FOUND(
312+
row_tag, file_path
313+
)
314+
raise ne.with_traceback(tb) from None
315+
# when the describe query fails, the arg is a query string
316+
elif isinstance(arg, str):
317+
if f'"{XML_ROW_DATA_COLUMN_NAME}"' in arg:
318+
ne = (
319+
SnowparkClientExceptionMessages.DF_XML_ROW_TAG_NOT_FOUND()
320+
)
321+
raise ne.with_traceback(tb) from None
322+
323+
ne = SnowparkClientExceptionMessages.SQL_EXCEPTION_FROM_PROGRAMMING_ERROR(
324+
e
325+
)
326+
raise ne.with_traceback(tb) from None
285327

286328
return wrap
287329

src/snowflake/snowpark/_internal/error_message.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ def DF_COPY_INTO_CANNOT_CREATE_TABLE(
127127
f"Cannot create the target table {table_name} because Snowpark cannot determine the column names to use. You should create the table before calling copy_into_table()."
128128
)
129129

130+
@staticmethod
131+
def DF_XML_ROW_TAG_NOT_FOUND(
132+
row_tag: Optional[str] = None,
133+
file_path: Optional[str] = None,
134+
) -> SnowparkDataframeReaderException:
135+
if row_tag is not None and file_path is not None:
136+
msg = f"Cannot find the row tag '{row_tag}' in the XML file {file_path}."
137+
else:
138+
msg = "Cannot find the row tag in the XML file."
139+
return SnowparkDataframeReaderException(msg)
140+
130141
@staticmethod
131142
def DF_CROSS_TAB_COUNT_TOO_LARGE(
132143
count: int, max_count: int

src/snowflake/snowpark/_internal/type_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ def convert_sf_to_sp_type(
204204
return ArrayType(semi_structured_fill)
205205
if column_type_name == "VARIANT":
206206
return VariantType()
207+
if context._should_use_structured_type_semantics() and column_type_name == "OBJECT":
208+
return StructType()
207209
if column_type_name in {"OBJECT", "MAP"}:
208210
return MapType(semi_structured_fill, semi_structured_fill)
209211
if column_type_name == "GEOGRAPHY":
@@ -690,6 +692,10 @@ def python_type_to_snow_type(
690692
if tp_args
691693
else None
692694
)
695+
if (
696+
key_type is None or value_type is None
697+
) and context._should_use_structured_type_semantics():
698+
return StructType(), False
693699
return MapType(key_type, value_type), False
694700

695701
if installed_pandas:

src/snowflake/snowpark/_internal/udf_utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import typing
1111
import zipfile
1212
from copy import deepcopy
13+
from enum import Enum
1314
from logging import getLogger
1415
from types import ModuleType
1516
from typing import (
@@ -112,6 +113,13 @@ class UDFColumn(NamedTuple):
112113
name: str
113114

114115

116+
class RegistrationType(Enum):
117+
UDF = "UDF"
118+
UDAF = "UDAF"
119+
UDTF = "UDTF"
120+
SPROC = "SPROC"
121+
122+
115123
class ExtensionFunctionProperties:
116124
"""
117125
This is a data class to hold all information, resolved or otherwise, about a UDF/UDTF/UDAF/Sproc object
@@ -1266,6 +1274,7 @@ def create_python_udf_or_sp(
12661274
replace: bool,
12671275
if_not_exists: bool,
12681276
raw_imports: Optional[List[Union[str, Tuple[str, str]]]],
1277+
registration_type: RegistrationType,
12691278
inline_python_code: Optional[str] = None,
12701279
execute_as: Optional[typing.Literal["caller", "owner", "restricted caller"]] = None,
12711280
api_call_source: Optional[str] = None,
@@ -1288,7 +1297,12 @@ def create_python_udf_or_sp(
12881297

12891298
if replace and if_not_exists:
12901299
raise ValueError("options replace and if_not_exists are incompatible")
1291-
if isinstance(return_type, StructType) and not return_type.structured:
1300+
1301+
if (
1302+
isinstance(return_type, StructType)
1303+
and not return_type.structured
1304+
and registration_type in {RegistrationType.UDTF, RegistrationType.SPROC}
1305+
):
12921306
return_sql = f'RETURNS TABLE ({",".join(f"{field.name} {convert_sp_to_sf_type(field.datatype)}" for field in return_type.fields)})'
12931307
elif installed_pandas and isinstance(return_type, PandasDataFrameType):
12941308
return_sql = f'RETURNS TABLE ({",".join(f"{name} {convert_sp_to_sf_type(datatype)}" for name, datatype in zip(return_type.col_names, return_type.col_types))})'

src/snowflake/snowpark/_internal/utils.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@
200200
XML_ROW_TAG_STRING = "ROWTAG"
201201
XML_ROW_DATA_COLUMN_NAME = "ROW_DATA"
202202
XML_READER_FILE_PATH = os.path.join(os.path.dirname(__file__), "xml_reader.py")
203+
XML_READER_API_SIGNATURE = "DataFrameReader.xml[rowTag]"
204+
XML_READER_SQL_COMMENT = f"/* Python:snowflake.snowpark.{XML_READER_API_SIGNATURE} */"
203205

204206
QUERY_TAG_STRING = "QUERY_TAG"
205207
SKIP_LEVELS_TWO = (

0 commit comments

Comments
 (0)