Skip to content

Commit 01b1dbf

Browse files
SNOW-2097586: Add Debug mode for eager evaluation (#3380)
Co-authored-by: graphite-app[bot] <96075541+graphite-app[bot]@users.noreply.github.com>
1 parent a227fd4 commit 01b1dbf

File tree

8 files changed

+136
-6
lines changed

8 files changed

+136
-6
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -4,6 +4,10 @@
44

55
### Snowpark Python API Updates
66

7+
#### New Features
8+
9+
- Added debuggability improvements to eagerly validate dataframe schema metadata. Enable it using `snowflake.snowpark.context.configure_development_features()`.
10+
711
#### Improvements
812

913
- Added support for row validation using XSD schema using `rowValidationXSDPath` option when reading XML files with a row tag using `rowTag` option.

src/snowflake/snowpark/_internal/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -55,7 +55,6 @@
5555
from snowflake.connector.options import MissingOptionalDependency, ModuleLikeObject
5656
from snowflake.connector.version import VERSION as connector_version
5757
from snowflake.snowpark._internal.error_message import SnowparkClientExceptionMessages
58-
from snowflake.snowpark.context import _should_use_structured_type_semantics
5958
from snowflake.snowpark.row import Row
6059
from snowflake.snowpark.version import VERSION as snowpark_version
6160

@@ -760,6 +759,8 @@ def _parse_result_meta(
760759
an expected format. For example StructType columns are returned as dict objects, but are better
761760
represented as Row objects.
762761
"""
762+
from snowflake.snowpark.context import _should_use_structured_type_semantics
763+
763764
if not result_meta:
764765
return None, None
765766
col_names = []

src/snowflake/snowpark/context.py

Lines changed: 7 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -11,6 +11,7 @@
1111
import threading
1212

1313
_logger = logging.getLogger(__name__)
14+
1415
_use_scoped_temp_objects = True
1516

1617
# This is an internal-only global flag, used to determine whether to execute code in a client's local sandbox or connect to a Snowflake account.
@@ -33,6 +34,7 @@
3334

3435
# Following are internal-only global flags, used to enable development features.
3536
_enable_dataframe_trace_on_error = False
37+
_debug_eager_schema_validation = False
3638

3739
# This is an internal-only global flag, used to determine whether to enable query line tracking for tracing sql compilation errors.
3840
_enable_trace_sql_errors_to_dataframe = False
@@ -41,6 +43,7 @@
4143
def configure_development_features(
4244
*,
4345
enable_dataframe_trace_on_error: bool = True,
46+
enable_eager_schema_validation: bool = True,
4447
enable_trace_sql_errors_to_dataframe: bool = True,
4548
) -> None:
4649
"""
@@ -50,6 +53,8 @@ def configure_development_features(
5053
enable_dataframe_trace_on_error: If True, upon failure, we will add most recent dataframe
5154
operations to the error trace. This requires AST collection to be enabled in the
5255
session which can be done using `session.ast_enabled = True`.
56+
enable_eager_schema_validation: If True, dataframe schemas are eagerly validated by querying
57+
for column metadata after every dataframe operation. This adds additional query overhead.
5358
enable_trace_sql_errors_to_dataframe: If True, we will enable query line tracking.
5459
Note:
5560
This feature is experimental since 1.33.0. Do not use it in production.
@@ -58,7 +63,9 @@ def configure_development_features(
5863
"configure_development_features() is experimental since 1.33.0. Do not use it in production.",
5964
)
6065
global _enable_dataframe_trace_on_error, _enable_trace_sql_errors_to_dataframe
66+
global _debug_eager_schema_validation
6167
_enable_dataframe_trace_on_error = enable_dataframe_trace_on_error
68+
_debug_eager_schema_validation = enable_eager_schema_validation
6269
_enable_trace_sql_errors_to_dataframe = enable_trace_sql_errors_to_dataframe
6370

6471

src/snowflake/snowpark/dataframe.py

Lines changed: 6 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -27,6 +27,7 @@
2727
)
2828

2929
import snowflake.snowpark
30+
import snowflake.snowpark.context as context
3031
import snowflake.snowpark._internal.proto.generated.ast_pb2 as proto
3132
from snowflake.connector.options import installed_pandas, pandas, pyarrow
3233

@@ -645,6 +646,11 @@ def __init__(
645646

646647
self._alias: Optional[str] = None
647648

649+
if context._debug_eager_schema_validation:
650+
# Getting the plan attributes may run a describe query
651+
# and populates the schema for the dataframe.
652+
self._plan.attributes
653+
648654
def _set_ast_ref(self, dataframe_expr_builder: Any) -> None:
649655
"""
650656
Given a field builder expression of the AST type Expr, points the builder to reference this dataframe.

tests/integ/conftest.py

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -285,7 +285,9 @@ def session(
285285
session.ast_enabled = ast_enabled
286286
if not session._generate_multiline_queries:
287287
session._enable_multiline_queries()
288-
context.configure_development_features(enable_trace_sql_errors_to_dataframe=True)
288+
context.configure_development_features(
289+
enable_trace_sql_errors_to_dataframe=True, enable_eager_schema_validation=False
290+
)
289291

290292
if (RUNNING_ON_GH or RUNNING_ON_JENKINS) and not local_testing_mode:
291293
set_up_external_access_integration_resources(

tests/integ/test_df_debug_trace.py

Lines changed: 6 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -30,12 +30,16 @@
3030
@pytest.fixture(autouse=True)
3131
def setup(request, session):
3232
original = session.ast_enabled
33-
context.configure_development_features(enable_dataframe_trace_on_error=True)
33+
context.configure_development_features(
34+
enable_dataframe_trace_on_error=True, enable_eager_schema_validation=False
35+
)
3436
set_ast_state(AstFlagSource.TEST, True)
3537
if SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH in os.environ:
3638
del os.environ[SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH]
3739
yield
38-
context.configure_development_features(enable_dataframe_trace_on_error=False)
40+
context.configure_development_features(
41+
enable_dataframe_trace_on_error=False, enable_eager_schema_validation=False
42+
)
3943
set_ast_state(AstFlagSource.TEST, original)
4044
if SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH in os.environ:
4145
del os.environ[SNOWPARK_PYTHON_DATAFRAME_TRANSFORM_TRACE_LENGTH]
Lines changed: 102 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,102 @@
1+
#!/usr/bin/env python3
2+
#
3+
# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
4+
#
5+
6+
import pytest
7+
import snowflake.snowpark.context as context
8+
from copy import copy
9+
from unittest.mock import patch, Mock
10+
11+
12+
from snowflake.snowpark.functions import col, lit, max
13+
14+
15+
@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="debug_mode not used in local testing mode",
)
@pytest.mark.parametrize("debug_mode", [True, False])
@pytest.mark.parametrize(
    "transform",
    [
        pytest.param(lambda frame: copy(frame), id="copy"),
        pytest.param(lambda frame: frame.to_df(["C", "D"]), id="to_df"),
        pytest.param(lambda frame: frame.distinct(), id="distinct"),
        pytest.param(lambda frame: frame.drop_duplicates(), id="drop_duplicates"),
        pytest.param(lambda frame: frame.limit(1), id="limit"),
        pytest.param(lambda frame: frame.union(frame), id="union"),
        pytest.param(lambda frame: frame.union_all(frame), id="union_all"),
        pytest.param(lambda frame: frame.union_by_name(frame), id="union_by_name"),
        pytest.param(lambda frame: frame.union_all_by_name(frame), id="union_all_by_name"),
        pytest.param(lambda frame: frame.intersect(frame), id="intersect"),
        pytest.param(lambda frame: frame.natural_join(frame), id="natural_join"),
        pytest.param(lambda frame: frame.cross_join(frame), id="cross_join"),
        pytest.param(lambda frame: frame.sample(n=1), id="sample"),
        pytest.param(
            lambda frame: frame.with_column_renamed(col("A"), "B"), id="with_column_renamed"
        ),
        # Unpivot already validates names
        pytest.param(lambda frame: frame.unpivot("x", "y", ["A"]), id="unpivot"),
        # The following functions do not error early because their schema_query do not contain
        # information about the transformation being called.
        pytest.param(lambda frame: frame.drop(col("A")), id="drop"),
        pytest.param(lambda frame: frame.filter(col("A") == lit(1)), id="filter"),
        pytest.param(lambda frame: frame.sort(col("A").desc()), id="sort"),
    ],
)
def test_early_attributes(session, transform, debug_mode):
    """Verify that eager schema validation populates plan attributes immediately.

    With ``_debug_eager_schema_validation`` enabled, every dataframe operation
    should trigger a (possibly cached) describe so that the plan's cached
    attribute metadata is present right after the transformation; with the
    flag off, the metadata stays unset until an action runs.
    """
    with patch.object(context, "_debug_eager_schema_validation", debug_mode):
        source = session.create_dataframe([(1, "A"), (2, "B"), (3, "C")], ["A", "B"])

        result = transform(source)

        # Attributes are populated early exactly when debug mode is on.
        assert (result._plan._metadata.attributes is not None) is debug_mode
59+
60+
61+
@pytest.mark.skipif(
    "config.getoption('local_testing_mode', default=False)",
    reason="debug_mode not used in local testing mode",
)
@pytest.mark.parametrize("debug_mode", [True, False])
@pytest.mark.parametrize(
    "transform",
    [
        pytest.param(lambda frame: frame.select("B"), id="select"),
        pytest.param(lambda frame: frame.select_expr("cast(b as str)"), id="select_expr"),
        pytest.param(lambda frame: frame.agg(max("B")), id="agg"),
        pytest.param(
            lambda frame: frame.join(copy(frame), on=(col("A") == col("B"))), id="join"
        ),
        pytest.param(
            lambda frame: frame.join_table_function("flatten", col("B")),
            id="join_table_function",
        ),
        pytest.param(lambda frame: frame.with_column("C", col("B")), id="with_column"),
        pytest.param(lambda frame: frame.with_columns(["C"], [col("B")]), id="with_columns"),
    ],
)
def test_early_error(session, transform, debug_mode):
    """Verify that invalid transformations fail at build time under debug mode.

    Each parametrized transform references a column that does not exist on the
    source dataframe. With eager schema validation on, the failure surfaces
    while the transform is constructed — before ``show`` is ever reached.
    Without it, the error only appears once an action (``show``) executes.
    """
    with patch.object(context, "_debug_eager_schema_validation", debug_mode):
        source = session.create_dataframe([1, 2, 3], ["A"])

        mock_show = Mock()
        mock_show.__qualname__ = "show"
        mock_show.__name__ = "show"

        with patch("snowflake.snowpark.dataframe.DataFrame.show", mock_show):
            try:
                result = transform(source)
                result.show()
            except Exception:
                # The failure itself is expected; what matters is *when* it fired.
                pass
        # Debug mode raises before show is reached; otherwise show is the
        # first point at which the bad transformation is executed.
        if debug_mode:
            mock_show.assert_not_called()
        else:
            mock_show.assert_called()
        # The source frame's attributes are populated early only in debug mode.
        assert (source._plan._metadata.attributes is not None) is debug_mode

tests/unit/test_selectable_queries.py

Lines changed: 6 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -25,9 +25,13 @@
2525

2626
@pytest.fixture(autouse=True)
2727
def setup(request):
28-
context.configure_development_features(enable_trace_sql_errors_to_dataframe=True)
28+
context.configure_development_features(
29+
enable_trace_sql_errors_to_dataframe=True, enable_eager_schema_validation=False
30+
)
2931
yield
30-
context.configure_development_features(enable_trace_sql_errors_to_dataframe=False)
32+
context.configure_development_features(
33+
enable_trace_sql_errors_to_dataframe=False, enable_eager_schema_validation=False
34+
)
3135

3236

3337
def test_select_statement_sql_query(mock_session, mock_analyzer):

0 commit comments

Comments (0)