Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions DESCRIPTION.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ Source code is also available at: https://github.com/snowflakedb/snowflake-conne
# Release Notes
- v4.2.0(TBD)
- Added support for async I/O. An asynchronous version of the connector is available via the `snowflake.connector.aio` module.
- Added `SnowflakeCursor.stats` property to expose granular DML statistics (rows inserted, deleted, updated, and duplicates) for operations like CTAS where `rowcount` is insufficient.

- v4.1.1(TBD)
- Relaxed pandas dependency requirements for Python below 3.12.
- Changed the CRL cache cleanup background task to run as a daemon to avoid blocking the main thread.
Expand Down
4 changes: 4 additions & 0 deletions src/snowflake/connector/aio/_cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ async def _init_result_and_meta(self, data: dict[Any, Any]) -> None:
self._rownumber = -1
self._result_state = ResultState.VALID

# Extract stats object if available (for DML operations like CTAS, INSERT, UPDATE, DELETE)
self._stats_data = data.get("stats", None)
logger.debug("Execution DML stats: %s", self.stats)

# don't update the row count when the result is returned from `describe` method
if is_dml and "rowset" in data and len(data["rowset"]) > 0:
updated_rows = 0
Expand Down
48 changes: 48 additions & 0 deletions src/snowflake/connector/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,10 @@ def __init__(
self._log_max_query_length = connection.log_max_query_length
self._inner_cursor: SnowflakeCursorBase | None = None
self._prefetch_hook = None
self._stats_data: dict[str, int] | None = (
None # Stores stats from response for DML operations
)

self._rownumber: int | None = None

self.reset()
Expand Down Expand Up @@ -454,6 +458,23 @@ def _description_internal(self) -> list[ResultMetadataV2]:
def rowcount(self) -> int | None:
return self._total_rowcount if self._total_rowcount >= 0 else None

@property
def stats(self) -> QueryResultStats:
    """Return detailed rows-affected statistics stored on this cursor.

    Returns:
        A ``QueryResultStats`` named tuple with fields:
        - num_rows_inserted: Number of rows inserted
        - num_rows_deleted: Number of rows deleted
        - num_rows_updated: Number of rows updated
        - num_dml_duplicates: Number of duplicates in DML statement

        The property itself never returns ``None``. When no DML stats are
        available, every field of the tuple is ``None`` - this includes DML
        operations where no rows were affected as well as other types of
        SQL statements (e.g. DDL, DQL).
    """
    raw_stats = self._stats_data
    if raw_stats is None:
        # Nothing was stored for this result: report every statistic as unknown.
        return QueryResultStats(None, None, None, None)
    return QueryResultStats.from_dict(raw_stats)

@property
def rownumber(self) -> int | None:
    """Return the stored row number, or ``None`` while it is negative."""
    current = self._rownumber
    if current >= 0:
        return current
    return None
Expand Down Expand Up @@ -1201,6 +1222,10 @@ def _init_result_and_meta(self, data: dict[Any, Any]) -> None:
self._rownumber = -1
self._result_state = ResultState.VALID

# Extract stats object if available (for DML operations like CTAS, INSERT, UPDATE, DELETE)
self._stats_data = data.get("stats", None)
logger.debug("Execution DML stats: %s", self.stats)

# don't update the row count when the result is returned from `describe` method
if is_dml and "rowset" in data and len(data["rowset"]) > 0:
updated_rows = 0
Expand Down Expand Up @@ -2007,3 +2032,26 @@ def __getattr__(name):
)
return None
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


class QueryResultStats(NamedTuple):
    """Statistics for rows affected by a DML operation.

    A ``None`` value means the particular statistic is unknown, i.e. it was
    not returned by the backend service.

    Added in the first place to expose DML data of CTAS statements - SNOW-295953
    """

    num_rows_inserted: int | None = None
    num_rows_deleted: int | None = None
    num_rows_updated: int | None = None
    num_dml_duplicates: int | None = None

    @classmethod
    def from_dict(cls, stats_dict: dict[str, int] | None) -> QueryResultStats:
        """Build an instance from a raw stats dictionary.

        Args:
            stats_dict: Dictionary that may contain the keys
                ``numRowsInserted``, ``numRowsDeleted``, ``numRowsUpdated``
                and ``numDmlDuplicates``. Other keys are ignored. ``None``
                or an empty dictionary means no statistics are available.

        Returns:
            A ``QueryResultStats`` whose fields hold the values found under
            the keys above; a field is ``None`` when its key is absent.
        """
        if not stats_dict:
            # No stats at all: every field keeps its ``None`` default.
            return cls()
        return cls(
            num_rows_inserted=stats_dict.get("numRowsInserted"),
            num_rows_deleted=stats_dict.get("numRowsDeleted"),
            num_rows_updated=stats_dict.get("numRowsUpdated"),
            num_dml_duplicates=stats_dict.get("numDmlDuplicates"),
        )
Loading
Loading