Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
from enum import Enum
import datetime
from typing import List, Callable, Any, Optional, TYPE_CHECKING
from typing import Dict, List, Callable, Any, Optional, TYPE_CHECKING
from snowflake.connector.options import pandas as pd

from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
Expand Down Expand Up @@ -152,6 +152,7 @@ def udtf_ingestion(
packages: Optional[List[str]] = None,
session_init_statement: Optional[List[str]] = None,
query_timeout: Optional[int] = 0,
statement_params: Optional[Dict[str, str]] = None,
_emit_ast: bool = True,
) -> "snowflake.snowpark.DataFrame":
from snowflake.snowpark._internal.data_source.utils import UDTF_PACKAGE_MAP
Expand All @@ -175,6 +176,7 @@ def udtf_ingestion(
external_access_integrations=[external_access_integrations],
packages=packages or UDTF_PACKAGE_MAP.get(self.dbms_type),
imports=imports,
statement_params=statement_params,
)
logger.debug(f"register ingestion udtf takes: {udtf_register_time()} seconds")
call_udtf_sql = f"""
Expand Down
18 changes: 18 additions & 0 deletions src/snowflake/snowpark/_internal/data_source/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,21 @@ def process_parquet_queue_with_threads(
)

return fetch_to_local_end_time, upload_to_sf_start_time, upload_to_sf_end_time


def track_data_source_statement_params(
    dataframe, statement_params: Optional[Dict] = None
) -> Optional[Dict]:
    """
    Initialize and update data source tracking statement_params based on
    dataframe attributes.

    Args:
        dataframe: a Snowpark DataFrame whose ``_plan.api_calls`` is inspected
            to detect whether it originates from ``DataFrameReader.dbapi``.
        statement_params: existing statement parameters, if any. Never
            mutated; a copy is made before adding the tracking flag.

    Returns:
        A dict containing the caller's parameters plus the data source
        tracking flag when the dataframe was produced by the dbapi reader,
        or ``None`` when there are no parameters to send.
    """
    # Copy instead of mutating in place: callers pass shared dicts such as
    # dataframe._statement_params, which must not be modified as a side effect.
    statement_params = dict(statement_params) if statement_params else {}
    if (
        dataframe._plan
        and dataframe._plan.api_calls
        and dataframe._plan.api_calls[0].get("name") == DATA_SOURCE_DBAPI_SIGNATURE
    ):
        # Track data source ingestion
        statement_params[STATEMENT_PARAMS_DATA_SOURCE] = "1"

    return statement_params if statement_params else None
6 changes: 6 additions & 0 deletions src/snowflake/snowpark/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@
string_half_width,
warning,
)
from snowflake.snowpark._internal.data_source.utils import (
track_data_source_statement_params,
)
from snowflake.snowpark.async_job import AsyncJob, _AsyncResultType
from snowflake.snowpark.column import Column, _to_col_if_sql_expr, _to_col_if_str
from snowflake.snowpark.dataframe_ai_functions import DataFrameAIFunctions
Expand Down Expand Up @@ -836,6 +839,9 @@ def _internal_collect_with_tag_no_telemetry(
# When executing a DataFrame in any method of snowpark (either public or private),
# we should always call this method instead of collect(), to make sure the
# query tag is set properly.
statement_params = track_data_source_statement_params(
self, statement_params or self._statement_params
)
return self._session._conn.execute(
self._plan,
block=block,
Expand Down
9 changes: 7 additions & 2 deletions src/snowflake/snowpark/dataframe_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1876,7 +1876,11 @@ def create_sqlite_connection(timeout=5.0, isolation_level=None, **kwargs):
partitions_table = random_name_for_temp_object(TempObjectType.TABLE)
self._session.create_dataframe(
[[query] for query in partitioned_queries], schema=["partition"]
).write.save_as_table(partitions_table, table_type="temp")
).write.save_as_table(
partitions_table,
table_type="temp",
statement_params=statements_params_for_telemetry,
)
df = partitioner.driver.udtf_ingestion(
self._session,
struct_schema,
Expand All @@ -1887,7 +1891,8 @@ def create_sqlite_connection(timeout=5.0, isolation_level=None, **kwargs):
packages=udtf_configs.get("packages", None),
session_init_statement=session_init_statement,
query_timeout=query_timeout,
_emit_ast=_emit_ast,
statement_params=statements_params_for_telemetry,
_emit_ast=False, # internal API, no need to emit AST
)
end_time = time.perf_counter()
telemetry_json_string["end_to_end_duration"] = end_time - start_time
Expand Down
25 changes: 3 additions & 22 deletions src/snowflake/snowpark/dataframe_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
build_table_name,
)
from snowflake.snowpark._internal.data_source.utils import (
STATEMENT_PARAMS_DATA_SOURCE,
DATA_SOURCE_DBAPI_SIGNATURE,
track_data_source_statement_params,
)
from snowflake.snowpark._internal.open_telemetry import open_telemetry_context_manager
from snowflake.snowpark._internal.telemetry import (
Expand Down Expand Up @@ -109,24 +108,6 @@ def __init__(
self._ast = writer
dataframe._set_ast_ref(self._ast.dataframe_writer.df)

@staticmethod
def _track_data_source_statement_params(
    dataframe, statement_params: Optional[Dict] = None
) -> Optional[Dict]:
    """
    Helper method to initialize and update data source tracking
    statement_params based on dataframe attributes.

    Args:
        dataframe: a Snowpark DataFrame whose ``_plan.api_calls`` is
            inspected for the dbapi reader signature.
        statement_params: existing statement parameters, if any. Never
            mutated; a copy is made before adding the tracking flag.

    Returns:
        A dict with the tracking flag added when the dataframe comes from
        ``DataFrameReader.dbapi``, or ``None`` when there is nothing to send.
    """
    # Copy instead of mutating in place: callers pass shared dicts such as
    # self._dataframe._statement_params, which must not be changed here.
    statement_params = dict(statement_params) if statement_params else {}
    if (
        dataframe._plan
        and dataframe._plan.api_calls
        and dataframe._plan.api_calls[0].get("name") == DATA_SOURCE_DBAPI_SIGNATURE
    ):
        # Track data source ingestion
        statement_params[STATEMENT_PARAMS_DATA_SOURCE] = "1"

    return statement_params if statement_params else None

@publicapi
def mode(self, save_mode: str, _emit_ast: bool = True) -> "DataFrameWriter":
"""Set the save mode of this :class:`DataFrameWriter`.
Expand Down Expand Up @@ -372,7 +353,7 @@ def save_as_table(
>>> df.write.mode("overwrite").save_as_table("my_table", iceberg_config=iceberg_config) # doctest: +SKIP
"""

statement_params = self._track_data_source_statement_params(
statement_params = track_data_source_statement_params(
self._dataframe, statement_params or self._dataframe._statement_params
)
if _emit_ast and self._ast is not None:
Expand Down Expand Up @@ -688,7 +669,7 @@ def _internal_copy_into_location(
# This method is not intended to be used directly by users.
# AST.
kwargs = {}
statement_params = self._track_data_source_statement_params(
statement_params = track_data_source_statement_params(
self._dataframe, statement_params or self._dataframe._statement_params
)
if _emit_ast and self._ast is not None:
Expand Down
51 changes: 50 additions & 1 deletion tests/integ/test_data_source_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_telemetry_tracking(caplog, session, fetch_with_process):
called, comment_showed = 0, 0

def assert_datasource_statement_params_run_query(*args, **kwargs):
# assert we set statement_parameters to track datasourcee api usage
# assert we set statement_parameters to track datasource api usage
nonlocal comment_showed
statement_parameters = kwargs.get("_statement_params")
query = args[0]
Expand Down Expand Up @@ -496,6 +496,55 @@ def assert_datasource_statement_params_run_query(*args, **kwargs):
assert called == 2


def test_telemetry_tracking_for_udtf(caplog, session):
    """Verify that UDTF-based dbapi ingestion tags executed queries with the
    data-source statement parameter so telemetry can attribute the usage.

    NOTE(review): requires a live ``session`` fixture and the
    ORACLEDB_TEST_EXTERNAL_ACCESS_INTEGRATION to exist — confirm CI setup.
    """

    # Keep a handle to the real run_query so the spy below can delegate to it.
    original_func = session._conn.run_query
    called = 0

    def assert_datasource_statement_params_run_query(*args, **kwargs):
        # assert we set statement_parameters to track datasource udtf api usage
        assert kwargs.get("_statement_params")[STATEMENT_PARAMS_DATA_SOURCE] == "1"
        nonlocal called
        called += 1
        return original_func(*args, **kwargs)

    def create_connection():
        # Minimal fake DBAPI connection: just enough surface (cursor/execute/
        # description/fetchmany) for the reader to infer a one-column schema.
        class FakeConnection:
            def cursor(self):
                class FakeCursor:
                    def execute(self, query):
                        pass

                    @property
                    def description(self):
                        # DBAPI 7-tuple describing a single INT column "c1".
                        return [("c1", int, None, None, None, None, None)]

                    def fetchmany(self, *args, **kwargs):
                        return None

                return FakeCursor()

        return FakeConnection()

    # Build the dataframe BEFORE patching run_query so setup queries are not
    # counted by the spy; only the collect() below should hit it.
    df = session.read.dbapi(
        create_connection,
        table="Fake",
        custom_schema="c1 INT",
        udtf_configs={
            "external_access_integration": ORACLEDB_TEST_EXTERNAL_ACCESS_INTEGRATION,
            "packages": ["snowflake-snowpark-python"],
        },
    )
    with mock.patch(
        "snowflake.snowpark._internal.server_connection.ServerConnection.run_query",
        side_effect=assert_datasource_statement_params_run_query,
    ):
        df.select("*").collect()
    # The plan must record the dbapi entry point, and exactly one query should
    # have been executed (and tagged) during collect().
    assert (
        "'name': 'DataFrameReader.dbapi'" in str(df._plan.api_calls[0]) and called == 1
    )


@pytest.mark.skipif(
IS_WINDOWS,
reason="sqlite3 file can not be shared accorss processes on windows",
Expand Down
Loading