
Commit 5d00adf

Adding describe query time to profiler (#3644)
1 parent 3945028 commit 5d00adf

14 files changed: +330 -147 lines

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@
 - `xpath_long`
 - `xpath_short`
 - Added support for parameter `use_vectorized_scanner` in function `Session.write_arrow()`.
+- Dataframe profiler adds the following information about each query: describe query time, execution time, and sql query text. To view this information, call session.dataframe_profiler.enable() and call get_execution_profile on a dataframe.
 
 #### Bug Fixes
 
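The profiler workflow that this changelog entry describes looks roughly as follows. This is a minimal sketch, assuming an already-configured session; the connection parameters and table name are placeholders and are not part of this commit:

```python
from snowflake.snowpark import Session

# Hypothetical connection parameters; substitute real account credentials.
connection_parameters = {"account": "<account>", "user": "<user>", "password": "<password>"}
session = Session.builder.configs(connection_parameters).create()

# Turn on per-dataframe profiling for this session.
session.dataframe_profiler.enable()

df = session.table("my_table").filter("id > 10")  # "my_table" is a placeholder
df.collect()

# Reports the profiled details for this dataframe's queries:
# describe query time, execution time, and SQL query text.
df.get_execution_profile()
```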

src/snowflake/snowpark/_internal/analyzer/schema_utils.py

Lines changed: 23 additions & 12 deletions
@@ -1,9 +1,8 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import time
 import traceback
-from typing import TYPE_CHECKING, List, Union
+from typing import TYPE_CHECKING, List, Union, Optional
 
 import snowflake.snowpark
 from snowflake.connector.cursor import ResultMetadata, SnowflakeCursor
@@ -12,7 +11,7 @@
 )
 from snowflake.snowpark._internal.analyzer.expression import Attribute
 from snowflake.snowpark._internal.type_utils import convert_metadata_to_sp_type
-from snowflake.snowpark._internal.utils import ttl_cache
+from snowflake.snowpark._internal.utils import ttl_cache, measure_time
 from snowflake.snowpark.types import DecimalType, LongType, StringType
 
 if TYPE_CHECKING:
@@ -68,7 +67,9 @@ def get_attributes() -> List[Attribute]:
 
 
 def analyze_attributes(
-    sql: str, session: "snowflake.snowpark.session.Session"
+    sql: str,
+    session: "snowflake.snowpark.session.Session",
+    dataframe_uuid: Optional[str] = None,
 ) -> List[Attribute]:
     lowercase = sql.strip().lower()
 
@@ -87,29 +88,39 @@
     if lowercase.startswith("get"):
         return get_attributes()
     if lowercase.startswith("describe"):
-        session._run_query(sql)
+        with measure_time() as e2e_time:
+            session._run_query(sql)
+        # Add the time taken to describe the dataframe to query history
+        if dataframe_uuid:
+            session.dataframe_profiler.add_describe_query_time(
+                dataframe_uuid, sql, e2e_time()
+            )
+
         return convert_result_meta_to_attribute(
             session._conn._cursor.description, session._conn.max_string_size
         )
 
-    # collect describe query details for telemetry
+    # collect describe query details for telemetry and dataframe profiling
    stack = traceback.extract_stack(limit=10)[:-1]
     stack_trace = [frame.line for frame in stack] if len(stack) > 0 else None
-    start_time = time.time()
-    attributes = session._get_result_attributes(sql)
-    e2e_time = time.time() - start_time
+    with measure_time() as e2e_time:
+        attributes = session._get_result_attributes(sql)
     session._conn._telemetry_client.send_describe_query_details(
-        session._session_id, sql, e2e_time, stack_trace
+        session._session_id, sql, e2e_time(), stack_trace
     )
+    if dataframe_uuid:
+        session.dataframe_profiler.add_describe_query_time(
+            dataframe_uuid, sql, e2e_time()
+        )
 
     return attributes
 
 
 @ttl_cache(ttl_seconds=15)
 def cached_analyze_attributes(
-    sql: str, session: "snowflake.snowpark.session.Session"  # type: ignore
+    sql: str, session: "snowflake.snowpark.session.Session", dataframe_uuid: Optional[str] = None  # type: ignore
 ) -> List[Attribute]:
-    return analyze_attributes(sql, session)
+    return analyze_attributes(sql, session, dataframe_uuid)
 
 
 def convert_result_meta_to_attribute(
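This file (and the others below) rely on measure_time() from snowflake.snowpark._internal.utils, whose definition is not part of this diff. Here is a minimal sketch of a compatible helper, inferred from the call sites above; the real implementation may differ:

```python
import time
from contextlib import contextmanager
from typing import Callable, Iterator


@contextmanager
def measure_time() -> Iterator[Callable[[], float]]:
    """Yield a callable that returns the elapsed wall-clock time in seconds."""
    start = time.perf_counter()
    end = None

    def elapsed() -> float:
        # Running duration while the block is active; fixed once it exits.
        return (end if end is not None else time.perf_counter()) - start

    try:
        yield elapsed
    finally:
        end = time.perf_counter()
```

Yielding a callable instead of a plain float is what lets e2e_time() be read after the with block exits, which is exactly how the diff forwards the duration to telemetry and to the dataframe profiler.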

src/snowflake/snowpark/_internal/analyzer/select_statement.py

Lines changed: 3 additions & 3 deletions
@@ -642,9 +642,9 @@ def children_plan_nodes(self) -> List[Union["Selectable", SnowflakePlan]]:
 
 @SnowflakePlan.Decorator.wrap_exception
 def _analyze_attributes(
-    sql: str, session: "snowflake.snowpark.session.Session"  # type: ignore
+    sql: str, session: "snowflake.snowpark.session.Session", dataframe_uuid: Optional[str] = None  # type: ignore
 ) -> List[Attribute]:
-    return analyze_attributes(sql, session)
+    return analyze_attributes(sql, session, dataframe_uuid)
 
 
 class SelectSQL(Selectable):
@@ -677,7 +677,7 @@ def __init__(
                 self.pre_actions[0].query_id_place_holder
             )
             self._schema_query = analyzer_utils.schema_value_statement(
-                _analyze_attributes(sql, self._session)
+                _analyze_attributes(sql, self._session, self._uuid)
             )  # Change to subqueryable schema query so downstream query plan can describe the SQL
             self._query_param = None
         else:

src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py

Lines changed: 4 additions & 2 deletions
@@ -588,9 +588,9 @@ def _analyze_attributes(self) -> List[Attribute]:
             self.schema_query is not None
         ), "No schema query is available for the SnowflakePlan"
         if self.session.reduce_describe_query_enabled:
-            return cached_analyze_attributes(self.schema_query, self.session)
+            return cached_analyze_attributes(self.schema_query, self.session, self.uuid)
         else:
-            return analyze_attributes(self.schema_query, self.session)
+            return analyze_attributes(self.schema_query, self.session, self.uuid)
 
     @property
     def attributes(self) -> List[Attribute]:
@@ -725,6 +725,8 @@ def set_last_query_line_intervals(self) -> None:
             self.uuid,
         )
         final_sql = remove_comments(last_query.sql, child_uuids)
+        if self.schema_query:
+            self.schema_query = remove_comments(self.schema_query, child_uuids)
         last_query.sql = final_sql
         last_query.query_line_intervals = query_line_intervals

src/snowflake/snowpark/_internal/compiler/plan_compiler.py

Lines changed: 64 additions & 69 deletions
@@ -4,7 +4,6 @@
 
 import copy
 import logging
-import time
 from typing import Any, Dict, List
 
 from snowflake.snowpark._internal.analyzer.query_plan_analysis_utils import (
@@ -30,7 +29,7 @@
     plot_plan_if_enabled,
 )
 from snowflake.snowpark._internal.telemetry import TelemetryField
-from snowflake.snowpark._internal.utils import random_name_for_temp_object
+from snowflake.snowpark._internal.utils import measure_time, random_name_for_temp_object
 from snowflake.snowpark.mock._connection import MockServerConnection
 
 _logger = logging.getLogger(__name__)
@@ -90,79 +89,75 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]:
         if self.should_start_query_compilation():
             session = self._plan.session
             try:
-                # preparation for compilation
-                # 1. make a copy of the original plan
-                start_time = time.time()
-                complexity_score_before_compilation = get_complexity_score(self._plan)
-                logical_plans: List[LogicalPlan] = [copy.deepcopy(self._plan)]
-                plot_plan_if_enabled(self._plan, "original_plan")
-                plot_plan_if_enabled(logical_plans[0], "deep_copied_plan")
-                deep_copy_end_time = time.time()
-
-                # 2. create a code generator with the original plan
-                query_generator = create_query_generator(self._plan)
-
-                extra_optimization_status: Dict[str, Any] = {}
-                # 3. apply each optimizations if needed
-                # CTE optimization
-                cte_start_time = time.time()
-                if session.cte_optimization_enabled:
-                    repeated_subquery_eliminator = RepeatedSubqueryElimination(
-                        logical_plans, query_generator
-                    )
-                    elimination_result = repeated_subquery_eliminator.apply()
-                    logical_plans = elimination_result.logical_plans
-                    # add the extra repeated subquery elimination status
-                    extra_optimization_status[
-                        CompilationStageTelemetryField.CTE_NODE_CREATED.value
-                    ] = elimination_result.total_num_of_ctes
-
-                cte_end_time = time.time()
-                complexity_scores_after_cte = [
-                    get_complexity_score(logical_plan) for logical_plan in logical_plans
-                ]
-                for i, plan in enumerate(logical_plans):
-                    plot_plan_if_enabled(plan, f"cte_optimized_plan_{i}")
-
-                # Large query breakdown
-                breakdown_summary, skipped_summary = {}, {}
-                if session.large_query_breakdown_enabled:
-                    large_query_breakdown = LargeQueryBreakdown(
-                        session,
-                        query_generator,
-                        logical_plans,
-                        session.large_query_breakdown_complexity_bounds,
-                    )
-                    breakdown_result = large_query_breakdown.apply()
-                    logical_plans = breakdown_result.logical_plans
-                    breakdown_summary = breakdown_result.breakdown_summary
-                    skipped_summary = breakdown_result.skipped_summary
-
-                large_query_breakdown_end_time = time.time()
-                complexity_scores_after_large_query_breakdown = [
-                    get_complexity_score(logical_plan) for logical_plan in logical_plans
-                ]
-                for i, plan in enumerate(logical_plans):
-                    plot_plan_if_enabled(plan, f"large_query_breakdown_plan_{i}")
-
-                # 4. do a final pass of code generation
-                queries = query_generator.generate_queries(logical_plans)
+                with measure_time() as total_time:
+                    # preparation for compilation
+                    # 1. make a copy of the original plan
+                    with measure_time() as deep_copy_time:
+                        complexity_score_before_compilation = get_complexity_score(
+                            self._plan
+                        )
+                        logical_plans: List[LogicalPlan] = [copy.deepcopy(self._plan)]
+                        plot_plan_if_enabled(self._plan, "original_plan")
+                        plot_plan_if_enabled(logical_plans[0], "deep_copied_plan")
+
+                    # 2. create a code generator with the original plan
+                    query_generator = create_query_generator(self._plan)
+
+                    extra_optimization_status: Dict[str, Any] = {}
+                    # 3. apply each optimizations if needed
+                    # CTE optimization
+                    with measure_time() as cte_time:
+                        if session.cte_optimization_enabled:
+                            repeated_subquery_eliminator = RepeatedSubqueryElimination(
+                                logical_plans, query_generator
+                            )
+                            elimination_result = repeated_subquery_eliminator.apply()
+                            logical_plans = elimination_result.logical_plans
+                            # add the extra repeated subquery elimination status
+                            extra_optimization_status[
+                                CompilationStageTelemetryField.CTE_NODE_CREATED.value
+                            ] = elimination_result.total_num_of_ctes
+                    complexity_scores_after_cte = [
+                        get_complexity_score(logical_plan)
+                        for logical_plan in logical_plans
+                    ]
+                    for i, plan in enumerate(logical_plans):
+                        plot_plan_if_enabled(plan, f"cte_optimized_plan_{i}")
+
+                    # Large query breakdown
+                    breakdown_summary, skipped_summary = {}, {}
+                    with measure_time() as large_query_breakdown_time:
+                        if session.large_query_breakdown_enabled:
+                            large_query_breakdown = LargeQueryBreakdown(
+                                session,
+                                query_generator,
+                                logical_plans,
+                                session.large_query_breakdown_complexity_bounds,
+                            )
+                            breakdown_result = large_query_breakdown.apply()
+                            logical_plans = breakdown_result.logical_plans
+                            breakdown_summary = breakdown_result.breakdown_summary
+                            skipped_summary = breakdown_result.skipped_summary
+
+                    complexity_scores_after_large_query_breakdown = [
+                        get_complexity_score(logical_plan)
+                        for logical_plan in logical_plans
+                    ]
+                    for i, plan in enumerate(logical_plans):
+                        plot_plan_if_enabled(plan, f"large_query_breakdown_plan_{i}")
+
+                    # 4. do a final pass of code generation
+                    queries = query_generator.generate_queries(logical_plans)
 
                 # log telemetry data
-                deep_copy_time = deep_copy_end_time - start_time
-                cte_time = cte_end_time - cte_start_time
-                large_query_breakdown_time = (
-                    large_query_breakdown_end_time - cte_end_time
-                )
-                total_time = time.time() - start_time
                 summary_value = {
                     TelemetryField.CTE_OPTIMIZATION_ENABLED.value: session.cte_optimization_enabled,
                     TelemetryField.LARGE_QUERY_BREAKDOWN_ENABLED.value: session.large_query_breakdown_enabled,
                     CompilationStageTelemetryField.COMPLEXITY_SCORE_BOUNDS.value: session.large_query_breakdown_complexity_bounds,
-                    CompilationStageTelemetryField.TIME_TAKEN_FOR_COMPILATION.value: total_time,
-                    CompilationStageTelemetryField.TIME_TAKEN_FOR_DEEP_COPY_PLAN.value: deep_copy_time,
-                    CompilationStageTelemetryField.TIME_TAKEN_FOR_CTE_OPTIMIZATION.value: cte_time,
-                    CompilationStageTelemetryField.TIME_TAKEN_FOR_LARGE_QUERY_BREAKDOWN.value: large_query_breakdown_time,
+                    CompilationStageTelemetryField.TIME_TAKEN_FOR_COMPILATION.value: total_time(),
+                    CompilationStageTelemetryField.TIME_TAKEN_FOR_DEEP_COPY_PLAN.value: deep_copy_time(),
+                    CompilationStageTelemetryField.TIME_TAKEN_FOR_CTE_OPTIMIZATION.value: cte_time(),
+                    CompilationStageTelemetryField.TIME_TAKEN_FOR_LARGE_QUERY_BREAKDOWN.value: large_query_breakdown_time(),
                     CompilationStageTelemetryField.COMPLEXITY_SCORE_BEFORE_COMPILATION.value: complexity_score_before_compilation,
                     CompilationStageTelemetryField.COMPLEXITY_SCORE_AFTER_CTE_OPTIMIZATION.value: complexity_scores_after_cte,
                     CompilationStageTelemetryField.COMPLEXITY_SCORE_AFTER_LARGE_QUERY_BREAKDOWN.value: complexity_scores_after_large_query_breakdown,
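The compiler now nests these timing scopes, and each stage's duration stays readable after its block closes. A toy run of the pattern, reusing the hypothetical measure_time sketch shown earlier (the sleeps merely stand in for the real optimization passes):

```python
import time

with measure_time() as total_time:
    with measure_time() as cte_time:
        time.sleep(0.1)  # stand-in for CTE optimization
    with measure_time() as breakdown_time:
        time.sleep(0.2)  # stand-in for large query breakdown

# All three durations remain readable after their blocks exit.
print(f"cte={cte_time():.2f}s breakdown={breakdown_time():.2f}s total={total_time():.2f}s")
```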

src/snowflake/snowpark/_internal/data_source/drivers/base_driver.py

Lines changed: 20 additions & 18 deletions
@@ -2,7 +2,6 @@
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
 from enum import Enum
-import time
 import datetime
 from typing import List, Callable, Any, Optional, TYPE_CHECKING
 from snowflake.connector.options import pandas as pd
@@ -12,8 +11,11 @@
     Connection,
     Cursor,
 )
-from snowflake.snowpark._internal.utils import generate_random_alphanumeric
-from snowflake.snowpark._internal.utils import get_sorted_key_for_version
+from snowflake.snowpark._internal.utils import (
+    generate_random_alphanumeric,
+    get_sorted_key_for_version,
+    measure_time,
+)
 from snowflake.snowpark.exceptions import SnowparkDataframeReaderException
 from snowflake.snowpark.types import (
     StructType,
@@ -141,21 +143,21 @@ def udtf_ingestion(
         from snowflake.snowpark._internal.data_source.utils import UDTF_PACKAGE_MAP
 
         udtf_name = f"data_source_udtf_{generate_random_alphanumeric(5)}"
-        start = time.perf_counter()
-        session.udtf.register(
-            self.udtf_class_builder(fetch_size=fetch_size, schema=schema),
-            name=udtf_name,
-            output_schema=StructType(
-                [
-                    StructField(field.name, VariantType(), field.nullable)
-                    for field in schema.fields
-                ]
-            ),
-            external_access_integrations=[external_access_integrations],
-            packages=packages or UDTF_PACKAGE_MAP.get(self.dbms_type),
-            imports=imports,
-        )
-        logger.debug(f"register ingestion udtf takes: {time.time() - start} seconds")
+        with measure_time() as udtf_register_time:
+            session.udtf.register(
+                self.udtf_class_builder(fetch_size=fetch_size, schema=schema),
+                name=udtf_name,
+                output_schema=StructType(
+                    [
+                        StructField(field.name, VariantType(), field.nullable)
+                        for field in schema.fields
+                    ]
+                ),
+                external_access_integrations=[external_access_integrations],
+                packages=packages or UDTF_PACKAGE_MAP.get(self.dbms_type),
+                imports=imports,
+            )
+        logger.debug(f"register ingestion udtf takes: {udtf_register_time()} seconds")
         call_udtf_sql = f"""
         select * from {partition_table}, table({udtf_name}({PARTITION_TABLE_COLUMN_NAME}))
         """
