4 | 4 |
5 | 5 | import copy |
6 | 6 | import logging |
7 | | -import time |
8 | 7 | from typing import Any, Dict, List |
9 | 8 |
10 | 9 | from snowflake.snowpark._internal.analyzer.query_plan_analysis_utils import ( |
30 | 29 | plot_plan_if_enabled, |
31 | 30 | ) |
32 | 31 | from snowflake.snowpark._internal.telemetry import TelemetryField |
33 | | -from snowflake.snowpark._internal.utils import random_name_for_temp_object |
| 32 | +from snowflake.snowpark._internal.utils import measure_time, random_name_for_temp_object |
34 | 33 | from snowflake.snowpark.mock._connection import MockServerConnection |
35 | 34 |
36 | 35 | _logger = logging.getLogger(__name__) |
@@ -90,79 +89,75 @@ def compile(self) -> Dict[PlanQueryType, List[Query]]: |
90 | 89 | if self.should_start_query_compilation(): |
91 | 90 | session = self._plan.session |
92 | 91 | try: |
93 | | - # preparation for compilation |
94 | | - # 1. make a copy of the original plan |
95 | | - start_time = time.time() |
96 | | - complexity_score_before_compilation = get_complexity_score(self._plan) |
97 | | - logical_plans: List[LogicalPlan] = [copy.deepcopy(self._plan)] |
98 | | - plot_plan_if_enabled(self._plan, "original_plan") |
99 | | - plot_plan_if_enabled(logical_plans[0], "deep_copied_plan") |
100 | | - deep_copy_end_time = time.time() |
101 | | - |
102 | | - # 2. create a code generator with the original plan |
103 | | - query_generator = create_query_generator(self._plan) |
104 | | - |
105 | | - extra_optimization_status: Dict[str, Any] = {} |
106 | | - # 3. apply each optimizations if needed |
107 | | - # CTE optimization |
108 | | - cte_start_time = time.time() |
109 | | - if session.cte_optimization_enabled: |
110 | | - repeated_subquery_eliminator = RepeatedSubqueryElimination( |
111 | | - logical_plans, query_generator |
112 | | - ) |
113 | | - elimination_result = repeated_subquery_eliminator.apply() |
114 | | - logical_plans = elimination_result.logical_plans |
115 | | - # add the extra repeated subquery elimination status |
116 | | - extra_optimization_status[ |
117 | | - CompilationStageTelemetryField.CTE_NODE_CREATED.value |
118 | | - ] = elimination_result.total_num_of_ctes |
119 | | - |
120 | | - cte_end_time = time.time() |
121 | | - complexity_scores_after_cte = [ |
122 | | - get_complexity_score(logical_plan) for logical_plan in logical_plans |
123 | | - ] |
124 | | - for i, plan in enumerate(logical_plans): |
125 | | - plot_plan_if_enabled(plan, f"cte_optimized_plan_{i}") |
126 | | - |
127 | | - # Large query breakdown |
128 | | - breakdown_summary, skipped_summary = {}, {} |
129 | | - if session.large_query_breakdown_enabled: |
130 | | - large_query_breakdown = LargeQueryBreakdown( |
131 | | - session, |
132 | | - query_generator, |
133 | | - logical_plans, |
134 | | - session.large_query_breakdown_complexity_bounds, |
135 | | - ) |
136 | | - breakdown_result = large_query_breakdown.apply() |
137 | | - logical_plans = breakdown_result.logical_plans |
138 | | - breakdown_summary = breakdown_result.breakdown_summary |
139 | | - skipped_summary = breakdown_result.skipped_summary |
140 | | - |
141 | | - large_query_breakdown_end_time = time.time() |
142 | | - complexity_scores_after_large_query_breakdown = [ |
143 | | - get_complexity_score(logical_plan) for logical_plan in logical_plans |
144 | | - ] |
145 | | - for i, plan in enumerate(logical_plans): |
146 | | - plot_plan_if_enabled(plan, f"large_query_breakdown_plan_{i}") |
147 | | - |
148 | | - # 4. do a final pass of code generation |
149 | | - queries = query_generator.generate_queries(logical_plans) |
| 92 | + with measure_time() as total_time: |
| 93 | + # preparation for compilation |
| 94 | + # 1. make a copy of the original plan |
| 95 | + with measure_time() as deep_copy_time: |
| 96 | + complexity_score_before_compilation = get_complexity_score( |
| 97 | + self._plan |
| 98 | + ) |
| 99 | + logical_plans: List[LogicalPlan] = [copy.deepcopy(self._plan)] |
| 100 | + plot_plan_if_enabled(self._plan, "original_plan") |
| 101 | + plot_plan_if_enabled(logical_plans[0], "deep_copied_plan") |
| 102 | + |
| 103 | + # 2. create a code generator with the original plan |
| 104 | + query_generator = create_query_generator(self._plan) |
| 105 | + |
| 106 | + extra_optimization_status: Dict[str, Any] = {} |
| 107 | +                    # 3. apply each optimization if needed |
| 108 | + # CTE optimization |
| 109 | + with measure_time() as cte_time: |
| 110 | + if session.cte_optimization_enabled: |
| 111 | + repeated_subquery_eliminator = RepeatedSubqueryElimination( |
| 112 | + logical_plans, query_generator |
| 113 | + ) |
| 114 | + elimination_result = repeated_subquery_eliminator.apply() |
| 115 | + logical_plans = elimination_result.logical_plans |
| 116 | + # add the extra repeated subquery elimination status |
| 117 | + extra_optimization_status[ |
| 118 | + CompilationStageTelemetryField.CTE_NODE_CREATED.value |
| 119 | + ] = elimination_result.total_num_of_ctes |
| 120 | + complexity_scores_after_cte = [ |
| 121 | + get_complexity_score(logical_plan) |
| 122 | + for logical_plan in logical_plans |
| 123 | + ] |
| 124 | + for i, plan in enumerate(logical_plans): |
| 125 | + plot_plan_if_enabled(plan, f"cte_optimized_plan_{i}") |
| 126 | + |
| 127 | + # Large query breakdown |
| 128 | + breakdown_summary, skipped_summary = {}, {} |
| 129 | + with measure_time() as large_query_breakdown_time: |
| 130 | + if session.large_query_breakdown_enabled: |
| 131 | + large_query_breakdown = LargeQueryBreakdown( |
| 132 | + session, |
| 133 | + query_generator, |
| 134 | + logical_plans, |
| 135 | + session.large_query_breakdown_complexity_bounds, |
| 136 | + ) |
| 137 | + breakdown_result = large_query_breakdown.apply() |
| 138 | + logical_plans = breakdown_result.logical_plans |
| 139 | + breakdown_summary = breakdown_result.breakdown_summary |
| 140 | + skipped_summary = breakdown_result.skipped_summary |
| 141 | + |
| 142 | + complexity_scores_after_large_query_breakdown = [ |
| 143 | + get_complexity_score(logical_plan) |
| 144 | + for logical_plan in logical_plans |
| 145 | + ] |
| 146 | + for i, plan in enumerate(logical_plans): |
| 147 | + plot_plan_if_enabled(plan, f"large_query_breakdown_plan_{i}") |
| 148 | + |
| 149 | + # 4. do a final pass of code generation |
| 150 | + queries = query_generator.generate_queries(logical_plans) |
150 | 151 |
151 | 152 | # log telemetry data |
152 | | - deep_copy_time = deep_copy_end_time - start_time |
153 | | - cte_time = cte_end_time - cte_start_time |
154 | | - large_query_breakdown_time = ( |
155 | | - large_query_breakdown_end_time - cte_end_time |
156 | | - ) |
157 | | - total_time = time.time() - start_time |
158 | 153 | summary_value = { |
159 | 154 | TelemetryField.CTE_OPTIMIZATION_ENABLED.value: session.cte_optimization_enabled, |
160 | 155 | TelemetryField.LARGE_QUERY_BREAKDOWN_ENABLED.value: session.large_query_breakdown_enabled, |
161 | 156 | CompilationStageTelemetryField.COMPLEXITY_SCORE_BOUNDS.value: session.large_query_breakdown_complexity_bounds, |
162 | | - CompilationStageTelemetryField.TIME_TAKEN_FOR_COMPILATION.value: total_time, |
163 | | - CompilationStageTelemetryField.TIME_TAKEN_FOR_DEEP_COPY_PLAN.value: deep_copy_time, |
164 | | - CompilationStageTelemetryField.TIME_TAKEN_FOR_CTE_OPTIMIZATION.value: cte_time, |
165 | | - CompilationStageTelemetryField.TIME_TAKEN_FOR_LARGE_QUERY_BREAKDOWN.value: large_query_breakdown_time, |
| 157 | + CompilationStageTelemetryField.TIME_TAKEN_FOR_COMPILATION.value: total_time(), |
| 158 | + CompilationStageTelemetryField.TIME_TAKEN_FOR_DEEP_COPY_PLAN.value: deep_copy_time(), |
| 159 | + CompilationStageTelemetryField.TIME_TAKEN_FOR_CTE_OPTIMIZATION.value: cte_time(), |
| 160 | + CompilationStageTelemetryField.TIME_TAKEN_FOR_LARGE_QUERY_BREAKDOWN.value: large_query_breakdown_time(), |
166 | 161 | CompilationStageTelemetryField.COMPLEXITY_SCORE_BEFORE_COMPILATION.value: complexity_score_before_compilation, |
167 | 162 | CompilationStageTelemetryField.COMPLEXITY_SCORE_AFTER_CTE_OPTIMIZATION.value: complexity_scores_after_cte, |
168 | 163 | CompilationStageTelemetryField.COMPLEXITY_SCORE_AFTER_LARGE_QUERY_BREAKDOWN.value: complexity_scores_after_large_query_breakdown, |
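
The `measure_time` helper imported above is used as a context manager whose yielded value is later called (`total_time()`, `deep_copy_time()`, `cte_time()`, `large_query_breakdown_time()`) to read an elapsed duration for the telemetry summary. Its implementation is not part of this diff; a minimal sketch consistent with those call sites, assuming wall-clock timing via `time.time()`, might look like the following (the real helper in `snowflake.snowpark._internal.utils` may differ):

    # Hypothetical sketch of measure_time(); not the actual Snowpark implementation.
    import time
    from contextlib import contextmanager
    from typing import Callable, Iterator, Optional

    @contextmanager
    def measure_time() -> Iterator[Callable[[], float]]:
        # Yields a zero-argument callable reporting elapsed wall-clock seconds.
        # While the block is still running it returns the time so far; once the
        # block exits, the end time is frozen so later reads stay stable.
        start = time.time()
        end: Optional[float] = None

        def elapsed() -> float:
            return (end if end is not None else time.time()) - start

        try:
            yield elapsed
        finally:
            end = time.time()

Yielding a callable rather than a mutable object keeps the call sites terse and freezes the end time when the `with` block exits, so telemetry reads taken afterwards report a fixed duration instead of one that keeps growing.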