33
44from databricks .labs .blueprint .installation import Installation
55from databricks .labs .lsql .backends import RuntimeBackend , SqlBackend
6+ from databricks .labs .ucx .hive_metastore .table_migration_status import TableMigrationStatus
67from databricks .sdk import WorkspaceClient , core
78
89from databricks .labs .ucx .__about__ import __version__
9- from databricks .labs .ucx .assessment .clusters import ClustersCrawler , PoliciesCrawler
10+ from databricks .labs .ucx .assessment .clusters import (
11+ ClustersCrawler ,
12+ PoliciesCrawler ,
13+ ClusterOwnership ,
14+ ClusterInfo ,
15+ ClusterPolicyOwnership ,
16+ PolicyInfo ,
17+ )
1018from databricks .labs .ucx .assessment .init_scripts import GlobalInitScriptCrawler
11- from databricks .labs .ucx .assessment .jobs import JobsCrawler , SubmitRunsCrawler
12- from databricks .labs .ucx .assessment .pipelines import PipelinesCrawler
19+ from databricks .labs .ucx .assessment .jobs import JobOwnership , JobInfo , JobsCrawler , SubmitRunsCrawler
20+ from databricks .labs .ucx .assessment .pipelines import PipelinesCrawler , PipelineInfo , PipelineOwnership
1321from databricks .labs .ucx .config import WorkspaceConfig
1422from databricks .labs .ucx .contexts .application import GlobalContext
1523from databricks .labs .ucx .hive_metastore import TablesInMounts , TablesCrawler
24+ from databricks .labs .ucx .hive_metastore .grants import Grant
1625from databricks .labs .ucx .hive_metastore .table_size import TableSizeCrawler
17- from databricks .labs .ucx .hive_metastore .tables import FasterTableScanCrawler
26+ from databricks .labs .ucx .hive_metastore .tables import FasterTableScanCrawler , Table
27+ from databricks .labs .ucx .hive_metastore .udfs import Udf
1828from databricks .labs .ucx .installer .logs import TaskRunWarningRecorder
29+ from databricks .labs .ucx .progress .history import HistoryLog
1930from databricks .labs .ucx .progress .workflow_runs import WorkflowRunRecorder
2031
32+ # As with GlobalContext, service factories unavoidably have a lot of public methods.
33+ # pylint: disable=too-many-public-methods
34+
2135
2236class RuntimeContext (GlobalContext ):
2337 @cached_property
@@ -54,6 +68,10 @@ def installation(self) -> Installation:
5468 def jobs_crawler (self ) -> JobsCrawler :
5569 return JobsCrawler (self .workspace_client , self .sql_backend , self .inventory_database )
5670
@cached_property
def job_ownership(self) -> JobOwnership:
    """Return a JobOwnership built from this context's administrator locator.

    The instance is created on first access and then cached on the context.
    """
    locator = self.administrator_locator
    return JobOwnership(locator)
74+
5775 @cached_property
5876 def submit_runs_crawler (self ) -> SubmitRunsCrawler :
5977 return SubmitRunsCrawler (
@@ -67,10 +85,18 @@ def submit_runs_crawler(self) -> SubmitRunsCrawler:
6785 def clusters_crawler (self ) -> ClustersCrawler :
6886 return ClustersCrawler (self .workspace_client , self .sql_backend , self .inventory_database )
6987
@cached_property
def cluster_ownership(self) -> ClusterOwnership:
    """Return a ClusterOwnership built from this context's administrator locator.

    The instance is created on first access and then cached on the context.
    """
    locator = self.administrator_locator
    return ClusterOwnership(locator)
91+
@cached_property
def pipelines_crawler(self) -> PipelinesCrawler:
    """Return a PipelinesCrawler wired to this context.

    It is constructed from the workspace client, the SQL backend and the
    inventory database, in that order, and cached after the first access.
    """
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return PipelinesCrawler(client, backend, inventory)
7395
@cached_property
def pipeline_ownership(self) -> PipelineOwnership:
    """Return a PipelineOwnership built from this context's administrator locator.

    The instance is created on first access and then cached on the context.
    """
    locator = self.administrator_locator
    return PipelineOwnership(locator)
99+
@cached_property
def table_size_crawler(self) -> TableSizeCrawler:
    """Return a TableSizeCrawler that wraps this context's tables crawler.

    The instance is created on first access and then cached on the context.
    """
    crawler = self.tables_crawler
    return TableSizeCrawler(crawler)
@@ -79,12 +105,18 @@ def table_size_crawler(self) -> TableSizeCrawler:
79105 def policies_crawler (self ) -> PoliciesCrawler :
80106 return PoliciesCrawler (self .workspace_client , self .sql_backend , self .inventory_database )
81107
@cached_property
def cluster_policy_ownership(self) -> ClusterPolicyOwnership:
    """Return a ClusterPolicyOwnership built from this context's administrator locator.

    The instance is created on first access and then cached on the context.
    """
    locator = self.administrator_locator
    return ClusterPolicyOwnership(locator)
111+
@cached_property
def global_init_scripts_crawler(self) -> GlobalInitScriptCrawler:
    """Return a GlobalInitScriptCrawler wired to this context.

    It is constructed from the workspace client, the SQL backend and the
    inventory database, in that order, and cached after the first access.
    """
    client = self.workspace_client
    backend = self.sql_backend
    inventory = self.inventory_database
    return GlobalInitScriptCrawler(client, backend, inventory)
85115
@cached_property
def tables_crawler(self) -> TablesCrawler:
    """Return the tables crawler for this context (fast-scan implementation).

    Warning: Not all runtime contexts support the fast-scan implementation;
    it requires the JVM bridge to Spark and that's not always available.
    """
    backend = self.sql_backend
    inventory = self.inventory_database
    databases = self.config.include_databases
    return FasterTableScanCrawler(backend, inventory, databases)
89121
90122 @cached_property
@@ -116,10 +148,102 @@ def workflow_run_recorder(self) -> WorkflowRunRecorder:
116148 return WorkflowRunRecorder (
117149 self .sql_backend ,
118150 self .config .ucx_catalog ,
119- workspace_id = self .workspace_client . get_workspace_id () ,
151+ workspace_id = self .workspace_id ,
120152 workflow_name = self .named_parameters ["workflow" ],
121153 workflow_id = int (self .named_parameters ["job_id" ]),
122154 workflow_run_id = int (self .named_parameters ["parent_run_id" ]),
123155 workflow_run_attempt = int (self .named_parameters .get ("attempt" , 0 )),
124156 workflow_start_time = self .named_parameters ["start_time" ],
125157 )
158+
@cached_property
def workspace_id(self) -> int:
    """Return the workspace identifier reported by the workspace client.

    The value is fetched via ``get_workspace_id()`` on first access and then
    cached on the context, so the client is asked only once per instance.
    """
    client = self.workspace_client
    return client.get_workspace_id()
162+
@cached_property
def historical_clusters_log(self) -> HistoryLog[ClusterInfo]:
    """Return the HistoryLog used for ClusterInfo records.

    The log is built from the SQL backend, the cluster ownership resolver,
    the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.cluster_ownership
    record_class = ClusterInfo
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
173+
@cached_property
def historical_cluster_policies_log(self) -> HistoryLog[PolicyInfo]:
    """Return the HistoryLog used for PolicyInfo records.

    The log is built from the SQL backend, the cluster-policy ownership
    resolver, the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.cluster_policy_ownership
    record_class = PolicyInfo
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
184+
@cached_property
def historical_grants_log(self) -> HistoryLog[Grant]:
    """Return the HistoryLog used for Grant records.

    The log is built from the SQL backend, this context's grant ownership
    resolver, the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.grant_ownership
    record_class = Grant
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
195+
@cached_property
def historical_jobs_log(self) -> HistoryLog[JobInfo]:
    """Return the HistoryLog used for JobInfo records.

    The log is built from the SQL backend, the job ownership resolver, the
    record class, the run id parsed as an integer from the ``parent_run_id``
    named parameter, the workspace id and the configured UCX catalog. It is
    cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.job_ownership
    record_class = JobInfo
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
206+
@cached_property
def historical_pipelines_log(self) -> HistoryLog[PipelineInfo]:
    """Return the HistoryLog used for PipelineInfo records.

    The log is built from the SQL backend, the pipeline ownership resolver,
    the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.pipeline_ownership
    record_class = PipelineInfo
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
217+
@cached_property
def historical_tables_log(self) -> HistoryLog[Table]:
    """Return the HistoryLog used for Table records.

    The log is built from the SQL backend, this context's table ownership
    resolver, the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.table_ownership
    record_class = Table
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
228+
@cached_property
def historical_table_migration_log(self) -> HistoryLog[TableMigrationStatus]:
    """Return the HistoryLog used for TableMigrationStatus records.

    The log is built from the SQL backend, this context's table-migration
    ownership resolver, the record class, the run id parsed as an integer
    from the ``parent_run_id`` named parameter, the workspace id and the
    configured UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.table_migration_ownership
    record_class = TableMigrationStatus
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
239+
@cached_property
def historical_udfs_log(self) -> HistoryLog[Udf]:
    """Return the HistoryLog used for Udf records.

    The log is built from the SQL backend, this context's UDF ownership
    resolver, the record class, the run id parsed as an integer from the
    ``parent_run_id`` named parameter, the workspace id and the configured
    UCX catalog. It is cached after the first access.
    """
    backend = self.sql_backend
    ownership = self.udf_ownership
    record_class = Udf
    run_id = int(self.named_parameters["parent_run_id"])
    workspace = self.workspace_id
    catalog = self.config.ucx_catalog
    return HistoryLog(backend, ownership, record_class, run_id, workspace, catalog)
0 commit comments