 import json
-import typing
 from dataclasses import dataclass

 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.jobs import BaseJob

 from databricks.labs.ucx.framework.crawlers import CrawlerBase
-from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
 from databricks.labs.ucx.hive_metastore.table_acls import SqlBackend

+INCOMPATIBLE_SPARK_CONFIG_KEYS = [
+    "spark.databricks.passthrough.enabled",
+    "spark.hadoop.javax.jdo.option.ConnectionURL",
+    "spark.databricks.hive.metastore.glueCatalog.enabled",
+]
+

 @dataclass
 class JobInfo:
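The keys move from a class attribute (a set literal mis-annotated as `typing.ClassVar[tuple]`) to a plain module-level list, so both crawlers can share them without the now-deleted `AssessmentToolkit`. A minimal sketch of the membership check the constant feeds, with a hypothetical `spark_conf` dict standing in for a real cluster config:

    # The constant as added above.
    INCOMPATIBLE_SPARK_CONFIG_KEYS = [
        "spark.databricks.passthrough.enabled",
        "spark.hadoop.javax.jdo.option.ConnectionURL",
        "spark.databricks.hive.metastore.glueCatalog.enabled",
    ]

    # Hypothetical cluster config; real values come from the Databricks APIs.
    spark_conf = {"spark.databricks.passthrough.enabled": "true"}

    failures = [f"unsupported config: {k}" for k in INCOMPATIBLE_SPARK_CONFIG_KEYS if k in spark_conf]
    assert failures == ["unsupported config: spark.databricks.passthrough.enabled"]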
@@ -47,42 +51,6 @@ def spark_version_compatibility(spark_version: str) -> str:
     return "supported"


-class AssessmentToolkit:
-    incompatible_spark_config_keys: typing.ClassVar[tuple] = {
-        "spark.databricks.passthrough.enabled",
-        "spark.hadoop.javax.jdo.option.ConnectionURL",
-        "spark.databricks.hive.metastore.glueCatalog.enabled",
-    }
-
-    def __init__(self, ws: WorkspaceClient, inventory_schema, backend=None):
-        self._all_jobs = None
-        self._all_clusters_by_id = None
-        self._ws = ws
-        self._inventory_schema = inventory_schema
-        self._backend = backend
-        self._external_locations = None
-
-    @staticmethod
-    def _verify_ws_client(w: WorkspaceClient):
-        _me = w.current_user.me()
-        is_workspace_admin = any(g.display == "admins" for g in _me.groups)
-        if not is_workspace_admin:
-            msg = "Current user is not a workspace admin"
-            raise RuntimeError(msg)
-
-    def generate_external_location_list(self):
-        crawler = ExternalLocationCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-    def generate_job_assessment(self):
-        crawler = JobsCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-    def generate_cluster_assessment(self):
-        crawler = ClustersCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-
 class ClustersCrawler(CrawlerBase):
     def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
         super().__init__(sbe, "hive_metastore", schema, "clusters")
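With `AssessmentToolkit` removed, callers instantiate the crawlers directly and call `snapshot()` themselves; note the workspace-admin check in `_verify_ws_client` disappears with it. A hedged sketch of the new call site, where `backend` stands in for a real `SqlBackend` implementation and `"ucx"` is an assumed inventory schema name:

    from databricks.sdk import WorkspaceClient

    ws = WorkspaceClient()
    backend = ...  # any SqlBackend implementation (placeholder, not a real value)
    cluster_report = ClustersCrawler(ws, backend, "ucx").snapshot()
    job_report = JobsCrawler(ws, backend, "ucx").snapshot()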
@@ -101,7 +69,7 @@ def _assess_clusters(self, all_clusters):
                 failures.append(f"not supported DBR: {cluster.spark_version}")

             if cluster.spark_conf is not None:
-                for k in AssessmentToolkit.incompatible_spark_config_keys:
+                for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
                     if k in cluster.spark_conf:
                         failures.append(f"unsupported config: {k}")

@@ -162,7 +130,7 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> list[JobI
                 job_assessment[job.job_id].add(f"not supported DBR: {cluster_config.spark_version}")

             if cluster_config.spark_conf is not None:
-                for k in AssessmentToolkit.incompatible_spark_config_keys:
+                for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
                     if k in cluster_config.spark_conf:
                         job_assessment[job.job_id].add(f"unsupported config: {k}")

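Unlike `_assess_clusters`, which appends to a list, `_assess_jobs` keeps a set of failures per job id, so a job whose runs span several incompatible clusters reports each finding once. A small sketch of that shape; the dict initialization and job id 42 are illustrative, not from this hunk:

    # job_assessment maps job_id -> set of failure messages; set.add() dedups.
    job_assessment: dict[int, set[str]] = {}
    job_assessment.setdefault(42, set()).add("unsupported config: spark.databricks.passthrough.enabled")
    job_assessment.setdefault(42, set()).add("unsupported config: spark.databricks.passthrough.enabled")
    assert job_assessment[42] == {"unsupported config: spark.databricks.passthrough.enabled"}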