
Commit 6d30d12

Refactor code and clean out HTML markup (#252)
This cleanup was missed when merging PR #244.
Parent: 24e354b

5 files changed: +30 additions, -77 deletions
Lines changed: 0 additions & 3 deletions

```diff
@@ -1,3 +0,0 @@
-from databricks.labs.ucx.assessment.assessment import AssessmentToolkit
-
-__all__ = ["AssessmentToolkit"]
```

src/databricks/labs/ucx/assessment/assessment.py renamed to src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 8 additions & 40 deletions
```diff
@@ -1,14 +1,18 @@
 import json
-import typing
 from dataclasses import dataclass
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.jobs import BaseJob
 
 from databricks.labs.ucx.framework.crawlers import CrawlerBase
-from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
 from databricks.labs.ucx.hive_metastore.table_acls import SqlBackend
 
+INCOMPATIBLE_SPARK_CONFIG_KEYS = [
+    "spark.databricks.passthrough.enabled",
+    "spark.hadoop.javax.jdo.option.ConnectionURL",
+    "spark.databricks.hive.metastore.glueCatalog.enabled",
+]
+
 
 @dataclass
 class JobInfo:
@@ -47,42 +51,6 @@ def spark_version_compatibility(spark_version: str) -> str:
     return "supported"
 
 
-class AssessmentToolkit:
-    incompatible_spark_config_keys: typing.ClassVar[tuple] = {
-        "spark.databricks.passthrough.enabled",
-        "spark.hadoop.javax.jdo.option.ConnectionURL",
-        "spark.databricks.hive.metastore.glueCatalog.enabled",
-    }
-
-    def __init__(self, ws: WorkspaceClient, inventory_schema, backend=None):
-        self._all_jobs = None
-        self._all_clusters_by_id = None
-        self._ws = ws
-        self._inventory_schema = inventory_schema
-        self._backend = backend
-        self._external_locations = None
-
-    @staticmethod
-    def _verify_ws_client(w: WorkspaceClient):
-        _me = w.current_user.me()
-        is_workspace_admin = any(g.display == "admins" for g in _me.groups)
-        if not is_workspace_admin:
-            msg = "Current user is not a workspace admin"
-            raise RuntimeError(msg)
-
-    def generate_external_location_list(self):
-        crawler = ExternalLocationCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-    def generate_job_assessment(self):
-        crawler = JobsCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-    def generate_cluster_assessment(self):
-        crawler = ClustersCrawler(self._ws, self._backend, self._inventory_schema)
-        return crawler.snapshot()
-
-
 class ClustersCrawler(CrawlerBase):
     def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
         super().__init__(sbe, "hive_metastore", schema, "clusters")
@@ -101,7 +69,7 @@ def _assess_clusters(self, all_clusters):
                 failures.append(f"not supported DBR: {cluster.spark_version}")
 
             if cluster.spark_conf is not None:
-                for k in AssessmentToolkit.incompatible_spark_config_keys:
+                for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
                     if k in cluster.spark_conf:
                         failures.append(f"unsupported config: {k}")
 
@@ -162,7 +130,7 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> list[JobInfo]:
                 job_assessment[job.job_id].add(f"not supported DBR: {cluster_config.spark_version}")
 
             if cluster_config.spark_conf is not None:
-                for k in AssessmentToolkit.incompatible_spark_config_keys:
+                for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
                     if k in cluster_config.spark_conf:
                         job_assessment[job.job_id].add(f"unsupported config: {k}")
 
```
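Since the incompatible keys are now a module-level constant rather than a class attribute, the check is reusable without instantiating anything. Below is a minimal standalone sketch of that check; the sample `spark_conf` values are invented for illustration.

```python
# Standalone sketch of the check the crawlers perform above;
# the sample spark_conf dict is invented for illustration.
INCOMPATIBLE_SPARK_CONFIG_KEYS = [
    "spark.databricks.passthrough.enabled",
    "spark.hadoop.javax.jdo.option.ConnectionURL",
    "spark.databricks.hive.metastore.glueCatalog.enabled",
]

spark_conf = {
    "spark.databricks.passthrough.enabled": "true",
    "spark.sql.shuffle.partitions": "200",
}

# Mirrors the loop in _assess_clusters / _assess_jobs: any matching
# key is recorded as a failure for that cluster or job.
failures = [f"unsupported config: {k}" for k in INCOMPATIBLE_SPARK_CONFIG_KEYS if k in spark_conf]
print(failures)  # ['unsupported config: spark.databricks.passthrough.enabled']
```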

src/databricks/labs/ucx/runtime.py

Lines changed: 20 additions & 25 deletions
```diff
@@ -4,11 +4,12 @@
 
 from databricks.sdk import WorkspaceClient
 
-from databricks.labs.ucx.assessment import AssessmentToolkit
+from databricks.labs.ucx.assessment.crawlers import ClustersCrawler, JobsCrawler
 from databricks.labs.ucx.config import MigrationConfig
 from databricks.labs.ucx.framework.crawlers import RuntimeBackend
 from databricks.labs.ucx.framework.tasks import task, trigger
 from databricks.labs.ucx.hive_metastore import TaclToolkit
+from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
 from databricks.labs.ucx.hive_metastore.list_mounts import Mounts
 from databricks.labs.ucx.workspace_access import GroupMigrationToolkit
 
@@ -80,44 +81,38 @@ def inventorize_external_locations(cfg: MigrationConfig):
     These external_locations will be created in a later stage before the table can be migrated.
     """
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    assess = AssessmentToolkit(ws, cfg.inventory_database, RuntimeBackend())
-    assess.generate_external_location_list()
+    crawler = ExternalLocationCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    crawler.snapshot()
 
 
 @task("assessment", depends_on=[setup_schema])
 def inventorize_jobs(cfg: MigrationConfig):
     """This part scan through all the jobs and locate ones that are not compatible with UC.
-    It looks for:<br>
-    <ol>
-    <li>Clusters with DBR version earlier than 11.3<br>
-    <li>Clusters using Passthru Authentication<br>
-    <li>Clusters with incompatible spark config tags<br>
-    <li>Clusters referencing DBFS locations in one or more config options<br>
-    </ol>
-    <br>
-    A report with a list of all the Jobs is saved to the `$inventory.jobs` Table.
+    It looks for:
+      - Clusters with DBR version earlier than 11.3
+      - Clusters using Passthru Authentication
+      - Clusters with incompatible spark config tags
+      - Clusters referencing DBFS locations in one or more config options
+    A report with a list of all the Jobs is saved to the `$inventory.jobs` table.
     """
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    assess = AssessmentToolkit(ws, cfg.inventory_database, RuntimeBackend())
-    assess.generate_job_assessment()
+    crawler = JobsCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    crawler.snapshot()
 
 
 @task("assessment", depends_on=[setup_schema])
 def inventorize_clusters(cfg: MigrationConfig):
     """This part scan through all the clusters and locate ones that are not compatible with UC.
-    It looks for:<br>
-    <ol>
-    <li>Clusters with DBR version earlier than 11.3<br>
-    <li>Clusters using Passthru Authentication<br>
-    <li>Clusters with incompatible spark config tags<br>
-    <li>Clusters referencing DBFS locations in one or more config options<br>
-    </ol>
-    <br>
-    A report with a list of all the Jobs is saved to the `$inventory.clusters` Table.
+    It looks for:
+      - Clusters with DBR version earlier than 11.3
+      - Clusters using Passthru Authentication
+      - Clusters with incompatible spark config tags
+      - Clusters referencing DBFS locations in one or more config options
+    A report with a list of all the Jobs is saved to the `$inventory.clusters` table.
     """
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    assess = AssessmentToolkit(ws, cfg.inventory_database, RuntimeBackend())
-    assess.generate_cluster_assessment()
+    crawler = ClustersCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    crawler.snapshot()
 
 
 @task("assessment", depends_on=[setup_schema])
```
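With `AssessmentToolkit` removed, each task wires up its crawler directly and calls `snapshot()`. Below is a hedged sketch of that call pattern outside the `@task` machinery; the schema name `"my_inventory"` is hypothetical, and `RuntimeBackend` is only meaningful inside a Databricks job run, as in `runtime.py` above.

```python
# Sketch of the post-refactor call pattern: construct a crawler, snapshot it.
# "my_inventory" is a hypothetical inventory schema name.
from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.assessment.crawlers import ClustersCrawler, JobsCrawler
from databricks.labs.ucx.framework.crawlers import RuntimeBackend

ws = WorkspaceClient()  # assumes ambient Databricks auth, as inside a job run
backend = RuntimeBackend()  # the backend these tasks use; job-runtime only

ClustersCrawler(ws, backend, "my_inventory").snapshot()  # writes $inventory.clusters
JobsCrawler(ws, backend, "my_inventory").snapshot()      # writes $inventory.jobs
```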
Lines changed: 0 additions & 4 deletions

```diff
@@ -1,9 +1,5 @@
 import pytest
 
-from databricks.labs.ucx.assessment.assessment import AssessmentToolkit
-
 
 def test_table_inventory(ws, make_catalog, make_schema):
     pytest.skip("test is broken")
-    assess = AssessmentToolkit(ws, make_catalog(), make_schema())
-    assess.table_inventory()
```

tests/unit/assessment/test_assessment.py

Lines changed: 2 additions & 5 deletions
```diff
@@ -3,11 +3,8 @@
 from databricks.sdk.service.compute import AutoScale, ClusterDetails
 from databricks.sdk.service.jobs import BaseJob, JobSettings, NotebookTask, Task
 
-from databricks.labs.ucx.assessment.assessment import (
-    ClustersCrawler,
-    ExternalLocationCrawler,
-    JobsCrawler,
-)
+from databricks.labs.ucx.assessment.crawlers import ClustersCrawler, JobsCrawler
+from databricks.labs.ucx.hive_metastore.data_objects import ExternalLocationCrawler
 from databricks.labs.ucx.hive_metastore.list_mounts import Mount
 from databricks.labs.ucx.mixins.sql import Row
 from tests.unit.framework.mocks import MockBackend
```
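After the move, the crawlers import from `databricks.labs.ucx.assessment.crawlers`, so a unit test only needs a mocked `WorkspaceClient` plus the repo's `MockBackend`. Below is a sketch under stated assumptions: the no-argument `MockBackend()` constructor and the empty-snapshot behavior are assumptions, not confirmed by this diff.

```python
# Hedged sketch: unit-testing ClustersCrawler from its new location.
# Assumes MockBackend() takes no required arguments and that snapshot()
# yields no rows for an empty workspace.
from unittest.mock import MagicMock

from databricks.labs.ucx.assessment.crawlers import ClustersCrawler
from tests.unit.framework.mocks import MockBackend


def test_clusters_crawler_empty_workspace():
    ws = MagicMock()
    ws.clusters.list.return_value = []  # no clusters to assess
    crawler = ClustersCrawler(ws, MockBackend(), "ucx")
    assert list(crawler.snapshot()) == []
```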
