Commit cc4146b
Added legacy cluster types to the assessment result (#932)
## Changes

Adding missing support in the assessment for no isolation shared and legacy cluster types.

### Linked issues

Resolves #925

### Tests

- [x] manually tested
- [x] added unit tests
- [x] added integration tests
- [ ] verified on staging environment (screenshot attached)
1 parent ba9c52f commit cc4146b

File tree: 4 files changed, +67 -0 lines changed

docs/assessment.md

Lines changed: 8 additions & 0 deletions

```diff
@@ -128,6 +128,14 @@ A spark config option was found in a cluster compute definition that is incompat
 Passthrough security model is not supported by Unity Catalog. Passthrough mode relied upon file-based authorization, which is incompatible with the Fine Grained Access Controls supported by Unity Catalog.
 Recommend mapping your Passthrough security model to an External Location/Volume/Table/View based security model compatible with Unity Catalog.
 
+### AF116 - No isolation shared clusters not supported in UC
+Unity Catalog data cannot be accessed from No Isolation Shared clusters; they should not be used.
+
+### AF117 - cluster type not supported
+Only the Assigned and Shared access modes are supported in UC.
+You must change your cluster configuration to match UC-compliant access modes.
+
+
 ### AF201 - Inplace Sync
 Short description: We found that the table or database can be SYNC'd without moving data because the data is stored directly on cloud storage specified via a mount or a cloud storage URL (not DBFS).
 How: Run the SYNC command on the table or schema. If the tables (or source database) is 'managed' first set this spark setting in your session or in the interactive cluster configuration: `spark.databricks.sync.command.enableManagedTable=true`
```
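
The AF117 remediation ("change your cluster configuration to match UC-compliant access modes") can be scripted. Below is a minimal sketch, not part of this commit, that uses the Databricks SDK to re-submit a flagged cluster with the Assigned (single-user) access mode; the cluster id and user name are hypothetical placeholders.

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import DataSecurityMode

ws = WorkspaceClient()
# Hypothetical id of a cluster flagged with AF117.
cluster = ws.clusters.get(cluster_id="0123-456789-example")

# Re-submit the same cluster definition with a UC-compliant access mode.
# SINGLE_USER corresponds to the "Assigned" mode in the workspace UI.
ws.clusters.edit(
    cluster_id=cluster.cluster_id,
    cluster_name=cluster.cluster_name,
    spark_version=cluster.spark_version,
    node_type_id=cluster.node_type_id,
    num_workers=cluster.num_workers,
    data_security_mode=DataSecurityMode.SINGLE_USER,
    single_user_name="user@example.com",  # hypothetical; required for SINGLE_USER
)
```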

src/databricks/labs/ucx/assessment/clusters.py

Lines changed: 10 additions & 0 deletions

```diff
@@ -9,6 +9,7 @@
 from databricks.sdk.service.compute import (
     ClusterDetails,
     ClusterSource,
+    DataSecurityMode,
     InitScriptInfo,
     Policy,
 )
@@ -101,6 +102,11 @@ def check_spark_conf(self, conf: dict[str, str], source: str) -> list[str]:
     def check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[str]:
         failures: list[str] = []
 
+        unsupported_cluster_types = [
+            DataSecurityMode.LEGACY_PASSTHROUGH,
+            DataSecurityMode.LEGACY_SINGLE_USER,
+            DataSecurityMode.LEGACY_TABLE_ACL,
+        ]
         support_status = spark_version_compatibility(cluster.spark_version)
         if support_status != "supported":
             failures.append(f"not supported DBR: {cluster.spark_version}")
@@ -111,6 +117,10 @@ def check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[str]:
             failures.extend(self._check_cluster_policy(cluster.policy_id, source))
         if cluster.init_scripts is not None:
             failures.extend(self._check_cluster_init_script(cluster.init_scripts, source))
+        if cluster.data_security_mode == DataSecurityMode.NONE:
+            failures.append("No isolation shared clusters not supported in UC")
+        if cluster.data_security_mode in unsupported_cluster_types:
+            failures.append(f"cluster type not supported : {cluster.data_security_mode.value}")
 
         return failures
```
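
The two new checks can be exercised in isolation. The sketch below assumes only databricks-sdk and mirrors the logic above rather than calling ClustersCrawler; it shows which failure string each mode produces:

```python
from databricks.sdk.service.compute import ClusterDetails, DataSecurityMode

# Same list as the commit introduces in check_cluster_failures.
UNSUPPORTED_CLUSTER_TYPES = [
    DataSecurityMode.LEGACY_PASSTHROUGH,
    DataSecurityMode.LEGACY_SINGLE_USER,
    DataSecurityMode.LEGACY_TABLE_ACL,
]

def mode_failures(cluster: ClusterDetails) -> list[str]:
    """Illustrative re-implementation of the new data_security_mode checks."""
    failures: list[str] = []
    if cluster.data_security_mode == DataSecurityMode.NONE:
        failures.append("No isolation shared clusters not supported in UC")
    if cluster.data_security_mode in UNSUPPORTED_CLUSTER_TYPES:
        failures.append(f"cluster type not supported : {cluster.data_security_mode.value}")
    return failures

print(mode_failures(ClusterDetails(data_security_mode=DataSecurityMode.LEGACY_TABLE_ACL)))
# ['cluster type not supported : LEGACY_TABLE_ACL']
```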

tests/integration/assessment/test_clusters.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -2,6 +2,7 @@
 
 from databricks.sdk.errors import NotFound
 from databricks.sdk.retries import retried
+from databricks.sdk.service.compute import DataSecurityMode
 
 from databricks.labs.ucx.assessment.clusters import ClustersCrawler
 
@@ -22,3 +23,16 @@ def test_cluster_crawler(ws, make_cluster, inventory_schema, sql_backend):
 
     assert len(results) >= 1
     assert results[0].cluster_id == created_cluster.cluster_id
+
+
+def test_cluster_crawler_no_isolation(ws, make_cluster, inventory_schema, sql_backend):
+    created_cluster = make_cluster(data_security_mode=DataSecurityMode.NONE, num_workers=1)
+    cluster_crawler = ClustersCrawler(ws=ws, sbe=sql_backend, schema=inventory_schema)
+    clusters = cluster_crawler.snapshot()
+    results = []
+    for cluster in clusters:
+        if cluster.cluster_id == created_cluster.cluster_id:
+            results.append(cluster)
+
+    assert len(results) == 1
+    assert results[0].failures == '["No isolation shared clusters not supported in UC"]'
```

tests/unit/assessment/test_clusters.py

Lines changed: 35 additions & 0 deletions

```diff
@@ -7,6 +7,7 @@
     AutoScale,
     ClusterDetails,
     ClusterSource,
+    DataSecurityMode,
     DbfsStorageInfo,
     InitScriptInfo,
     WorkspaceStorageInfo,
@@ -265,3 +266,37 @@ def test_try_fetch():
     assert result_set[0].cluster_id == "000"
     assert result_set[0].success == 1
     assert result_set[0].failures == "123"
+
+
+def test_no_isolation_clusters():
+    sample_clusters = [
+        ClusterDetails(
+            cluster_name="No isolation shared",
+            spark_version="12.3.x-cpu-ml-scala2.12",
+            data_security_mode=DataSecurityMode.NONE,
+        )
+    ]
+    ws = workspace_client_mock()
+    mockBackend = MagicMock()
+    ws.clusters.list.return_value = sample_clusters
+    crawler = ClustersCrawler(ws, mockBackend, "ucx")
+    result_set = list(crawler.snapshot())
+    assert len(result_set) == 1
+    assert result_set[0].failures == '["No isolation shared clusters not supported in UC"]'
+
+
+def test_unsupported_clusters():
+    sample_clusters = [
+        ClusterDetails(
+            cluster_name="Passthrough cluster",
+            spark_version="12.3.x-cpu-ml-scala2.12",
+            data_security_mode=DataSecurityMode.LEGACY_PASSTHROUGH,
+        )
+    ]
+    ws = workspace_client_mock()
+    mockBackend = MagicMock()
+    ws.clusters.list.return_value = sample_clusters
+    crawler = ClustersCrawler(ws, mockBackend, "ucx")
+    result_set = list(crawler.snapshot())
+    assert len(result_set) == 1
+    assert result_set[0].failures == '["cluster type not supported : LEGACY_PASSTHROUGH"]'
```
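
As both assertions show, the crawler persists the failures column as a JSON-encoded string rather than a native list. A consumer of the inventory table would decode it first; a minimal sketch:

```python
import json

# failures comes back as a JSON string, as in the assertions above.
failures = json.loads('["cluster type not supported : LEGACY_PASSTHROUGH"]')
for failure in failures:
    print(failure)  # cluster type not supported : LEGACY_PASSTHROUGH
```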
