Commit cc4146b
Added legacy cluster types to the assessment result (#932)
## Changes

Adding missing support in the assessment for no isolation shared and legacy cluster types.

### Linked issues

Resolves #925

### Tests

- [x] manually tested
- [x] added unit tests
- [x] added integration tests
- [ ] verified on staging environment (screenshot attached)
1 parent ba9c52f commit cc4146b

File tree: 4 files changed, +67 -0 lines changed

docs/assessment.md

Lines changed: 8 additions & 0 deletions

```diff
@@ -128,6 +128,14 @@ A spark config option was found in a cluster compute definition that is incompat
 Passthrough security model is not supported by Unity Catalog. Passthrough mode relied upon file-based authorization, which is incompatible with the Fine Grained Access Controls supported by Unity Catalog.
 Recommend mapping your Passthrough security model to an External Location/Volume/Table/View based security model compatible with Unity Catalog.
 
+### AF116 - No isolation shared clusters not supported in UC
+Unity Catalog data cannot be accessed from No Isolation Shared clusters; they should not be used.
+
+### AF117 - cluster type not supported
+Only the Assigned and Shared access modes are supported in UC.
+You must change your cluster configuration to match UC-compliant access modes.
+
+
 ### AF201 - Inplace Sync
 Short description: We found that the table or database can be SYNC'd without moving data because the data is stored directly on cloud storage specified via a mount or a cloud storage URL (not DBFS).
 How: Run the SYNC command on the table or schema. If the tables (or source database) is 'managed' first set this spark setting in your session or in the interactive cluster configuration: `spark.databricks.sync.command.enableManagedTable=true`
```
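
The AF117 remediation ("change your cluster configuration to match UC-compliant access modes") can be scripted. Below is a minimal sketch, not part of this commit, that uses the Databricks SDK to re-submit a flagged cluster with the Assigned (single-user) access mode; the cluster id and user name are hypothetical placeholders.

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import DataSecurityMode

ws = WorkspaceClient()
# Hypothetical id of a cluster flagged with AF117.
cluster = ws.clusters.get(cluster_id="0123-456789-example")

# Re-submit the same cluster definition with a UC-compliant access mode.
# SINGLE_USER corresponds to the "Assigned" mode in the workspace UI.
ws.clusters.edit(
    cluster_id=cluster.cluster_id,
    cluster_name=cluster.cluster_name,
    spark_version=cluster.spark_version,
    node_type_id=cluster.node_type_id,
    num_workers=cluster.num_workers,
    data_security_mode=DataSecurityMode.SINGLE_USER,
    single_user_name="user@example.com",  # hypothetical; required for SINGLE_USER
)
```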

src/databricks/labs/ucx/assessment/clusters.py

Lines changed: 10 additions & 0 deletions

```diff
@@ -9,6 +9,7 @@
 from databricks.sdk.service.compute import (
     ClusterDetails,
     ClusterSource,
+    DataSecurityMode,
     InitScriptInfo,
     Policy,
 )
@@ -101,6 +102,11 @@ def check_spark_conf(self, conf: dict[str, str], source: str) -> list[str]:
     def check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[str]:
         failures: list[str] = []
 
+        unsupported_cluster_types = [
+            DataSecurityMode.LEGACY_PASSTHROUGH,
+            DataSecurityMode.LEGACY_SINGLE_USER,
+            DataSecurityMode.LEGACY_TABLE_ACL,
+        ]
         support_status = spark_version_compatibility(cluster.spark_version)
         if support_status != "supported":
             failures.append(f"not supported DBR: {cluster.spark_version}")
@@ -111,6 +117,10 @@ def check_cluster_failures(self, cluster: ClusterDetails, source: str) -> list[str]:
             failures.extend(self._check_cluster_policy(cluster.policy_id, source))
         if cluster.init_scripts is not None:
             failures.extend(self._check_cluster_init_script(cluster.init_scripts, source))
+        if cluster.data_security_mode == DataSecurityMode.NONE:
+            failures.append("No isolation shared clusters not supported in UC")
+        if cluster.data_security_mode in unsupported_cluster_types:
+            failures.append(f"cluster type not supported : {cluster.data_security_mode.value}")
 
         return failures
```
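
The two new checks can be exercised in isolation. The sketch below assumes only databricks-sdk and mirrors the logic above rather than calling ClustersCrawler; it shows which failure string each mode produces:

```python
from databricks.sdk.service.compute import ClusterDetails, DataSecurityMode

# Same list as the commit introduces in check_cluster_failures.
UNSUPPORTED_CLUSTER_TYPES = [
    DataSecurityMode.LEGACY_PASSTHROUGH,
    DataSecurityMode.LEGACY_SINGLE_USER,
    DataSecurityMode.LEGACY_TABLE_ACL,
]

def mode_failures(cluster: ClusterDetails) -> list[str]:
    """Illustrative re-implementation of the new data_security_mode checks."""
    failures: list[str] = []
    if cluster.data_security_mode == DataSecurityMode.NONE:
        failures.append("No isolation shared clusters not supported in UC")
    if cluster.data_security_mode in UNSUPPORTED_CLUSTER_TYPES:
        failures.append(f"cluster type not supported : {cluster.data_security_mode.value}")
    return failures

print(mode_failures(ClusterDetails(data_security_mode=DataSecurityMode.LEGACY_TABLE_ACL)))
# ['cluster type not supported : LEGACY_TABLE_ACL']
```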

tests/integration/assessment/test_clusters.py

Lines changed: 14 additions & 0 deletions

```diff
@@ -2,6 +2,7 @@
 
 from databricks.sdk.errors import NotFound
 from databricks.sdk.retries import retried
+from databricks.sdk.service.compute import DataSecurityMode
 
 from databricks.labs.ucx.assessment.clusters import ClustersCrawler
 
@@ -22,3 +23,16 @@ def test_cluster_crawler(ws, make_cluster, inventory_schema, sql_backend):
 
     assert len(results) >= 1
     assert results[0].cluster_id == created_cluster.cluster_id
+
+
+def test_cluster_crawler_no_isolation(ws, make_cluster, inventory_schema, sql_backend):
+    created_cluster = make_cluster(data_security_mode=DataSecurityMode.NONE, num_workers=1)
+    cluster_crawler = ClustersCrawler(ws=ws, sbe=sql_backend, schema=inventory_schema)
+    clusters = cluster_crawler.snapshot()
+    results = []
+    for cluster in clusters:
+        if cluster.cluster_id == created_cluster.cluster_id:
+            results.append(cluster)
+
+    assert len(results) == 1
+    assert results[0].failures == '["No isolation shared clusters not supported in UC"]'
```

tests/unit/assessment/test_clusters.py

Lines changed: 35 additions & 0 deletions

```diff
@@ -7,6 +7,7 @@
     AutoScale,
     ClusterDetails,
     ClusterSource,
+    DataSecurityMode,
     DbfsStorageInfo,
     InitScriptInfo,
     WorkspaceStorageInfo,
@@ -265,3 +266,37 @@ def test_try_fetch():
     assert result_set[0].cluster_id == "000"
     assert result_set[0].success == 1
     assert result_set[0].failures == "123"
+
+
+def test_no_isolation_clusters():
+    sample_clusters = [
+        ClusterDetails(
+            cluster_name="No isolation shared",
+            spark_version="12.3.x-cpu-ml-scala2.12",
+            data_security_mode=DataSecurityMode.NONE,
+        )
+    ]
+    ws = workspace_client_mock()
+    mockBackend = MagicMock()
+    ws.clusters.list.return_value = sample_clusters
+    crawler = ClustersCrawler(ws, mockBackend, "ucx")
+    result_set = list(crawler.snapshot())
+    assert len(result_set) == 1
+    assert result_set[0].failures == '["No isolation shared clusters not supported in UC"]'
+
+
+def test_unsupported_clusters():
+    sample_clusters = [
+        ClusterDetails(
+            cluster_name="Passthrough cluster",
+            spark_version="12.3.x-cpu-ml-scala2.12",
+            data_security_mode=DataSecurityMode.LEGACY_PASSTHROUGH,
+        )
+    ]
+    ws = workspace_client_mock()
+    mockBackend = MagicMock()
+    ws.clusters.list.return_value = sample_clusters
+    crawler = ClustersCrawler(ws, mockBackend, "ucx")
+    result_set = list(crawler.snapshot())
+    assert len(result_set) == 1
+    assert result_set[0].failures == '["cluster type not supported : LEGACY_PASSTHROUGH"]'
```
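
As both assertions show, the crawler persists the failures column as a JSON-encoded string rather than a native list. A consumer of the inventory table would decode it first; a minimal sketch:

```python
import json

# failures comes back as a JSON string, as in the assertions above.
failures = json.loads('["cluster type not supported : LEGACY_PASSTHROUGH"]')
for failure in failures:
    print(failure)  # cluster type not supported : LEGACY_PASSTHROUGH
```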
