Commit c03596e

Cleaned up Job assessment and Cluster assessment to improve testing and reduce redundancy. (#825)
1 parent 42da002 commit c03596e

9 files changed: +246 -268 lines changed


src/databricks/labs/ucx/assessment/clusters.py

Lines changed: 31 additions & 32 deletions
@@ -4,7 +4,7 @@

 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound
-from databricks.sdk.service.compute import ClusterSource, Policy
+from databricks.sdk.service.compute import ClusterDetails, ClusterSource, Policy

 from databricks.labs.ucx.assessment.crawlers import (
     _AZURE_SP_CONF_FAILURE_MSG,
@@ -67,6 +67,35 @@ def _check_init_scripts(self, cluster, failures):
                 continue
             failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} cluster.")

+    def _check_cluster_failures(self, cluster: ClusterDetails):
+        failures = []
+        if not cluster.creator_user_name:
+            logger.warning(
+                f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator "
+                f"has been deleted and should be re-created"
+            )
+        cluster_info = ClusterInfo(
+            cluster_id=cluster.cluster_id if cluster.cluster_id else "",
+            cluster_name=cluster.cluster_name,
+            creator=cluster.creator_user_name,
+            success=1,
+            failures="[]",
+        )
+        support_status = spark_version_compatibility(cluster.spark_version)
+        if support_status != "supported":
+            failures.append(f"not supported DBR: {cluster.spark_version}")
+        if cluster.spark_conf is not None:
+            self._check_spark_conf(cluster, failures)
+        # Checking if Azure cluster config is present in cluster policies
+        if cluster.policy_id:
+            self._check_cluster_policy(cluster, failures)
+        if cluster.init_scripts:
+            self._check_init_scripts(cluster, failures)
+        cluster_info.failures = json.dumps(failures)
+        if len(failures) > 0:
+            cluster_info.success = 0
+        return cluster_info
+

 class ClustersCrawler(CrawlerBase[ClusterInfo], ClustersMixin):
     def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
@@ -81,37 +110,7 @@ def _assess_clusters(self, all_clusters):
         for cluster in all_clusters:
             if cluster.cluster_source == ClusterSource.JOB:
                 continue
-            if not cluster.creator_user_name:
-                logger.warning(
-                    f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator "
-                    f"has been deleted and should be re-created"
-                )
-            cluster_info = ClusterInfo(
-                cluster_id=cluster.cluster_id,
-                cluster_name=cluster.cluster_name,
-                creator=cluster.creator_user_name,
-                success=1,
-                failures="[]",
-            )
-            support_status = spark_version_compatibility(cluster.spark_version)
-            failures = []
-            if support_status != "supported":
-                failures.append(f"not supported DBR: {cluster.spark_version}")
-
-            if cluster.spark_conf is not None:
-                self._check_spark_conf(cluster, failures)
-
-            # Checking if Azure cluster config is present in cluster policies
-            if cluster.policy_id:
-                self._check_cluster_policy(cluster, failures)
-
-            if cluster.init_scripts:
-                self._check_init_scripts(cluster, failures)
-
-            cluster_info.failures = json.dumps(failures)
-            if len(failures) > 0:
-                cluster_info.success = 0
-            yield cluster_info
+            yield self._check_cluster_failures(cluster)

     def snapshot(self) -> Iterable[ClusterInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
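
The consolidation makes the per-cluster checks callable on their own. Below is a minimal sketch (not part of this commit) of a unit test that drives the new _check_cluster_failures directly; the mocks, schema name and cluster values are illustrative assumptions.

import json
from unittest.mock import create_autospec

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import ClusterDetails

from databricks.labs.ucx.assessment.clusters import ClustersCrawler
from databricks.labs.ucx.framework.crawlers import SqlBackend


def test_check_cluster_failures_flags_unsupported_dbr():
    # no workspace calls are needed on this path, so both dependencies are mocked
    crawler = ClustersCrawler(create_autospec(WorkspaceClient), create_autospec(SqlBackend), "ucx")
    cluster = ClusterDetails(
        cluster_id="0123-456789-abcdefgh",  # hypothetical id
        cluster_name="legacy-cluster",
        creator_user_name="someone@example.com",  # placeholder
        spark_version="9.3.x-cpu-ml-scala2.12",  # assumed to fall below the supported DBR floor
    )
    info = crawler._check_cluster_failures(cluster)
    assert info.success == 0
    assert "not supported DBR: 9.3.x-cpu-ml-scala2.12" in json.loads(info.failures)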

src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 3 additions & 1 deletion
@@ -63,7 +63,9 @@ def _azure_sp_conf_present_check(config: dict) -> bool:
     return False


-def spark_version_compatibility(spark_version: str) -> str:
+def spark_version_compatibility(spark_version: str | None) -> str:
+    if not spark_version:
+        return "unreported version"
     first_comp_custom_rt = 3
     first_comp_custom_x = 2
     dbr_version_components = spark_version.split("-")
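
For illustration, the new guard means a missing runtime version is reported explicitly instead of raising an AttributeError on .split(); the return value is taken straight from the diff above.

from databricks.labs.ucx.assessment.crawlers import spark_version_compatibility

assert spark_version_compatibility(None) == "unreported version"
assert spark_version_compatibility("") == "unreported version"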

src/databricks/labs/ucx/assessment/jobs.py

Lines changed: 6 additions & 60 deletions
@@ -3,19 +3,10 @@
 from dataclasses import dataclass

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors import NotFound
-from databricks.sdk.service.compute import Policy
 from databricks.sdk.service.jobs import BaseJob

-from databricks.labs.ucx.assessment.crawlers import (
-    _AZURE_SP_CONF_FAILURE_MSG,
-    INCOMPATIBLE_SPARK_CONFIG_KEYS,
-    _azure_sp_conf_in_init_scripts,
-    _azure_sp_conf_present_check,
-    _get_init_script_data,
-    logger,
-    spark_version_compatibility,
-)
+from databricks.labs.ucx.assessment.clusters import ClustersMixin
+from databricks.labs.ucx.assessment.crawlers import logger
 from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend


@@ -28,7 +19,7 @@ class JobInfo:
     creator: str | None = None


-class JobsMixin:
+class JobsMixin(ClustersMixin):
     @staticmethod
     def _get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
         for j in all_jobs:
@@ -90,30 +81,12 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[
             )

         for job, cluster_config in self._get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
-            support_status = spark_version_compatibility(cluster_config.spark_version)
             job_id = job.job_id
             if not job_id:
                 continue
-            if support_status != "supported":
-                job_assessment[job_id].add(f"not supported DBR: {cluster_config.spark_version}")
-
-            if cluster_config.spark_conf is not None:
-                self._job_spark_conf(cluster_config, job_assessment, job_id)
-
-            # Checking if Azure cluster config is present in cluster policies
-            if cluster_config.policy_id:
-                policy = self._safe_get_cluster_policy(cluster_config.policy_id)
-                if policy is None:
-                    continue
-                if policy.definition:
-                    if _azure_sp_conf_present_check(json.loads(policy.definition)):
-                        job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-                if policy.policy_family_definition_overrides:
-                    if _azure_sp_conf_present_check(json.loads(policy.policy_family_definition_overrides)):
-                        job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-            if cluster_config.init_scripts:
-                self._init_scripts(cluster_config, job_assessment, job_id)
+            cluster_failures = self._check_cluster_failures(cluster_config)
+            for failure in json.loads(cluster_failures.failures):
+                job_assessment[job_id].add(failure)

         # TODO: next person looking at this - rewrite, as this code makes no sense
         for job_key in job_details.keys():  # pylint: disable=consider-using-dict-items,consider-iterating-dictionary
@@ -122,33 +95,6 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[
                 job_details[job_key].success = 0
         return list(job_details.values())

-    def _init_scripts(self, cluster_config, job_assessment, job_id):
-        for init_script_info in cluster_config.init_scripts:
-            init_script_data = _get_init_script_data(self._ws, init_script_info)
-            if not init_script_data:
-                continue
-            if not _azure_sp_conf_in_init_scripts(init_script_data):
-                continue
-            job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-    def _job_spark_conf(self, cluster_config, job_assessment, job_id):
-        for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
-            if k in cluster_config.spark_conf:
-                job_assessment[job_id].add(f"unsupported config: {k}")
-        for value in cluster_config.spark_conf.values():
-            if "dbfs:/mnt" in value or "/dbfs/mnt" in value:
-                job_assessment[job_id].add(f"using DBFS mount in configuration: {value}")
-        # Checking if Azure cluster config is present in spark config
-        if _azure_sp_conf_present_check(cluster_config.spark_conf):
-            job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-    def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
-        try:
-            return self._ws.cluster_policies.get(policy_id)
-        except NotFound:
-            logger.warning(f"The cluster policy was deleted: {policy_id}")
-            return None
-
     def snapshot(self) -> Iterable[JobInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
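
Because JobsMixin now extends ClustersMixin, job clusters and interactive clusters are assessed by the same _check_cluster_failures helper and report identical failure strings. A tiny sketch of that relationship, assuming ClustersMixin only relies on the self._ws client that JobsMixin subclasses such as JobsCrawler already hold:

from databricks.labs.ucx.assessment.clusters import ClustersMixin
from databricks.labs.ucx.assessment.jobs import JobsMixin

# the shared check is inherited rather than re-implemented in the jobs module
assert issubclass(JobsMixin, ClustersMixin)
assert hasattr(JobsMixin, "_check_cluster_failures")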

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"creator_user_name":"[email protected]",
8+
"cluster_id": "0123-190044-11111111",
9+
"cluster_name": "Single User Cluster Name",
10+
"policy_id": "single-user-with-spn",
11+
"spark_version": "13.3.x-cpu-ml-scala2.12",
12+
"spark_conf" : {
13+
"spark.databricks.delta.preview.enabled": "true"
14+
},
15+
"spark_context_id":"5134472582179565315"
16+
},
17+
{
18+
"autoscale": {
19+
"max_workers": 6,
20+
"min_workers": 1
21+
},
22+
"creator_user_name":"[email protected]",
23+
"cluster_id": "0123-190044-1122334444",
24+
"cluster_name": "Single User Cluster Name",
25+
"policy_id": "single-user-with-spn",
26+
"spark_version": "13.3.x-cpu-ml-scala2.12",
27+
"spark_conf" : {
28+
"spark.hadoop.fs.azure.account.oauth2.client.id.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_app_client_id}}",
29+
"spark.hadoop.fs.azure.account.oauth2.client.endpoint.abcde.dfs.core.windows.net": "https://login.microsoftonline.com/dedededede/token",
30+
"spark.hadoop.fs.azure.account.oauth2.client.secret.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_secret}}"
31+
},
32+
"spark_context_id":"5134472582179565315"
33+
},
34+
{
35+
"autoscale": {
36+
"max_workers": 6,
37+
"min_workers": 1
38+
},
39+
"creator_user_name":"[email protected]",
40+
"cluster_id": "0123-190044-1122334422",
41+
"cluster_name": "Single User Cluster Name",
42+
"policy_id": "single-user-with-spn",
43+
"spark_version": "9.3.x-cpu-ml-scala2.12",
44+
"spark_conf" : {
45+
"spark.databricks.delta.preview.enabled": "true"
46+
},
47+
"spark_context_id":"5134472582179565315"
48+
},
49+
{
50+
"autoscale": {
51+
"max_workers": 6,
52+
"min_workers": 1
53+
},
54+
"creator_user_name":"[email protected]",
55+
"cluster_id": "0123-190044-1122334411",
56+
"cluster_name": "Single User Cluster Name",
57+
"policy_id": "azure-oauth",
58+
"spark_version": "13.3.x-cpu-ml-scala2.12",
59+
"spark_conf" : {
60+
"spark.databricks.delta.preview.enabled": "true"
61+
},
62+
"spark_context_id":"5134472582179565315"
63+
}
64+
]
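
The fixture above is a plain JSON rendering of cluster payloads, so a test can rehydrate it into SDK objects. A hypothetical loader (the helper name and path are placeholders, not part of this commit, and it assumes the SDK's generated from_dict classmethod):

import json
from pathlib import Path

from databricks.sdk.service.compute import ClusterDetails


def load_cluster_fixture(path: Path) -> list[ClusterDetails]:
    # each list entry mirrors the ClusterDetails dataclass fields
    return [ClusterDetails.from_dict(raw) for raw in json.loads(path.read_text())]


clusters = load_cluster_fixture(Path("cluster-fixtures.json"))  # placeholder path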
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"cluster_id": "01234-11223344-1122334455",
8+
"cluster_name": "UCX Cluster",
9+
"policy_id": "single-user-with-spn",
10+
"spark_version": "13.3.x-cpu-ml-scala2.12",
11+
"init_scripts": [
12+
{
13+
"dbfs": {
14+
"destination": ":/users/[email protected]/init_scripts/test.sh"
15+
}
16+
},
17+
{
18+
"dbfs": {
19+
"destination": "dbfs"
20+
}
21+
},
22+
{
23+
"dbfs": {
24+
"destination": "dbfs:"
25+
}
26+
},
27+
{
28+
"workspace": {
29+
"destination": "/Users/[email protected]/.ucx/startup.sh"
30+
}
31+
}
32+
]
33+
}
34+
]
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"cluster_id": "0915-190044-3dqy6751",
8+
"cluster_name": "Tech Summit FY24 Cluster-2",
9+
"policy_id": "single-user-with-spn",
10+
"spark_version": "9.3.x-cpu-ml-scala2.12",
11+
"spark_conf" : {
12+
"spark.databricks.passthrough.enabled": "True"
13+
}
14+
}
15+
]
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
1+
{
2+
"definition": {
3+
"spark_conf.fs.azure.account.auth.type": {
4+
"type": "fixed",
5+
"value": "OAuth",
6+
"hidden": true
7+
},
8+
"spark_conf.fs.azure.account.oauth.provider.type": {
9+
"type": "fixed",
10+
"value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
11+
"hidden": true
12+
},
13+
"spark_conf.fs.azure.account.oauth2.client.id": {
14+
"type": "fixed",
15+
"value": "fsfsfsfsffsfsf",
16+
"hidden": true
17+
},
18+
"spark_conf.fs.azure.account.oauth2.client.secret": {
19+
"type": "fixed",
20+
"value": "gfgfgfgfggfggfgfdds",
21+
"hidden": true
22+
},
23+
"spark_conf.fs.azure.account.oauth2.client.endpoint": {
24+
"type": "fixed",
25+
"value": "https://login.microsoftonline.com/1234ededed/oauth2/token",
26+
"hidden": true
27+
}
28+
},
29+
"policy_family_definition_overrides": {
30+
"spark_conf.fs.azure.account.auth.type": {
31+
"type": "fixed",
32+
"value": "OAuth",
33+
"hidden": true
34+
},
35+
"spark_conf.fs.azure.account.oauth.provider.type": {
36+
"type": "fixed",
37+
"value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
38+
"hidden": true
39+
},
40+
"spark_conf.fs.azure.account.oauth2.client.id": {
41+
"type": "fixed",
42+
"value": "fsfsfsfsffsfsf",
43+
"hidden": true
44+
},
45+
"spark_conf.fs.azure.account.oauth2.client.secret": {
46+
"type": "fixed",
47+
"value": "gfgfgfgfggfggfgfdds",
48+
"hidden": true
49+
},
50+
"spark_conf.fs.azure.account.oauth2.client.endpoint": {
51+
"type": "fixed",
52+
"value": "https://login.microsoftonline.com/1234ededed/oauth2/token",
53+
"hidden": true
54+
}
55+
}
56+
}
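
This policy fixture carries the Azure service-principal keys in both "definition" and "policy_family_definition_overrides", the two places the centralized policy check inspects. A rough sketch of that scan, reusing _azure_sp_conf_present_check from the crawlers module and assuming it matches on substrings of the config keys; the fixture path is a placeholder:

import json
from pathlib import Path

from databricks.labs.ucx.assessment.crawlers import _azure_sp_conf_present_check

policy = json.loads(Path("policy-fixture.json").read_text())  # placeholder path
has_spn_conf = _azure_sp_conf_present_check(policy["definition"]) or _azure_sp_conf_present_check(
    policy.get("policy_family_definition_overrides", {})
)
assert has_spn_conf  # the fixed OAuth client id/secret/endpoint entries trigger the check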
