Commit c03596e

Cleaned up Job assessment and Cluster assessment to improve testing and reduce redundancy. (#825)
1 parent 42da002 commit c03596e

9 files changed: +246 -268 lines changed


src/databricks/labs/ucx/assessment/clusters.py

Lines changed: 31 additions & 32 deletions
@@ -4,7 +4,7 @@

 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound
-from databricks.sdk.service.compute import ClusterSource, Policy
+from databricks.sdk.service.compute import ClusterDetails, ClusterSource, Policy

 from databricks.labs.ucx.assessment.crawlers import (
     _AZURE_SP_CONF_FAILURE_MSG,
@@ -67,6 +67,35 @@ def _check_init_scripts(self, cluster, failures):
                 continue
             failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} cluster.")

+    def _check_cluster_failures(self, cluster: ClusterDetails):
+        failures = []
+        if not cluster.creator_user_name:
+            logger.warning(
+                f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator "
+                f"has been deleted and should be re-created"
+            )
+        cluster_info = ClusterInfo(
+            cluster_id=cluster.cluster_id if cluster.cluster_id else "",
+            cluster_name=cluster.cluster_name,
+            creator=cluster.creator_user_name,
+            success=1,
+            failures="[]",
+        )
+        support_status = spark_version_compatibility(cluster.spark_version)
+        if support_status != "supported":
+            failures.append(f"not supported DBR: {cluster.spark_version}")
+        if cluster.spark_conf is not None:
+            self._check_spark_conf(cluster, failures)
+        # Checking if Azure cluster config is present in cluster policies
+        if cluster.policy_id:
+            self._check_cluster_policy(cluster, failures)
+        if cluster.init_scripts:
+            self._check_init_scripts(cluster, failures)
+        cluster_info.failures = json.dumps(failures)
+        if len(failures) > 0:
+            cluster_info.success = 0
+        return cluster_info
+

 class ClustersCrawler(CrawlerBase[ClusterInfo], ClustersMixin):
     def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
@@ -81,37 +110,7 @@ def _assess_clusters(self, all_clusters):
         for cluster in all_clusters:
             if cluster.cluster_source == ClusterSource.JOB:
                 continue
-            if not cluster.creator_user_name:
-                logger.warning(
-                    f"Cluster {cluster.cluster_id} have Unknown creator, it means that the original creator "
-                    f"has been deleted and should be re-created"
-                )
-            cluster_info = ClusterInfo(
-                cluster_id=cluster.cluster_id,
-                cluster_name=cluster.cluster_name,
-                creator=cluster.creator_user_name,
-                success=1,
-                failures="[]",
-            )
-            support_status = spark_version_compatibility(cluster.spark_version)
-            failures = []
-            if support_status != "supported":
-                failures.append(f"not supported DBR: {cluster.spark_version}")
-
-            if cluster.spark_conf is not None:
-                self._check_spark_conf(cluster, failures)
-
-            # Checking if Azure cluster config is present in cluster policies
-            if cluster.policy_id:
-                self._check_cluster_policy(cluster, failures)
-
-            if cluster.init_scripts:
-                self._check_init_scripts(cluster, failures)
-
-            cluster_info.failures = json.dumps(failures)
-            if len(failures) > 0:
-                cluster_info.success = 0
-            yield cluster_info
+            yield self._check_cluster_failures(cluster)

     def snapshot(self) -> Iterable[ClusterInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
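
The consolidation makes the per-cluster checks callable on their own. Below is a minimal sketch (not part of this commit) of a unit test that drives the new _check_cluster_failures directly; the mocks, schema name and cluster values are illustrative assumptions.

import json
from unittest.mock import create_autospec

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import ClusterDetails

from databricks.labs.ucx.assessment.clusters import ClustersCrawler
from databricks.labs.ucx.framework.crawlers import SqlBackend


def test_check_cluster_failures_flags_unsupported_dbr():
    # no workspace calls are needed on this path, so both dependencies are mocked
    crawler = ClustersCrawler(create_autospec(WorkspaceClient), create_autospec(SqlBackend), "ucx")
    cluster = ClusterDetails(
        cluster_id="0123-456789-abcdefgh",  # hypothetical id
        cluster_name="legacy-cluster",
        creator_user_name="someone@example.com",  # placeholder
        spark_version="9.3.x-cpu-ml-scala2.12",  # assumed to fall below the supported DBR floor
    )
    info = crawler._check_cluster_failures(cluster)
    assert info.success == 0
    assert "not supported DBR: 9.3.x-cpu-ml-scala2.12" in json.loads(info.failures)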

src/databricks/labs/ucx/assessment/crawlers.py

Lines changed: 3 additions & 1 deletion
@@ -63,7 +63,9 @@ def _azure_sp_conf_present_check(config: dict) -> bool:
     return False


-def spark_version_compatibility(spark_version: str) -> str:
+def spark_version_compatibility(spark_version: str | None) -> str:
+    if not spark_version:
+        return "unreported version"
     first_comp_custom_rt = 3
     first_comp_custom_x = 2
     dbr_version_components = spark_version.split("-")
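
For illustration, the new guard means a missing runtime version is reported explicitly instead of raising an AttributeError on .split(); the return value is taken straight from the diff above.

from databricks.labs.ucx.assessment.crawlers import spark_version_compatibility

assert spark_version_compatibility(None) == "unreported version"
assert spark_version_compatibility("") == "unreported version"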

src/databricks/labs/ucx/assessment/jobs.py

Lines changed: 6 additions & 60 deletions
@@ -3,19 +3,10 @@
 from dataclasses import dataclass

 from databricks.sdk import WorkspaceClient
-from databricks.sdk.errors import NotFound
-from databricks.sdk.service.compute import Policy
 from databricks.sdk.service.jobs import BaseJob

-from databricks.labs.ucx.assessment.crawlers import (
-    _AZURE_SP_CONF_FAILURE_MSG,
-    INCOMPATIBLE_SPARK_CONFIG_KEYS,
-    _azure_sp_conf_in_init_scripts,
-    _azure_sp_conf_present_check,
-    _get_init_script_data,
-    logger,
-    spark_version_compatibility,
-)
+from databricks.labs.ucx.assessment.clusters import ClustersMixin
+from databricks.labs.ucx.assessment.crawlers import logger
 from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend


@@ -28,7 +19,7 @@ class JobInfo:
     creator: str | None = None


-class JobsMixin:
+class JobsMixin(ClustersMixin):
     @staticmethod
     def _get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
         for j in all_jobs:
@@ -90,30 +81,12 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[
             )

         for job, cluster_config in self._get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
-            support_status = spark_version_compatibility(cluster_config.spark_version)
             job_id = job.job_id
             if not job_id:
                 continue
-            if support_status != "supported":
-                job_assessment[job_id].add(f"not supported DBR: {cluster_config.spark_version}")
-
-            if cluster_config.spark_conf is not None:
-                self._job_spark_conf(cluster_config, job_assessment, job_id)
-
-            # Checking if Azure cluster config is present in cluster policies
-            if cluster_config.policy_id:
-                policy = self._safe_get_cluster_policy(cluster_config.policy_id)
-                if policy is None:
-                    continue
-                if policy.definition:
-                    if _azure_sp_conf_present_check(json.loads(policy.definition)):
-                        job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-                if policy.policy_family_definition_overrides:
-                    if _azure_sp_conf_present_check(json.loads(policy.policy_family_definition_overrides)):
-                        job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-            if cluster_config.init_scripts:
-                self._init_scripts(cluster_config, job_assessment, job_id)
+            cluster_failures = self._check_cluster_failures(cluster_config)
+            for failure in json.loads(cluster_failures.failures):
+                job_assessment[job_id].add(failure)

         # TODO: next person looking at this - rewrite, as this code makes no sense
         for job_key in job_details.keys():  # pylint: disable=consider-using-dict-items,consider-iterating-dictionary
@@ -122,33 +95,6 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[
                 job_details[job_key].success = 0
         return list(job_details.values())

-    def _init_scripts(self, cluster_config, job_assessment, job_id):
-        for init_script_info in cluster_config.init_scripts:
-            init_script_data = _get_init_script_data(self._ws, init_script_info)
-            if not init_script_data:
-                continue
-            if not _azure_sp_conf_in_init_scripts(init_script_data):
-                continue
-            job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-    def _job_spark_conf(self, cluster_config, job_assessment, job_id):
-        for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
-            if k in cluster_config.spark_conf:
-                job_assessment[job_id].add(f"unsupported config: {k}")
-        for value in cluster_config.spark_conf.values():
-            if "dbfs:/mnt" in value or "/dbfs/mnt" in value:
-                job_assessment[job_id].add(f"using DBFS mount in configuration: {value}")
-        # Checking if Azure cluster config is present in spark config
-        if _azure_sp_conf_present_check(cluster_config.spark_conf):
-            job_assessment[job_id].add(f"{_AZURE_SP_CONF_FAILURE_MSG} Job cluster.")
-
-    def _safe_get_cluster_policy(self, policy_id: str) -> Policy | None:
-        try:
-            return self._ws.cluster_policies.get(policy_id)
-        except NotFound:
-            logger.warning(f"The cluster policy was deleted: {policy_id}")
-            return None
-
     def snapshot(self) -> Iterable[JobInfo]:
         return self._snapshot(self._try_fetch, self._crawl)
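
Because JobsMixin now extends ClustersMixin, job clusters and interactive clusters are assessed by the same _check_cluster_failures helper and report identical failure strings. A tiny sketch of that relationship, assuming ClustersMixin only relies on the self._ws client that JobsMixin subclasses such as JobsCrawler already hold:

from databricks.labs.ucx.assessment.clusters import ClustersMixin
from databricks.labs.ucx.assessment.jobs import JobsMixin

# the shared check is inherited rather than re-implemented in the jobs module
assert issubclass(JobsMixin, ClustersMixin)
assert hasattr(JobsMixin, "_check_cluster_failures")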

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"creator_user_name":"[email protected]",
8+
"cluster_id": "0123-190044-11111111",
9+
"cluster_name": "Single User Cluster Name",
10+
"policy_id": "single-user-with-spn",
11+
"spark_version": "13.3.x-cpu-ml-scala2.12",
12+
"spark_conf" : {
13+
"spark.databricks.delta.preview.enabled": "true"
14+
},
15+
"spark_context_id":"5134472582179565315"
16+
},
17+
{
18+
"autoscale": {
19+
"max_workers": 6,
20+
"min_workers": 1
21+
},
22+
"creator_user_name":"[email protected]",
23+
"cluster_id": "0123-190044-1122334444",
24+
"cluster_name": "Single User Cluster Name",
25+
"policy_id": "single-user-with-spn",
26+
"spark_version": "13.3.x-cpu-ml-scala2.12",
27+
"spark_conf" : {
28+
"spark.hadoop.fs.azure.account.oauth2.client.id.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_app_client_id}}",
29+
"spark.hadoop.fs.azure.account.oauth2.client.endpoint.abcde.dfs.core.windows.net": "https://login.microsoftonline.com/dedededede/token",
30+
"spark.hadoop.fs.azure.account.oauth2.client.secret.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_secret}}"
31+
},
32+
"spark_context_id":"5134472582179565315"
33+
},
34+
{
35+
"autoscale": {
36+
"max_workers": 6,
37+
"min_workers": 1
38+
},
39+
"creator_user_name":"[email protected]",
40+
"cluster_id": "0123-190044-1122334422",
41+
"cluster_name": "Single User Cluster Name",
42+
"policy_id": "single-user-with-spn",
43+
"spark_version": "9.3.x-cpu-ml-scala2.12",
44+
"spark_conf" : {
45+
"spark.databricks.delta.preview.enabled": "true"
46+
},
47+
"spark_context_id":"5134472582179565315"
48+
},
49+
{
50+
"autoscale": {
51+
"max_workers": 6,
52+
"min_workers": 1
53+
},
54+
"creator_user_name":"[email protected]",
55+
"cluster_id": "0123-190044-1122334411",
56+
"cluster_name": "Single User Cluster Name",
57+
"policy_id": "azure-oauth",
58+
"spark_version": "13.3.x-cpu-ml-scala2.12",
59+
"spark_conf" : {
60+
"spark.databricks.delta.preview.enabled": "true"
61+
},
62+
"spark_context_id":"5134472582179565315"
63+
}
64+
]
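
The fixture above is a plain JSON rendering of cluster payloads, so a test can rehydrate it into SDK objects. A hypothetical loader (the helper name and path are placeholders, not part of this commit, and it assumes the SDK's generated from_dict classmethod):

import json
from pathlib import Path

from databricks.sdk.service.compute import ClusterDetails


def load_cluster_fixture(path: Path) -> list[ClusterDetails]:
    # each list entry mirrors the ClusterDetails dataclass fields
    return [ClusterDetails.from_dict(raw) for raw in json.loads(path.read_text())]


clusters = load_cluster_fixture(Path("cluster-fixtures.json"))  # placeholder path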
Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"cluster_id": "01234-11223344-1122334455",
8+
"cluster_name": "UCX Cluster",
9+
"policy_id": "single-user-with-spn",
10+
"spark_version": "13.3.x-cpu-ml-scala2.12",
11+
"init_scripts": [
12+
{
13+
"dbfs": {
14+
"destination": ":/users/[email protected]/init_scripts/test.sh"
15+
}
16+
},
17+
{
18+
"dbfs": {
19+
"destination": "dbfs"
20+
}
21+
},
22+
{
23+
"dbfs": {
24+
"destination": "dbfs:"
25+
}
26+
},
27+
{
28+
"workspace": {
29+
"destination": "/Users/[email protected]/.ucx/startup.sh"
30+
}
31+
}
32+
]
33+
}
34+
]
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"autoscale": {
4+
"max_workers": 6,
5+
"min_workers": 1
6+
},
7+
"cluster_id": "0915-190044-3dqy6751",
8+
"cluster_name": "Tech Summit FY24 Cluster-2",
9+
"policy_id": "single-user-with-spn",
10+
"spark_version": "9.3.x-cpu-ml-scala2.12",
11+
"spark_conf" : {
12+
"spark.databricks.passthrough.enabled": "True"
13+
}
14+
}
15+
]
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
1+
{
2+
"definition": {
3+
"spark_conf.fs.azure.account.auth.type": {
4+
"type": "fixed",
5+
"value": "OAuth",
6+
"hidden": true
7+
},
8+
"spark_conf.fs.azure.account.oauth.provider.type": {
9+
"type": "fixed",
10+
"value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
11+
"hidden": true
12+
},
13+
"spark_conf.fs.azure.account.oauth2.client.id": {
14+
"type": "fixed",
15+
"value": "fsfsfsfsffsfsf",
16+
"hidden": true
17+
},
18+
"spark_conf.fs.azure.account.oauth2.client.secret": {
19+
"type": "fixed",
20+
"value": "gfgfgfgfggfggfgfdds",
21+
"hidden": true
22+
},
23+
"spark_conf.fs.azure.account.oauth2.client.endpoint": {
24+
"type": "fixed",
25+
"value": "https://login.microsoftonline.com/1234ededed/oauth2/token",
26+
"hidden": true
27+
}
28+
},
29+
"policy_family_definition_overrides": {
30+
"spark_conf.fs.azure.account.auth.type": {
31+
"type": "fixed",
32+
"value": "OAuth",
33+
"hidden": true
34+
},
35+
"spark_conf.fs.azure.account.oauth.provider.type": {
36+
"type": "fixed",
37+
"value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
38+
"hidden": true
39+
},
40+
"spark_conf.fs.azure.account.oauth2.client.id": {
41+
"type": "fixed",
42+
"value": "fsfsfsfsffsfsf",
43+
"hidden": true
44+
},
45+
"spark_conf.fs.azure.account.oauth2.client.secret": {
46+
"type": "fixed",
47+
"value": "gfgfgfgfggfggfgfdds",
48+
"hidden": true
49+
},
50+
"spark_conf.fs.azure.account.oauth2.client.endpoint": {
51+
"type": "fixed",
52+
"value": "https://login.microsoftonline.com/1234ededed/oauth2/token",
53+
"hidden": true
54+
}
55+
}
56+
}
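
This policy fixture carries the Azure service-principal keys in both "definition" and "policy_family_definition_overrides", the two places the centralized policy check inspects. A rough sketch of that scan, reusing _azure_sp_conf_present_check from the crawlers module and assuming it matches on substrings of the config keys; the fixture path is a placeholder:

import json
from pathlib import Path

from databricks.labs.ucx.assessment.crawlers import _azure_sp_conf_present_check

policy = json.loads(Path("policy-fixture.json").read_text())  # placeholder path
has_spn_conf = _azure_sp_conf_present_check(policy["definition"]) or _azure_sp_conf_present_check(
    policy.get("policy_family_definition_overrides", {})
)
assert has_spn_conf  # the fixed OAuth client id/secret/endpoint entries trigger the check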
