
Commit cfbf595

Relax default num_workers=0 for job_clusters (#4080)
## Why

Terraform only adds the default num_workers=0 if neither autoscale nor num_workers is set, and filters it out if autoscale is set.

## Tests

New acceptance test that covers different configurations and asserts that the requests are the same. The difference also disappears in existing tests: in default-python, recorded requests now show no difference with respect to num_workers. In migrate/default-python, num_workers no longer triggers drift after migration (#4068).
1 parent 27ba2ab commit cfbf595

26 files changed: +252 −57 lines
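
To make the cases concrete, here is a minimal sketch of the job_clusters shapes exercised by the new test; the job name, cluster keys, and values below are illustrative, not taken verbatim from the commit:

# Illustrative databricks.yml fragment (hypothetical names).
resources:
  jobs:
    example_job:
      name: example_job
      job_clusters:
        # autoscale set: Terraform filters num_workers out of the request,
        # and with this change the CLI's direct backend no longer injects a
        # default num_workers: 0 either, so both produce the same request.
        - job_cluster_key: autoscaled
          new_cluster:
            spark_version: 16.4.x-scala2.12
            node_type_id: $NODE_TYPE_ID
            autoscale:
              min_workers: 1
              max_workers: 4
        # num_workers set explicitly: sent as-is by both backends.
        - job_cluster_key: fixed_size
          new_cluster:
            spark_version: 16.4.x-scala2.12
            node_type_id: $NODE_TYPE_ID
            num_workers: 1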

acceptance/bundle/migrate/default-python/out.plan_after_deploy.json

Lines changed: 0 additions & 2 deletions

@@ -26,7 +26,6 @@
         },
         "data_security_mode": "SINGLE_USER",
         "node_type_id": "[NODE_TYPE_ID]",
-        "num_workers": 0,
         "spark_version": "16.4.x-scala2.12"
       }
     }
@@ -130,7 +129,6 @@
         },
         "data_security_mode": "SINGLE_USER",
         "node_type_id": "[NODE_TYPE_ID]",
-        "num_workers": 0,
         "spark_version": "16.4.x-scala2.12"
       }
     }

acceptance/bundle/migrate/default-python/out.plan_after_migration.json

Lines changed: 0 additions & 5 deletions

@@ -26,7 +26,6 @@
         },
         "data_security_mode": "SINGLE_USER",
         "node_type_id": "[NODE_TYPE_ID]",
-        "num_workers": 0,
         "spark_version": "16.4.x-scala2.12"
       }
     }
@@ -231,10 +230,6 @@
       "action": "skip",
       "reason": "server_side_default"
     },
-    "job_clusters[0].new_cluster.num_workers": {
-      "action": "update",
-      "old": 0
-    },
     "tasks[task_key='notebook_task'].libraries[0].whl": {
       "action": "update",
       "old": "/Workspace/Users/[USERNAME]/.bundle/my_default_python/dev/artifacts/.internal/my_default_python-0.0.1+[UNIX_TIME_NANOS][2]-py3-none-any.whl",

acceptance/bundle/migrate/default-python/out.state_after_migration.json

Lines changed: 0 additions & 1 deletion

@@ -21,7 +21,6 @@
         },
         "data_security_mode": "SINGLE_USER",
         "node_type_id": "[NODE_TYPE_ID]",
-        "num_workers": 0,
         "spark_version": "16.4.x-scala2.12"
       }
     }

acceptance/bundle/migrate/default-python/output.txt

Lines changed: 0 additions & 8 deletions

@@ -82,10 +82,6 @@ Building python_artifact...
       "action": "skip",
       "reason": "server_side_default"
     },
-    "job_clusters[0].new_cluster.num_workers": {
-      "action": "update",
-      "old": 0
-    },
     "tasks[task_key='notebook_task'].libraries[0].whl": {
       "action": "update",
       "old": "/Workspace/Users/[USERNAME]/.bundle/my_default_python/dev/artifacts/.internal/my_default_python-0.0.1+[UNIX_TIME_NANOS][1]-py3-none-any.whl",
@@ -160,10 +156,6 @@ Building python_artifact...
       "action": "skip",
       "reason": "server_side_default"
     },
-    "job_clusters[0].new_cluster.num_workers": {
-      "action": "update",
-      "old": 0
-    },
     "tasks[task_key='notebook_task'].libraries[0].whl": {
       "action": "update",
       "old": "/Workspace/Users/[USERNAME]/.bundle/my_default_python/dev/artifacts/.internal/my_default_python-0.0.1+[UNIX_TIME_NANOS][1]-py3-none-any.whl",

acceptance/bundle/resource_deps/jobs_update_remote/job_update.json

Lines changed: 0 additions & 1 deletion

@@ -11,7 +11,6 @@
       {
         "job_cluster_key": "key",
         "new_cluster": {
-          "num_workers": 0,
           "spark_version": "13.3.x-scala2.12"
         }
       }

acceptance/bundle/resource_deps/jobs_update_remote/out.plan_update.direct.json

Lines changed: 4 additions & 1 deletion

@@ -98,7 +98,6 @@
       {
         "job_cluster_key": "key",
         "new_cluster": {
-          "num_workers": 0,
           "spark_version": "13.3.x-scala2.12"
         }
       }
@@ -125,6 +124,10 @@
       "action": "skip",
       "reason": "server_side_default"
     },
+    "job_clusters[0].new_cluster.num_workers": {
+      "action": "update",
+      "old": 0
+    },
     "timeout_seconds": {
       "action": "skip",
       "reason": "server_side_default"

acceptance/bundle/resource_deps/jobs_update_remote/output.txt

Lines changed: 0 additions & 1 deletion

@@ -107,7 +107,6 @@ Destroy complete!
       {
         "job_cluster_key": "key",
         "new_cluster": {
-          "num_workers": 0,
           "spark_version": "13.3.x-scala2.12"
         }
       }

acceptance/bundle/resources/jobs/create-error/output.txt

Lines changed: 0 additions & 4 deletions

@@ -1,9 +1,5 @@
 
 >>> musterr [CLI] bundle deploy --force-lock
-Warning: required field "new_cluster" is not set
-  at resources.jobs.foo.job_clusters[0]
-  in databricks.yml:7:11
-
 Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files...
 Deploying resources...
 Error: cannot create resources.jobs.foo: Shared job cluster feature is only supported in multi-task jobs. (400 INVALID_PARAMETER_VALUE)
acceptance/bundle/resources/jobs/num_workers/databricks.yml

Lines changed: 71 additions & 0 deletions

@@ -0,0 +1,71 @@
+bundle:
+  name: test-bundle
+
+resources:
+  jobs:
+    sample_job:
+      name: sample_job
+
+      trigger:
+        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
+        periodic:
+          interval: 1
+          unit: DAYS
+
+      tasks:
+        - task_key: notebook_task
+          notebook_task:
+            notebook_path: sample_notebook.py
+            source: WORKSPACE # Without this, there is a different request between direct and terraform
+
+      job_clusters:
+        - job_cluster_key: job_cluster_autoscale
+          new_cluster:
+            spark_version: 16.4.x-scala2.12
+            node_type_id: $NODE_TYPE_ID
+            data_security_mode: SINGLE_USER
+            autoscale:
+              min_workers: 1
+              max_workers: 4
+
+        # This config results in a different request between terraform and direct:
+        # Terraform removes "num_workers: 0" and direct sends it as is.
+        # This is an acceptable difference; users will get an appropriate error message from the backend and can correct their config.
+        #
+        #- job_cluster_key: job_cluster_autoscale_num_workers0
+        #  new_cluster:
+        #    spark_version: 16.4.x-scala2.13
+        #    node_type_id: $NODE_TYPE_ID
+        #    data_security_mode: SINGLE_USER
+        #    autoscale:
+        #      min_workers: 1
+        #      max_workers: 4
+        #    num_workers: 0
+
+        - job_cluster_key: job_cluster_autoscale_num_workers1
+          new_cluster:
+            spark_version: 16.4.x-scala2.14
+            node_type_id: $NODE_TYPE_ID
+            data_security_mode: SINGLE_USER
+            autoscale:
+              min_workers: 1
+              max_workers: 4
+            num_workers: 1
+
+        - job_cluster_key: job_cluster_num_workers1
+          new_cluster:
+            spark_version: 16.4.x-scala2.15
+            node_type_id: $NODE_TYPE_ID
+            data_security_mode: SINGLE_USER
+            num_workers: 1
+
+        - job_cluster_key: job_cluster_num_workers0
+          new_cluster:
+            spark_version: 16.4.x-scala2.16
+            node_type_id: $NODE_TYPE_ID
+            data_security_mode: SINGLE_USER
+            num_workers: 0
+
+        - job_cluster_key: job_cluster_default
+          new_cluster:
+            spark_version: 16.4.x-scala2.17
acceptance/bundle/resources/jobs/num_workers/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default.
