Commit 7ec5a01

Authored by pateusz (Mateusz Poplawski)
Added support for shared job clusters (#1098)
Co-authored-by: Mateusz Poplawski <[email protected]>
1 parent adf93c4 commit 7ec5a01

5 files changed: +159 −8 lines changed


CHANGELOG.md
Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@
 ## 0.4.9
 
 * Prevent creation of `databricks_group` with `users` and `admins` reserved names ([#1089](https://github.com/databrickslabs/terraform-provider-databricks/issues/1089)).
+* Shared job cluster functionality added. ([#1082](https://github.com/databrickslabs/terraform-provider-databricks/issues/1082))
 
 ## 0.4.8

docs/resources/job.md
Lines changed: 24 additions & 0 deletions

@@ -57,6 +57,15 @@ It is possible to create [jobs with multiple tasks](https://docs.databricks.com/
 resource "databricks_job" "this" {
   name = "Job with multiple tasks"
 
+  job_cluster {
+    job_cluster_key = "j"
+    new_cluster {
+      num_workers   = 2
+      spark_version = data.databricks_spark_version.latest.id
+      node_type_id  = data.databricks_node_type.smallest.id
+    }
+  }
+
   task {
     task_key = "a"

@@ -84,6 +93,16 @@ resource "databricks_job" "this" {
       main_class_name = "com.acme.data.Main"
     }
   }
+
+  task {
+    task_key = "c"
+
+    job_cluster_key = "j"
+
+    notebook_task {
+      notebook_path = databricks_notebook.this.path
+    }
+  }
 }
 ```
 
@@ -106,6 +125,11 @@ The following arguments are required:
 * `email_notifications` - (Optional) (List) An optional set of email addresses notified when runs of this job begin and complete and when this job is deleted. The default behavior is to not send any emails. This field is a block and is documented below.
 * `schedule` - (Optional) (List) An optional periodic schedule for this job. The default behavior is that the job runs when triggered by clicking Run Now in the Jobs UI or sending an API request to runNow. This field is a block and is documented below.
 
+### job_cluster Configuration Block
+[Shared job cluster](https://docs.databricks.com/jobs.html#use-shared-job-clusters) specification. Allows multiple tasks in the same job run to reuse the cluster.
+* `job_cluster_key` - (Required) Identifier that can be referenced in a `task` block, so that the cluster is shared between tasks.
+* `new_cluster` - Same set of parameters as for the [databricks_cluster](cluster.md) resource.
+
 ### schedule Configuration Block
 
 * `quartz_cron_expression` - (Required) A [Cron expression using Quartz syntax](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) that describes the schedule for a job. This field is required.
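The `job_cluster` Configuration Block added above declares a keyed cluster once so that any `task` can opt into it via `job_cluster_key` (in the example, task `"c"` sets `job_cluster_key = "j"` and runs on the shared cluster). To make that reference explicit, here is a small, purely illustrative Go sketch; the reduced struct types and the `validateJobClusterKeys` helper are hypothetical and not part of the provider. It only shows the check the documentation implies: every `job_cluster_key` used by a task must match a declared `job_cluster`.

```go
package main

import "fmt"

// Reduced, illustrative stand-ins for the job_cluster and task blocks; only
// the fields needed for the key-reference check are included.
type jobCluster struct {
    JobClusterKey string
}

type task struct {
    TaskKey       string
    JobClusterKey string // empty when the task uses new_cluster or existing_cluster_id instead
}

// validateJobClusterKeys checks that every task referencing a job_cluster_key
// points at a job_cluster declared on the same job.
func validateJobClusterKeys(jobClusters []jobCluster, tasks []task) error {
    declared := map[string]bool{}
    for _, c := range jobClusters {
        declared[c.JobClusterKey] = true
    }
    for _, t := range tasks {
        if t.JobClusterKey != "" && !declared[t.JobClusterKey] {
            return fmt.Errorf("task %q references undefined job_cluster_key %q", t.TaskKey, t.JobClusterKey)
        }
    }
    return nil
}

func main() {
    jobClusters := []jobCluster{{JobClusterKey: "j"}}
    tasks := []task{
        {TaskKey: "a", JobClusterKey: "j"}, // reuses the shared cluster "j"
        {TaskKey: "b"},                     // brings its own cluster
    }
    fmt.Println(validateJobClusterKeys(jobClusters, tasks)) // prints <nil>
}
```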

jobs/acceptance/job_test.go
Lines changed: 12 additions & 6 deletions

@@ -113,6 +113,16 @@ func TestPreviewAccJobTasks(t *testing.T) {
 
   resource "databricks_job" "this" {
     name = "{var.RANDOM}"
+
+    job_cluster {
+      job_cluster_key = "j"
+      new_cluster {
+        num_workers   = 20
+        spark_version = data.databricks_spark_version.latest.id
+        node_type_id  = data.databricks_node_type.smallest.id
+      }
+    }
+
     task {
       task_key = "a"

@@ -147,17 +157,13 @@ func TestPreviewAccJobTasks(t *testing.T) {
 
     task {
       task_key = "c"
+
+      job_cluster_key = "j"
 
       depends_on {
         task_key = "b"
       }
 
-      new_cluster {
-        num_workers   = 20
-        spark_version = data.databricks_spark_version.latest.id
-        node_type_id  = data.databricks_node_type.smallest.id
-      }
-
       notebook_task {
         notebook_path = databricks_notebook.this.path
       }

jobs/resource_job.go
Lines changed: 9 additions & 2 deletions

@@ -81,6 +81,7 @@ type JobTaskSettings struct {
   ExistingClusterID string              `json:"existing_cluster_id,omitempty" tf:"group:cluster_type"`
   NewCluster        *clusters.Cluster   `json:"new_cluster,omitempty" tf:"group:cluster_type"`
+  JobClusterKey     string              `json:"job_cluster_key,omitempty" tf:"group:cluster_type"`
   Libraries         []libraries.Library `json:"libraries,omitempty" tf:"slice_set,alias:library"`
   NotebookTask      *NotebookTask       `json:"notebook_task,omitempty" tf:"group:task_type"`
   SparkJarTask      *SparkJarTask       `json:"spark_jar_task,omitempty" tf:"group:task_type"`

@@ -95,6 +96,11 @@ type JobTaskSettings struct {
   RetryOnTimeout bool `json:"retry_on_timeout,omitempty" tf:"computed"`
 }
 
+type JobCluster struct {
+  JobClusterKey string            `json:"job_cluster_key,omitempty" tf:"group:cluster_type"`
+  NewCluster    *clusters.Cluster `json:"new_cluster,omitempty" tf:"group:cluster_type"`
+}
+
 // JobSettings contains the information for configuring a job on databricks
 type JobSettings struct {
   Name string `json:"name,omitempty" tf:"default:Untitled"`

@@ -116,8 +122,9 @@ type JobSettings struct {
   // END Jobs API 2.0
 
   // BEGIN Jobs API 2.1
-  Tasks  []JobTaskSettings `json:"tasks,omitempty" tf:"alias:task"`
-  Format string            `json:"format,omitempty" tf:"computed"`
+  Tasks       []JobTaskSettings `json:"tasks,omitempty" tf:"alias:task"`
+  Format      string            `json:"format,omitempty" tf:"computed"`
+  JobClusters []JobCluster      `json:"job_clusters,omitempty" tf:"alias:job_cluster"`
   // END Jobs API 2.1
 
   Schedule *CronSchedule `json:"schedule,omitempty"`
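For context on what the new `JobClusters` field produces on the wire, here is a self-contained sketch using local mirror types, trimmed to the JSON tags visible in this diff and omitting the `tf:"..."` schema tags; these are illustrative stand-ins, not the provider's actual structs. It marshals a job with one shared cluster into the kind of `job_clusters` payload sent to `/api/2.1/jobs/create`, the endpoint exercised in the unit test below, reusing that test's values.

```go
package main

import (
    "encoding/json"
    "fmt"
)

// Local mirror of clusters.Cluster, reduced to the fields used in this commit.
type cluster struct {
    SparkVersion string `json:"spark_version,omitempty"`
    NodeTypeID   string `json:"node_type_id,omitempty"`
    NumWorkers   int    `json:"num_workers,omitempty"`
}

// Mirrors JobCluster: a keyed cluster definition shared by tasks.
type jobCluster struct {
    JobClusterKey string   `json:"job_cluster_key,omitempty"`
    NewCluster    *cluster `json:"new_cluster,omitempty"`
}

// Mirrors the task-level fields relevant here.
type taskSettings struct {
    TaskKey       string `json:"task_key,omitempty"`
    JobClusterKey string `json:"job_cluster_key,omitempty"`
}

// Mirrors the Jobs API 2.1 portion of JobSettings touched by this change.
type jobSettings struct {
    Name        string         `json:"name,omitempty"`
    Tasks       []taskSettings `json:"tasks,omitempty"`
    JobClusters []jobCluster   `json:"job_clusters,omitempty"`
}

func main() {
    // Same values as the unit test below: shared cluster "j" reused by task "a".
    settings := jobSettings{
        Name: "JobClustered",
        JobClusters: []jobCluster{{
            JobClusterKey: "j",
            NewCluster:    &cluster{SparkVersion: "b", NodeTypeID: "c", NumWorkers: 7},
        }},
        Tasks: []taskSettings{{TaskKey: "a", JobClusterKey: "j"}},
    }
    body, _ := json.MarshalIndent(settings, "", "  ")
    fmt.Println(string(body)) // shape of the request body for POST /api/2.1/jobs/create
}
```

Running it prints a JSON object whose `job_clusters` array carries the keyed `new_cluster` definition, while the task itself carries only the `job_cluster_key` reference.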

jobs/resource_job_test.go
Lines changed: 113 additions & 0 deletions

@@ -207,6 +207,119 @@ func TestResourceJobCreate_MultiTask(t *testing.T) {
   assert.Equal(t, "789", d.Id())
 }
 
+func TestResourceJobCreate_JobClusters(t *testing.T) {
+  d, err := qa.ResourceFixture{
+    Fixtures: []qa.HTTPFixture{
+      {
+        Method:   "POST",
+        Resource: "/api/2.1/jobs/create",
+        ExpectedRequest: JobSettings{
+          Name: "JobClustered",
+          Tasks: []JobTaskSettings{
+            {
+              TaskKey:       "a",
+              JobClusterKey: "j",
+            },
+            {
+              TaskKey: "b",
+              NewCluster: &clusters.Cluster{
+                SparkVersion: "a",
+                NodeTypeID:   "b",
+                NumWorkers:   3,
+              },
+              NotebookTask: &NotebookTask{
+                NotebookPath: "/Stuff",
+              },
+            },
+          },
+          MaxConcurrentRuns: 1,
+          JobClusters: []JobCluster{
+            {
+              JobClusterKey: "j",
+              NewCluster: &clusters.Cluster{
+                SparkVersion: "b",
+                NodeTypeID:   "c",
+                NumWorkers:   7,
+              },
+            },
+            {
+              JobClusterKey: "k",
+              NewCluster: &clusters.Cluster{
+                SparkVersion: "x",
+                NodeTypeID:   "y",
+                NumWorkers:   9,
+              },
+            },
+          },
+        },
+        Response: Job{
+          JobID: 17,
+        },
+      },
+      {
+        Method:   "GET",
+        Resource: "/api/2.1/jobs/get?job_id=17",
+        Response: Job{
+          // good enough for mock
+          Settings: &JobSettings{
+            Tasks: []JobTaskSettings{
+              {
+                TaskKey: "b",
+              },
+              {
+                TaskKey: "a",
+              },
+            },
+          },
+        },
+      },
+    },
+    Create:   true,
+    Resource: ResourceJob(),
+    HCL: `
+    name = "JobClustered"
+
+    job_cluster {
+      job_cluster_key = "j"
+      new_cluster {
+        num_workers = 7
+        spark_version = "b"
+        node_type_id = "c"
+      }
+    }
+
+    job_cluster {
+      job_cluster_key = "k"
+      new_cluster {
+        num_workers = 9
+        spark_version = "x"
+        node_type_id = "y"
+      }
+    }
+
+    task {
+      task_key = "a"
+      job_cluster_key = "j"
+    }
+
+    task {
+      task_key = "b"
+
+      new_cluster {
+        spark_version = "a"
+        node_type_id = "b"
+        num_workers = 3
+      }
+
+      notebook_task {
+        notebook_path = "/Stuff"
+      }
+    }`,
+  }.Apply(t)
+  assert.NoError(t, err, err)
+  assert.Equal(t, "17", d.Id())
+}
+
 func TestResourceJobCreate_AlwaysRunning(t *testing.T) {
   d, err := qa.ResourceFixture{
     Fixtures: []qa.HTTPFixture{
