
Commit c28b209

sabinonfx and Serge Smertin authored

Support single node clusters (#375)

* Default value for num_workers
* fmt
* NumWorkers Test
* Missing stub
* Update documentation
* Update CHANGELOG.md

Co-authored-by: Serge Smertin <[email protected]>

1 parent 34d2211 commit c28b209

5 files changed: +78 −3 lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@
  * Added [databricks_node_type](https://github.com/databrickslabs/terraform-provider-databricks/pull/376) data source for simpler selection of node types across AWS & Azure.
  * Added [Azure Key Vault support](https://github.com/databrickslabs/terraform-provider-databricks/pull/381) for databricks_secret_scope for Azure CLI authenticated users.
  * Added [is_pinned](https://github.com/databrickslabs/terraform-provider-databricks/pull/348) support for `databricks_cluster` resource.
+ * Fixed [single node clusters](https://docs.databricks.com/clusters/single-node.html) support by allowing [`num_workers` to be `0`](https://github.com/databrickslabs/terraform-provider-databricks/pull/375).
  * Internal: API for retrieval of the cluster events.

 Updated dependency versions:

compute/model.go

Lines changed: 2 additions & 2 deletions

@@ -228,7 +228,7 @@ type Cluster struct {
 	ClusterName  string `json:"cluster_name,omitempty"`
 	SparkVersion string `json:"spark_version"` // TODO: perhaps make a default
-	NumWorkers   int32  `json:"num_workers,omitempty" tf:"group:size"`
+	NumWorkers   int32  `json:"num_workers" tf:"group:size"`
 	Autoscale    *AutoScale `json:"autoscale,omitempty" tf:"group:size"`
 	EnableElasticDisk bool `json:"enable_elastic_disk,omitempty" tf:"computed"`
 	EnableLocalDiskEncryption bool `json:"enable_local_disk_encryption,omitempty"`

@@ -260,7 +260,7 @@ type ClusterList struct {
 // ClusterInfo contains the information when getting cluster info from the get request.
 type ClusterInfo struct {
-	NumWorkers int32 `json:"num_workers,omitempty"`
+	NumWorkers int32 `json:"num_workers"`
 	AutoScale  *AutoScale `json:"autoscale,omitempty"`
 	ClusterID  string `json:"cluster_id,omitempty"`
 	CreatorUserName string `json:"creator_user_name,omitempty"`

compute/resource_cluster.go

Lines changed: 5 additions & 0 deletions

@@ -101,6 +101,11 @@ func resourceClusterSchema() map[string]*schema.Schema {
 		p.Sensitive = true
 	}
 	s["autotermination_minutes"].Default = 60
+	s["num_workers"] = &schema.Schema{
+		Type:     schema.TypeInt,
+		Optional: true,
+		Default:  0,
+	}
 	s["idempotency_token"].ForceNew = true
 	s["cluster_id"] = &schema.Schema{
 		Type: schema.TypeString,
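The hunk above replaces the auto-generated schema entry for `num_workers` with one that is explicitly optional and defaults to `0` (presumably because, with `omitempty` removed from the struct tag, the generated schema would otherwise treat the field as required — an assumption about the provider's schema generator). A self-contained sketch of the override pattern, using a stand-in `Schema` struct rather than the real Terraform SDK type:

```go
package main

import "fmt"

// Schema is a stand-in for the Terraform SDK's *schema.Schema (illustrative only).
type Schema struct {
	Required bool
	Optional bool
	Default  interface{}
}

// generatedSchema mimics a struct-tag-derived schema map where a json field
// without `omitempty` is assumed to come out as Required.
func generatedSchema() map[string]*Schema {
	return map[string]*Schema{
		"num_workers": {Required: true},
	}
}

func main() {
	s := generatedSchema()
	// Override: single node clusters must be able to set num_workers to 0,
	// so the field becomes optional with an explicit zero default.
	s["num_workers"] = &Schema{Optional: true, Default: 0}
	fmt.Printf("optional=%v default=%v\n", s["num_workers"].Optional, s["num_workers"].Default)
}
```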

compute/resource_cluster_test.go

Lines changed: 67 additions & 0 deletions

@@ -495,6 +495,73 @@ func TestResourceClusterCreate_WithLibraries(t *testing.T) {
 	assert.Equal(t, "abc", d.Id())
 }

+func TestResourceClusterCreate_WithoutNumWorkers(t *testing.T) {
+	d, err := qa.ResourceFixture{
+		Fixtures: []qa.HTTPFixture{
+			{
+				Method:   "POST",
+				Resource: "/api/2.0/clusters/create",
+				ExpectedRequest: Cluster{
+					NumWorkers:             0,
+					ClusterName:            "Single Node Cluster",
+					SparkVersion:           "7.1-scala12",
+					NodeTypeID:             "dev-tier-node",
+					AutoterminationMinutes: 120,
+				},
+				Response: ClusterInfo{
+					ClusterID: "abc",
+					State:     ClusterStateRunning,
+				},
+			},
+			{
+				Method:   "POST",
+				Resource: "/api/2.0/clusters/events",
+				ExpectedRequest: EventsRequest{
+					ClusterID:  "abc",
+					Limit:      1,
+					Order:      SortDescending,
+					EventTypes: []ClusterEventType{EvTypePinned, EvTypeUnpinned},
+				},
+				Response: EventsResponse{
+					Events:     []ClusterEvent{},
+					TotalCount: 0,
+				},
+			},
+			{
+				Method:       "GET",
+				ReuseRequest: true,
+				Resource:     "/api/2.0/clusters/get?cluster_id=abc",
+				Response: ClusterInfo{
+					ClusterID:              "abc",
+					ClusterName:            "Single Node Cluster",
+					SparkVersion:           "7.1-scala12",
+					NodeTypeID:             "dev-tier-node",
+					AutoterminationMinutes: 120,
+					State:                  ClusterStateRunning,
+				},
+			},
+			{
+				Method:   "GET",
+				Resource: "/api/2.0/libraries/cluster-status?cluster_id=abc",
+				Response: ClusterLibraryStatuses{
+					LibraryStatuses: []LibraryStatus{},
+				},
+			},
+		},
+		Create:   true,
+		Resource: ResourceCluster(),
+		State: map[string]interface{}{
+			"autotermination_minutes": 120,
+			"cluster_name":            "Single Node Cluster",
+			"spark_version":           "7.1-scala12",
+			"node_type_id":            "dev-tier-node",
+			"is_pinned":               false,
+		},
+	}.Apply(t)
+	assert.NoError(t, err, err)
+	assert.Equal(t, 0, d.Get("num_workers"))
+}
+
 func TestResourceClusterCreate_Error(t *testing.T) {
 	d, err := qa.ResourceFixture{
 		Fixtures: []qa.HTTPFixture{

docs/resources/cluster.md

Lines changed: 3 additions & 1 deletion

@@ -67,7 +67,9 @@ resource "databricks_cluster" "shared_autoscaling" {
 When you [create a Databricks cluster](https://docs.databricks.com/clusters/configure.html#cluster-size-and-autoscaling), you can either provide a `num_workers` for a fixed-size cluster or provide `min_workers` and/or `max_workers` for the cluster within the `autoscale` group. When you provide a fixed-size cluster, Databricks ensures that your cluster has the specified number of workers. When you provide a range for the number of workers, Databricks chooses the appropriate number of workers required to run your job. This is referred to as autoscaling. With autoscaling, Databricks dynamically reallocates workers to account for the characteristics of your job. Certain parts of your pipeline may be more computationally demanding than others, and Databricks automatically adds additional workers during these phases of your job (and removes them when they're no longer needed). It is advised to keep all common configurations in [Cluster Policies](cluster_policy.md) to maintain control of the environments launched.

-* `num_workers` - (Optional) Number of worker nodes that this cluster should have. A cluster has one Spark Driver and num_workers Executors for a total of num_workers + 1 Spark node.
+When using a [Single Node cluster](https://docs.databricks.com/clusters/single-node.html), `num_workers` needs to be `0`. You can either set the argument explicitly or omit it, as it defaults to `0`.
+
+* `num_workers` - (Optional) Number of worker nodes that this cluster should have. A cluster has one Spark Driver and `num_workers` executors, for a total of `num_workers` + 1 Spark nodes. Set to `0` when not provided.

 `autoscale` optional configuration block supports the following:
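For reference, a minimal single node configuration in the style of the docs' existing HCL examples. The node type and Spark version values are illustrative (taken from the test fixture above), and a production single node cluster may need additional Spark configuration beyond the scope of this change:

```hcl
resource "databricks_cluster" "single_node" {
  cluster_name            = "Single Node Cluster"
  spark_version           = "7.1-scala12"   # illustrative version
  node_type_id            = "dev-tier-node" # illustrative node type
  autotermination_minutes = 120
  num_workers             = 0 # may also be omitted; it now defaults to 0
}
```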
