
Commit da5b37f

Fixed databricks_pipeline incorrect generation of cluster blocks (#1416)
Fix incorrect generation of cluster blocks for DLT pipelines. Nested `suppress_diff` doesn't play well with the `slice_set`... this fixes #1401
1 parent baa5413 commit da5b37f
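
For context, the configuration shape that hit #1401 looks roughly like the following minimal sketch in Terraform HCL (names are illustrative, condensed from the AWS acceptance test added in this commit; the notebook resource is assumed to be defined elsewhere): a DLT pipeline with more than one cluster block, only one of which sets aws_attributes. Before this fix, the nested suppress_diff on aws_attributes/gcp_attributes interacted badly with the set-typed ("slice_set") cluster attribute, so plans could generate the cluster blocks incorrectly.

resource "databricks_pipeline" "example" {
  # Illustrative names, condensed from the acceptance test below.
  name    = "dlt-cluster-blocks-example"
  storage = "/test/dlt-cluster-blocks-example"

  library {
    notebook {
      # Notebook resource assumed to exist elsewhere, as dltNotebookResource does in the test.
      path = databricks_notebook.this.path
    }
  }

  # Two cluster blocks in one pipeline; only the first carries aws_attributes.
  cluster {
    label       = "default"
    num_workers = 2
    aws_attributes {
      first_on_demand = 1
    }
  }

  cluster {
    label       = "maintenance"
    num_workers = 1
  }

  continuous = false
}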

6 files changed: +139 −49 lines


internal/acceptance/acceptance.go

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ func Test(t *testing.T, steps []Step, otherVars ...map[string]string) {
 		t.Skip(err.Error())
 	}
 	awsAttrs := ""
-	if cloudEnv == "AWS" {
+	if cloudEnv == "aws" {
 		awsAttrs = "aws_attributes {}"
 	}
 	instancePoolID := ""
Lines changed: 129 additions & 39 deletions
@@ -1,53 +1,58 @@
 package acceptance
 
 import (
+	"os"
 	"testing"
 
 	"github.com/databricks/terraform-provider-databricks/internal/acceptance"
 )
 
+var (
+	dltNotebookResource = `
+	resource "databricks_notebook" "this" {
+		content_base64 = base64encode(<<-EOT
+			CREATE LIVE TABLE clickstream_raw AS
+			SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE clickstream_clean(
+				CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
+				CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
+			) TBLPROPERTIES ("quality" = "silver")
+			AS SELECT
+				CAST (curr_id AS INT) AS current_page_id,
+				curr_title AS current_page_title,
+				CAST(n AS INT) AS click_count,
+				CAST (prev_id AS INT) AS previous_page_id,
+				prev_title AS previous_page_title
+			FROM live.clickstream_raw
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
+			AS SELECT
+				previous_page_title as referrer,
+				click_count
+			FROM live.clickstream_clean
+			WHERE current_page_title = 'Apache_Spark'
+			ORDER BY click_count DESC
+			LIMIT 10
+			EOT
+		)
+		path = "/Shared/${local.name}"
+		language = "SQL"
+	}
+	`
+)
+
 func TestAccPipelineResource_CreatePipeline(t *testing.T) {
 	acceptance.Test(t, []acceptance.Step{
 		{
 			Template: `
 			locals {
 				name = "pipeline-acceptance-{var.RANDOM}"
 			}
-			resource "databricks_notebook" "this" {
-				content_base64 = base64encode(<<-EOT
-					CREATE LIVE TABLE clickstream_raw AS
-					SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
-
-					-- COMMAND ----------
-
-					CREATE LIVE TABLE clickstream_clean(
-						CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
-						CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
-					) TBLPROPERTIES ("quality" = "silver")
-					AS SELECT
-						CAST (curr_id AS INT) AS current_page_id,
-						curr_title AS current_page_title,
-						CAST(n AS INT) AS click_count,
-						CAST (prev_id AS INT) AS previous_page_id,
-						prev_title AS previous_page_title
-					FROM live.clickstream_raw
-
-					-- COMMAND ----------
-
-					CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
-					AS SELECT
-						previous_page_title as referrer,
-						click_count
-					FROM live.clickstream_clean
-					WHERE current_page_title = 'Apache_Spark'
-					ORDER BY click_count DESC
-					LIMIT 10
-					EOT
-				)
-				path = "/Shared/${local.name}"
-				language = "SQL"
-			}
-
 			resource "databricks_pipeline" "this" {
 				name = local.name
 				storage = "/test/${local.name}"
@@ -81,14 +86,99 @@ func TestAccPipelineResource_CreatePipeline(t *testing.T) {
 				}
 			}
 
-			filters {
-				include = ["com.databricks.include"]
-				exclude = ["com.databricks.exclude"]
+			continuous = false
+			}
+			` + dltNotebookResource,
+		},
+	})
+}
+
+func TestAccAwsPipelineResource_CreatePipeline(t *testing.T) {
+	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "aws" {
+		t.Skip("Test is only for CLOUD_ENV=AWS")
+	}
+	acceptance.Test(t, []acceptance.Step{
+		{
+			Template: `
+			locals {
+				name = "pipeline-acceptance-aws-{var.RANDOM}"
+			}
+			resource "databricks_pipeline" "this" {
+				name = local.name
+				storage = "/test/${local.name}"
+				configuration = {
+					key1 = "value1"
+					key2 = "value2"
+				}
+				library {
+					notebook {
+						path = databricks_notebook.this.path
+					}
 				}
 
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "default"
+					num_workers = 2
+					custom_tags = {
+						cluster_type = "default"
+					}
+					aws_attributes {
+						first_on_demand = 1
+					}
+				}
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "maintenance"
+					num_workers = 1
+					custom_tags = {
+						cluster_type = "maintenance"
+					}
+				}
+				continuous = false
+			}
+			` + dltNotebookResource,
+		},
+		{
+			Template: `
+			locals {
+				name = "pipeline-acceptance-aws-{var.RANDOM}"
+			}
+			resource "databricks_pipeline" "this" {
+				name = local.name
+				storage = "/test/${local.name}"
+				configuration = {
+					key1 = "value1"
+					key2 = "value2"
+				}
+				library {
+					notebook {
+						path = databricks_notebook.this.path
+					}
+				}
+
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "default"
+					num_workers = 3
+					custom_tags = {
+						cluster_type = "default"
+					}
+					aws_attributes {
+						first_on_demand = 2
+					}
+				}
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "maintenance"
+					num_workers = 1
+					custom_tags = {
+						cluster_type = "maintenance"
+					}
+				}
 			continuous = false
 			}
-			`,
+			` + dltNotebookResource,
 		},
 	})
 }

pipelines/resource_pipeline.go

Lines changed: 2 additions & 2 deletions
@@ -32,8 +32,8 @@ type pipelineCluster struct {
 	DriverNodeTypeID     string                  `json:"driver_node_type_id,omitempty" tf:"computed"`
 	InstancePoolID       string                  `json:"instance_pool_id,omitempty" tf:"group:node_type"`
 	DriverInstancePoolID string                  `json:"driver_instance_pool_id,omitempty"`
-	AwsAttributes        *clusters.AwsAttributes `json:"aws_attributes,omitempty" tf:"suppress_diff"`
-	GcpAttributes        *clusters.GcpAttributes `json:"gcp_attributes,omitempty" tf:"suppress_diff"`
+	AwsAttributes        *clusters.AwsAttributes `json:"aws_attributes,omitempty"`
+	GcpAttributes        *clusters.GcpAttributes `json:"gcp_attributes,omitempty"`
 
 	SparkConf    map[string]string `json:"spark_conf,omitempty"`
 	SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"`

scim/acceptance/service_principal_test.go

Lines changed: 2 additions & 2 deletions
@@ -23,8 +23,8 @@ func TestAccServicePrincipalResourceOnAzure(t *testing.T) {
 }
 
 func TestAccServicePrincipalResourceOnAws(t *testing.T) {
-	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "AWS" {
-		t.Skip("Test is only for CLOUD_ENV=AWS")
+	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "aws" {
+		t.Skip("Test is only for CLOUD_ENV=aws")
 	}
 	t.Parallel()
 	acceptance.Test(t, []acceptance.Step{

scripts/README.md

Lines changed: 3 additions & 3 deletions
@@ -12,9 +12,9 @@ By default, we don't encourage creation/destruction of infrastructure multiple t
 * `azsp` - Azure authenticated with Service Principal's ID/Secret pairs. Runnable test name prefixes are `TestAcc` and `TestAzureAcc`. Service pricipal must have `Storage Blob Data Contributor` role on ADLS account used. `ARM_SUBSCRIPTION_ID`, `ARM_CLIENT_SECRET`, `ARM_CLIENT_ID`, `ARM_TENANT_ID`, `OWNER` environment vars required. Note that these integration tests will use service principal based auth. Even though it is using a service principal, it will still be generating a personal access token to perform creation of resources.
 
 * `mws` - AWS with Databricks Multiworkspace API. Runnable test name prefix is `TestMws`. Please [check if you're able to use it](https://docs.databricks.com/administration-guide/multiworkspace/new-workspace-aws.html). Required variables are `DATABRICKS_ACCOUNT_ID`, `DATABRICKS_USERNAME`, `DATABRICKS_PASSWORD` (something you use for https://accounts.cloud.databricks.com/), `AWS_REGION`, `TEST_CIDR`, `OWNER`. Only multiworkspace resources are tested.
-* `awsst` - `DATABRICKS_CONFIG_PROFILE` (section within Databricks CLI `~/.databrickscfg` file) & `CLOUD_ENV=AWS`. In case you want to test provider on existing development single-tenant shard. Runnable test name prefixes are `TestAcc` and `TestAwsAcc`.
+* `awsst` - `DATABRICKS_CONFIG_PROFILE` (section within Databricks CLI `~/.databrickscfg` file) & `CLOUD_ENV=aws`. In case you want to test provider on existing development single-tenant shard. Runnable test name prefixes are `TestAcc` and `TestAwsAcc`.
 * `awsmt` - AWS with Databricks Multitenant Workspace. Currently work in progress and the test environment cannot be fully started.
-* most of the tests should aim to be cloud-agnostic. Though, in case of specific branching needed, you can check `CLOUD_ENV` value (possible values are `Azure`, `AWS` & `MWS`).
+* most of the tests should aim to be cloud-agnostic. Though, in case of specific branching needed, you can check `CLOUD_ENV` value (possible values are `Azure`, `aws` & `MWS`).
 * all environment variables are used by *DatabricksClient*, *provider integration tests* and *terraform configuration*.
 * **each `output` becomes an environment variable** with the case changed to upper. This gives an easy way to manage the complexity of the testing environment. This is what gives those variables for `export $(scripts/run.sh azcli --export)` under the hood.
 * `qa.EnvironmentTemplate` must be used to make readable templates with environment variable presence validation.
@@ -94,7 +94,7 @@ func TestAccListClustersIntegration(t *testing.T) {
 		AutoterminationMinutes: 15,
 	}
 
-	if cloud == "AWS" {
+	if cloud == "aws" {
 		cluster.AwsAttributes = &AwsAttributes{
 			EbsVolumeType:  EbsVolumeTypeGeneralPurposeSsd,
 			EbsVolumeCount: 1,

scripts/nightly/awsit.tf

Lines changed: 2 additions & 2 deletions
@@ -261,7 +261,7 @@ resource "azurerm_container_group" "aws" {
   cpu    = "2"
   memory = "2"
   environment_variables = {
-    CLOUD_ENV = "AWS"
+    CLOUD_ENV = "aws"
     TEST_FILTER = "TestAcc"
     DATABRICKS_HOST = databricks_mws_workspaces.this.workspace_url
     TEST_S3_BUCKET = aws_s3_bucket.ds.bucket
@@ -290,4 +290,4 @@ output "aws_workspace_id" {
 output "aws_workspace_pat" {
   value     = databricks_mws_workspaces.this.token[0].token_value
   sensitive = true
-}
+}
