Commit 0ff1735

FIX #606 by having all preview env tests runnable
1 parent b3d5ec8 commit 0ff1735

5 files changed: +127 -15 lines changed

5 files changed

+127
-15
lines changed

README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -41,6 +41,7 @@
 | [databricks_notebook](docs/data-sources/notebook.md) data
 | [databricks_notebook_paths](docs/data-sources/notebook_paths.md) data
 | [databricks_permissions](docs/resources/permissions.md)
+| [databricks_pipeline](docs/resources/pipeline.md)
 | [databricks_secret](docs/resources/secret.md)
 | [databricks_secret_acl](docs/resources/secret_acl.md)
 | [databricks_secret_scope](docs/resources/secret_scope.md)
```

compute/acceptance/pipeline_test.go

Lines changed: 50 additions & 9 deletions
```diff
@@ -13,14 +13,58 @@ func TestPreviewAccPipelineResource_CreatePipeline(t *testing.T) {
 	locals {
 		name = "pipeline-acceptance-{var.RANDOM}"
 	}
+	resource "databricks_notebook" "this" {
+		content_base64 = base64encode(<<-EOT
+			CREATE LIVE TABLE clickstream_raw AS
+			SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE clickstream_clean(
+				CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
+				CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
+			) TBLPROPERTIES ("quality" = "silver")
+			AS SELECT
+				CAST (curr_id AS INT) AS current_page_id,
+				curr_title AS current_page_title,
+				CAST(n AS INT) AS click_count,
+				CAST (prev_id AS INT) AS previous_page_id,
+				prev_title AS previous_page_title
+			FROM live.clickstream_raw
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
+			AS SELECT
+				previous_page_title as referrer,
+				click_count
+			FROM live.clickstream_clean
+			WHERE current_page_title = 'Apache_Spark'
+			ORDER BY click_count DESC
+			LIMIT 10
+			EOT
+		)
+		path     = "/Shared/${local.name}"
+		language = "SQL"
+	}
+
 	resource "databricks_pipeline" "this" {
-		name = locals.name
-		storage = "/test/${locals.name}"
+		name    = local.name
+		storage = "/test/${local.name}"
+
 		configuration = {
 			key1 = "value1"
 			key2 = "value2"
 		}
-		clusters {
+
+		library {
+			notebook {
+				path = databricks_notebook.this.path
+			}
+		}
+
+		cluster {
+			instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
 			label = "default"
 			num_workers = 2
 			custom_tags = {
@@ -29,22 +73,19 @@ func TestPreviewAccPipelineResource_CreatePipeline(t *testing.T) {
 		}
 
 		cluster {
+			instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
 			label = "maintenance"
 			num_workers = 1
 			custom_tags = {
-				cluster_type = "maintenance
+				cluster_type = "maintenance"
 			}
 		}
 
-		library {
-			maven {
-				coordinates = "com.microsoft.azure:azure-eventhubs-spark_2.11:2.3.7"
-			}
-		}
 		filters {
 			include = ["com.databricks.include"]
 			exclude = ["com.databricks.exclude"]
 		}
+
 		continuous = false
 	}
 `,
```

compute/resource_pipeline.go

Lines changed: 7 additions & 4 deletions
```diff
@@ -60,8 +60,8 @@ type pipelineSpec struct {
 	Name                string            `json:"name,omitempty"`
 	Storage             string            `json:"storage,omitempty"`
 	Configuration       map[string]string `json:"configuration,omitempty"`
-	Clusters            []pipelineCluster `json:"clusters,omitempty"`
-	Libraries           []pipelineLibrary `json:"libraries,omitempty"`
+	Clusters            []pipelineCluster `json:"clusters,omitempty" tf:"slice_set,alias:cluster"`
+	Libraries           []pipelineLibrary `json:"libraries,omitempty" tf:"slice_set,alias:library"`
 	Filters             *filters          `json:"filters"`
 	Continuous          bool              `json:"continuous,omitempty"`
 	AllowDuplicateNames bool              `json:"allow_duplicate_names,omitempty"`
@@ -122,6 +122,9 @@ func (a pipelinesAPI) create(s pipelineSpec, timeout time.Duration) (string, err
 		return "", err
 	}
 	id := resp.PipelineID
+	if !s.Continuous {
+		return id, nil
+	}
 	err = a.waitForState(id, timeout, StateRunning)
 	if err != nil {
 		log.Printf("[INFO] Pipeline creation failed, attempting to clean up pipeline %s", id)
@@ -190,7 +193,7 @@ func (a pipelinesAPI) waitForState(id string, timeout time.Duration, desiredStat
 }
 
 func adjustPipelineResourceSchema(m map[string]*schema.Schema) map[string]*schema.Schema {
-	clusters, _ := m["clusters"].Elem.(*schema.Resource)
+	clusters, _ := m["cluster"].Elem.(*schema.Resource)
 	clustersSchema := clusters.Schema
 	clustersSchema["spark_conf"].DiffSuppressFunc = sparkConfDiffSuppressFunc
 
@@ -203,7 +206,7 @@ func adjustPipelineResourceSchema(m map[string]*schema.Schema) map[string]*schem
 	delete(awsAttributesSchema, "ebs_volume_count")
 	delete(awsAttributesSchema, "ebs_volume_size")
 
-	m["libraries"].MinItems = 1
+	m["library"].MinItems = 1
 
 	return m
 }
```
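The `tf:"slice_set,alias:cluster"` and `tf:"slice_set,alias:library"` tags keep the plural `clusters`/`libraries` keys in the REST payload while exposing singular `cluster` and `library` blocks (as sets) in the Terraform schema, and the early return in `create` means a non-continuous pipeline no longer waits for `StateRunning`. A minimal sketch of the resulting configuration surface, with placeholder names and paths:

```hcl
resource "databricks_pipeline" "sketch" {
  name    = "alias-demo" # placeholder name
  storage = "/test/alias-demo"

  # Singular block name courtesy of `alias:cluster`; still
  # serialized to the API as the plural `clusters` JSON field.
  cluster {
    label       = "default"
    num_workers = 2
  }

  # Likewise, `library` maps onto the `libraries` JSON field.
  library {
    notebook {
      path = "/Shared/alias-demo" # placeholder notebook path
    }
  }

  filters {
    include = ["com.databricks.include"]
  }

  # With the early return added above, creating a non-continuous
  # pipeline returns as soon as the API hands back a pipeline ID.
  continuous = false
}
```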

docs/resources/pipeline.md

Lines changed: 68 additions & 0 deletions
```diff
@@ -0,0 +1,68 @@
+---
+subcategory: "Compute"
+---
+# databricks_pipeline Resource
+
+Use `databricks_pipeline` to deploy [Delta Live Tables](https://docs.databricks.com/data-engineering/delta-live-tables/index.html).
+
+## Example Usage
+
+```hcl
+resource "databricks_pipeline" "this" {
+  name    = "Pipeline Name"
+  storage = "/test/first-pipeline"
+  configuration = {
+    key1 = "value1"
+    key2 = "value2"
+  }
+
+  cluster {
+    label       = "default"
+    num_workers = 2
+    custom_tags = {
+      cluster_type = "default"
+    }
+  }
+
+  cluster {
+    label       = "maintenance"
+    num_workers = 1
+    custom_tags = {
+      cluster_type = "maintenance"
+    }
+  }
+
+  library {
+    maven {
+      coordinates = "com.microsoft.azure:azure-eventhubs-spark_2.11:2.3.7"
+    }
+  }
+
+  filters {
+    include = ["com.databricks.include"]
+    exclude = ["com.databricks.exclude"]
+  }
+
+  continuous = false
+}
+```
+
+## Argument Reference
+
+The following arguments are supported:
+
+* `name` - A user-friendly name for this pipeline. The name can be used to identify pipeline jobs in the UI.
+* `storage` - A location on DBFS or cloud storage where output data and metadata required for pipeline execution are stored. By default, tables are stored in a subdirectory of this location.
+* `configuration` - An optional list of values to apply to the entire pipeline. Elements must be formatted as key:value pairs.
+* `library` block - An array of notebooks containing the pipeline code and required artifacts. Syntax resembles the [library](cluster.md#library-configuration-block) configuration block, with the addition of a special `notebook` library type (see the sketch after this diff).
+* `cluster` block - An array of specifications for the [clusters](cluster.md) to run the pipeline. If this is not specified, the pipeline will automatically select a default cluster configuration.
+* `continuous` - A flag indicating whether to run the pipeline continuously. The default value is `false`.
+* `target` - The name of a database for persisting pipeline output data. Configuring the target setting allows you to view and query the pipeline output data from the Databricks UI.
+
+## Import
+
+The resource pipeline can be imported using the ID of the pipeline:
+
+```bash
+$ terraform import databricks_pipeline.this <pipeline-id>
+```
```
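The `library` argument above mentions a special `notebook` library type that the maven-only example does not show; the acceptance test in this commit exercises it. A minimal sketch of that block, with a placeholder workspace path:

```hcl
library {
  notebook {
    # Any workspace notebook containing DLT table definitions;
    # the path here is a placeholder.
    path = "/Shared/dlt-definitions"
  }
}
```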
Lines changed: 1 addition & 2 deletions
```diff
@@ -1,3 +1,2 @@
-DATABRICKS_HOST
-DATABRICKS_TOKEN
+DATABRICKS_CONFIG_PROFILE
 CLOUD_ENV
```
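Switching the preview-test environment from `DATABRICKS_HOST`/`DATABRICKS_TOKEN` to `DATABRICKS_CONFIG_PROFILE` means the tests authenticate through a named profile in `~/.databrickscfg` rather than raw credentials. A hedged sketch of the equivalent provider configuration (the profile name is a placeholder):

```hcl
# Equivalent to exporting DATABRICKS_CONFIG_PROFILE=preview before
# running the acceptance tests; "preview" is a placeholder profile
# expected to exist in ~/.databrickscfg.
provider "databricks" {
  profile = "preview"
}
```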
