Commit 0ff1735

FIX #606 by having all preview env tests runnable
1 parent b3d5ec8 commit 0ff1735

5 files changed: +127 -15 lines changed

5 files changed

+127
-15
lines changed

README.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -41,6 +41,7 @@
 | [databricks_notebook](docs/data-sources/notebook.md) data
 | [databricks_notebook_paths](docs/data-sources/notebook_paths.md) data
 | [databricks_permissions](docs/resources/permissions.md)
+| [databricks_pipeline](docs/resources/pipeline.md)
 | [databricks_secret](docs/resources/secret.md)
 | [databricks_secret_acl](docs/resources/secret_acl.md)
 | [databricks_secret_scope](docs/resources/secret_scope.md)
```

compute/acceptance/pipeline_test.go

Lines changed: 50 additions & 9 deletions
```diff
@@ -13,14 +13,58 @@ func TestPreviewAccPipelineResource_CreatePipeline(t *testing.T) {
 	locals {
 		name = "pipeline-acceptance-{var.RANDOM}"
 	}
+	resource "databricks_notebook" "this" {
+		content_base64 = base64encode(<<-EOT
+			CREATE LIVE TABLE clickstream_raw AS
+			SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE clickstream_clean(
+				CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
+				CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
+			) TBLPROPERTIES ("quality" = "silver")
+			AS SELECT
+				CAST (curr_id AS INT) AS current_page_id,
+				curr_title AS current_page_title,
+				CAST(n AS INT) AS click_count,
+				CAST (prev_id AS INT) AS previous_page_id,
+				prev_title AS previous_page_title
+			FROM live.clickstream_raw
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
+			AS SELECT
+				previous_page_title as referrer,
+				click_count
+			FROM live.clickstream_clean
+			WHERE current_page_title = 'Apache_Spark'
+			ORDER BY click_count DESC
+			LIMIT 10
+			EOT
+		)
+		path     = "/Shared/${local.name}"
+		language = "SQL"
+	}
+
 	resource "databricks_pipeline" "this" {
-		name = locals.name
-		storage = "/test/${locals.name}"
+		name    = local.name
+		storage = "/test/${local.name}"
+
 		configuration = {
 			key1 = "value1"
 			key2 = "value2"
 		}
-		clusters {
+
+		library {
+			notebook {
+				path = databricks_notebook.this.path
+			}
+		}
+
+		cluster {
+			instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
 			label = "default"
 			num_workers = 2
 			custom_tags = {
@@ -29,22 +73,19 @@ func TestPreviewAccPipelineResource_CreatePipeline(t *testing.T) {
 		}
 
 		cluster {
+			instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
 			label = "maintenance"
 			num_workers = 1
 			custom_tags = {
-				cluster_type = "maintenance
+				cluster_type = "maintenance"
 			}
 		}
 
-		library {
-			maven {
-				coordinates = "com.microsoft.azure:azure-eventhubs-spark_2.11:2.3.7"
-			}
-		}
 		filters {
 			include = ["com.databricks.include"]
 			exclude = ["com.databricks.exclude"]
 		}
+
 		continuous = false
 	}
 `,
```

compute/resource_pipeline.go

Lines changed: 7 additions & 4 deletions
```diff
@@ -60,8 +60,8 @@ type pipelineSpec struct {
 	Name                string            `json:"name,omitempty"`
 	Storage             string            `json:"storage,omitempty"`
 	Configuration       map[string]string `json:"configuration,omitempty"`
-	Clusters            []pipelineCluster `json:"clusters,omitempty"`
-	Libraries           []pipelineLibrary `json:"libraries,omitempty"`
+	Clusters            []pipelineCluster `json:"clusters,omitempty" tf:"slice_set,alias:cluster"`
+	Libraries           []pipelineLibrary `json:"libraries,omitempty" tf:"slice_set,alias:library"`
 	Filters             *filters          `json:"filters"`
 	Continuous          bool              `json:"continuous,omitempty"`
 	AllowDuplicateNames bool              `json:"allow_duplicate_names,omitempty"`
@@ -122,6 +122,9 @@ func (a pipelinesAPI) create(s pipelineSpec, timeout time.Duration) (string, err
 		return "", err
 	}
 	id := resp.PipelineID
+	if !s.Continuous {
+		return id, nil
+	}
 	err = a.waitForState(id, timeout, StateRunning)
 	if err != nil {
 		log.Printf("[INFO] Pipeline creation failed, attempting to clean up pipeline %s", id)
@@ -190,7 +193,7 @@ func (a pipelinesAPI) waitForState(id string, timeout time.Duration, desiredStat
 }
 
 func adjustPipelineResourceSchema(m map[string]*schema.Schema) map[string]*schema.Schema {
-	clusters, _ := m["clusters"].Elem.(*schema.Resource)
+	clusters, _ := m["cluster"].Elem.(*schema.Resource)
 	clustersSchema := clusters.Schema
 	clustersSchema["spark_conf"].DiffSuppressFunc = sparkConfDiffSuppressFunc
 
@@ -203,7 +206,7 @@ func adjustPipelineResourceSchema(m map[string]*schema.Schema) map[string]*schem
 	delete(awsAttributesSchema, "ebs_volume_count")
 	delete(awsAttributesSchema, "ebs_volume_size")
 
-	m["libraries"].MinItems = 1
+	m["library"].MinItems = 1
 
 	return m
 }
```
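The `tf:"slice_set,alias:cluster"` and `tf:"slice_set,alias:library"` tags keep the plural `clusters`/`libraries` keys in the REST payload while exposing singular `cluster` and `library` blocks (as sets) in the Terraform schema, and the early return in `create` means a non-continuous pipeline no longer waits for `StateRunning`. A minimal sketch of the resulting configuration surface, with placeholder names and paths:

```hcl
resource "databricks_pipeline" "sketch" {
  name    = "alias-demo" # placeholder name
  storage = "/test/alias-demo"

  # Singular block name courtesy of `alias:cluster`; still
  # serialized to the API as the plural `clusters` JSON field.
  cluster {
    label       = "default"
    num_workers = 2
  }

  # Likewise, `library` maps onto the `libraries` JSON field.
  library {
    notebook {
      path = "/Shared/alias-demo" # placeholder notebook path
    }
  }

  filters {
    include = ["com.databricks.include"]
  }

  # With the early return added above, creating a non-continuous
  # pipeline returns as soon as the API hands back a pipeline ID.
  continuous = false
}
```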

docs/resources/pipeline.md

Lines changed: 68 additions & 0 deletions
```diff
@@ -0,0 +1,68 @@
+---
+subcategory: "Compute"
+---
+# databricks_pipeline Resource
+
+Use `databricks_pipeline` to deploy [Delta Live Tables](https://docs.databricks.com/data-engineering/delta-live-tables/index.html).
+
+## Example Usage
+
+```hcl
+resource "databricks_pipeline" "this" {
+  name    = "Pipeline Name"
+  storage = "/test/first-pipeline"
+  configuration = {
+    key1 = "value1"
+    key2 = "value2"
+  }
+
+  cluster {
+    label       = "default"
+    num_workers = 2
+    custom_tags = {
+      cluster_type = "default"
+    }
+  }
+
+  cluster {
+    label       = "maintenance"
+    num_workers = 1
+    custom_tags = {
+      cluster_type = "maintenance"
+    }
+  }
+
+  library {
+    maven {
+      coordinates = "com.microsoft.azure:azure-eventhubs-spark_2.11:2.3.7"
+    }
+  }
+
+  filters {
+    include = ["com.databricks.include"]
+    exclude = ["com.databricks.exclude"]
+  }
+
+  continuous = false
+}
+```
+
+## Argument Reference
+
+The following arguments are supported:
+
+* `name` - A user-friendly name for this pipeline. The name can be used to identify pipeline jobs in the UI.
+* `storage` - A location on DBFS or cloud storage where output data and metadata required for pipeline execution are stored. By default, tables are stored in a subdirectory of this location.
+* `configuration` - An optional list of values to apply to the entire pipeline. Elements must be formatted as key:value pairs.
+* `library` block - An array of notebooks containing the pipeline code and required artifacts. Syntax resembles the [library](cluster.md#library-configuration-block) configuration block, with the addition of a special `notebook` library type (see the sketch after this diff).
+* `cluster` block - An array of specifications for the [clusters](cluster.md) to run the pipeline. If this is not specified, the pipeline will automatically select a default cluster configuration.
+* `continuous` - A flag indicating whether to run the pipeline continuously. The default value is `false`.
+* `target` - The name of a database for persisting pipeline output data. Configuring the target setting allows you to view and query the pipeline output data from the Databricks UI.
+
+## Import
+
+The resource pipeline can be imported using the ID of the pipeline:
+
+```bash
+$ terraform import databricks_pipeline.this <pipeline-id>
+```
```
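The `library` argument above mentions a special `notebook` library type that the maven-only example does not show; the acceptance test in this commit exercises it. A minimal sketch of that block, with a placeholder workspace path:

```hcl
library {
  notebook {
    # Any workspace notebook containing DLT table definitions;
    # the path here is a placeholder.
    path = "/Shared/dlt-definitions"
  }
}
```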
Lines changed: 1 addition & 2 deletions
```diff
@@ -1,3 +1,2 @@
-DATABRICKS_HOST
-DATABRICKS_TOKEN
+DATABRICKS_CONFIG_PROFILE
 CLOUD_ENV
```
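Switching the preview-test environment from `DATABRICKS_HOST`/`DATABRICKS_TOKEN` to `DATABRICKS_CONFIG_PROFILE` means the tests authenticate through a named profile in `~/.databrickscfg` rather than raw credentials. A hedged sketch of the equivalent provider configuration (the profile name is a placeholder):

```hcl
# Equivalent to exporting DATABRICKS_CONFIG_PROFILE=preview before
# running the acceptance tests; "preview" is a placeholder profile
# expected to exist in ~/.databrickscfg.
provider "databricks" {
  profile = "preview"
}
```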
