
Commit da5b37f

Fixed databricks_pipeline incorrect generation of cluster blocks (#1416)
Fix incorrect generation of cluster blocks for DLT pipelines. Nested `suppress_diff` doesn't play well with the `slice_set`... this fixes #1401
1 parent baa5413 commit da5b37f
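
For context, the configuration shape that hit #1401 looks roughly like the following minimal sketch in Terraform HCL (names are illustrative, condensed from the AWS acceptance test added in this commit; the notebook resource is assumed to be defined elsewhere): a DLT pipeline with more than one cluster block, only one of which sets aws_attributes. Before this fix, the nested suppress_diff on aws_attributes/gcp_attributes interacted badly with the set-typed ("slice_set") cluster attribute, so plans could generate the cluster blocks incorrectly.

resource "databricks_pipeline" "example" {
  # Illustrative names, condensed from the acceptance test below.
  name    = "dlt-cluster-blocks-example"
  storage = "/test/dlt-cluster-blocks-example"

  library {
    notebook {
      # Notebook resource assumed to exist elsewhere, as dltNotebookResource does in the test.
      path = databricks_notebook.this.path
    }
  }

  # Two cluster blocks in one pipeline; only the first carries aws_attributes.
  cluster {
    label       = "default"
    num_workers = 2
    aws_attributes {
      first_on_demand = 1
    }
  }

  cluster {
    label       = "maintenance"
    num_workers = 1
  }

  continuous = false
}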

6 files changed: +139 −49 lines


internal/acceptance/acceptance.go

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ func Test(t *testing.T, steps []Step, otherVars ...map[string]string) {
 		t.Skip(err.Error())
 	}
 	awsAttrs := ""
-	if cloudEnv == "AWS" {
+	if cloudEnv == "aws" {
 		awsAttrs = "aws_attributes {}"
 	}
 	instancePoolID := ""
Lines changed: 129 additions & 39 deletions
@@ -1,53 +1,58 @@
 package acceptance
 
 import (
+	"os"
 	"testing"
 
 	"github.com/databricks/terraform-provider-databricks/internal/acceptance"
 )
 
+var (
+	dltNotebookResource = `
+	resource "databricks_notebook" "this" {
+		content_base64 = base64encode(<<-EOT
+			CREATE LIVE TABLE clickstream_raw AS
+			SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE clickstream_clean(
+				CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
+				CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
+			) TBLPROPERTIES ("quality" = "silver")
+			AS SELECT
+				CAST (curr_id AS INT) AS current_page_id,
+				curr_title AS current_page_title,
+				CAST(n AS INT) AS click_count,
+				CAST (prev_id AS INT) AS previous_page_id,
+				prev_title AS previous_page_title
+			FROM live.clickstream_raw
+
+			-- COMMAND ----------
+
+			CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
+			AS SELECT
+				previous_page_title as referrer,
+				click_count
+			FROM live.clickstream_clean
+			WHERE current_page_title = 'Apache_Spark'
+			ORDER BY click_count DESC
+			LIMIT 10
+			EOT
+		)
+		path = "/Shared/${local.name}"
+		language = "SQL"
+	}
+	`
+)
+
 func TestAccPipelineResource_CreatePipeline(t *testing.T) {
 	acceptance.Test(t, []acceptance.Step{
 		{
 			Template: `
 			locals {
 				name = "pipeline-acceptance-{var.RANDOM}"
 			}
-			resource "databricks_notebook" "this" {
-				content_base64 = base64encode(<<-EOT
-					CREATE LIVE TABLE clickstream_raw AS
-					SELECT * FROM json.` + "`/databricks-datasets/wikipedia-datasets/data-001/clickstream/raw-uncompressed-json/2015_2_clickstream.json`" + `
-
-					-- COMMAND ----------
-
-					CREATE LIVE TABLE clickstream_clean(
-						CONSTRAINT valid_current_page EXPECT (current_page_id IS NOT NULL and current_page_title IS NOT NULL),
-						CONSTRAINT valid_count EXPECT (click_count > 0) ON VIOLATION FAIL UPDATE
-					) TBLPROPERTIES ("quality" = "silver")
-					AS SELECT
-						CAST (curr_id AS INT) AS current_page_id,
-						curr_title AS current_page_title,
-						CAST(n AS INT) AS click_count,
-						CAST (prev_id AS INT) AS previous_page_id,
-						prev_title AS previous_page_title
-					FROM live.clickstream_raw
-
-					-- COMMAND ----------
-
-					CREATE LIVE TABLE top_spark_referers TBLPROPERTIES ("quality" = "gold")
-					AS SELECT
-						previous_page_title as referrer,
-						click_count
-					FROM live.clickstream_clean
-					WHERE current_page_title = 'Apache_Spark'
-					ORDER BY click_count DESC
-					LIMIT 10
-					EOT
-				)
-				path = "/Shared/${local.name}"
-				language = "SQL"
-			}
-
 			resource "databricks_pipeline" "this" {
 				name = local.name
 				storage = "/test/${local.name}"
@@ -81,14 +86,99 @@ func TestAccPipelineResource_CreatePipeline(t *testing.T) {
 				}
 			}
 
-			filters {
-				include = ["com.databricks.include"]
-				exclude = ["com.databricks.exclude"]
+			continuous = false
+			}
+			` + dltNotebookResource,
+		},
+	})
+}
+
+func TestAccAwsPipelineResource_CreatePipeline(t *testing.T) {
+	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "aws" {
+		t.Skip("Test is only for CLOUD_ENV=AWS")
+	}
+	acceptance.Test(t, []acceptance.Step{
+		{
+			Template: `
+			locals {
+				name = "pipeline-acceptance-aws-{var.RANDOM}"
+			}
+			resource "databricks_pipeline" "this" {
+				name = local.name
+				storage = "/test/${local.name}"
+				configuration = {
+					key1 = "value1"
+					key2 = "value2"
+				}
+				library {
+					notebook {
+						path = databricks_notebook.this.path
+					}
 				}
 
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "default"
+					num_workers = 2
+					custom_tags = {
+						cluster_type = "default"
+					}
+					aws_attributes {
+						first_on_demand = 1
+					}
+				}
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "maintenance"
+					num_workers = 1
+					custom_tags = {
+						cluster_type = "maintenance"
+					}
+				}
+				continuous = false
+			}
+			` + dltNotebookResource,
+		},
+		{
+			Template: `
+			locals {
+				name = "pipeline-acceptance-aws-{var.RANDOM}"
+			}
+			resource "databricks_pipeline" "this" {
+				name = local.name
+				storage = "/test/${local.name}"
+				configuration = {
+					key1 = "value1"
+					key2 = "value2"
+				}
+				library {
+					notebook {
+						path = databricks_notebook.this.path
+					}
+				}
+
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "default"
+					num_workers = 3
+					custom_tags = {
+						cluster_type = "default"
+					}
+					aws_attributes {
+						first_on_demand = 2
+					}
+				}
+				cluster {
+					instance_pool_id = "{var.COMMON_INSTANCE_POOL_ID}"
+					label = "maintenance"
+					num_workers = 1
+					custom_tags = {
+						cluster_type = "maintenance"
+					}
+				}
 			continuous = false
 			}
-			`,
+			` + dltNotebookResource,
 		},
 	})
 }

pipelines/resource_pipeline.go

Lines changed: 2 additions & 2 deletions
@@ -32,8 +32,8 @@ type pipelineCluster struct {
 	DriverNodeTypeID     string                  `json:"driver_node_type_id,omitempty" tf:"computed"`
 	InstancePoolID       string                  `json:"instance_pool_id,omitempty" tf:"group:node_type"`
 	DriverInstancePoolID string                  `json:"driver_instance_pool_id,omitempty"`
-	AwsAttributes        *clusters.AwsAttributes `json:"aws_attributes,omitempty" tf:"suppress_diff"`
-	GcpAttributes        *clusters.GcpAttributes `json:"gcp_attributes,omitempty" tf:"suppress_diff"`
+	AwsAttributes        *clusters.AwsAttributes `json:"aws_attributes,omitempty"`
+	GcpAttributes        *clusters.GcpAttributes `json:"gcp_attributes,omitempty"`
 
 	SparkConf    map[string]string `json:"spark_conf,omitempty"`
 	SparkEnvVars map[string]string `json:"spark_env_vars,omitempty"`

scim/acceptance/service_principal_test.go

Lines changed: 2 additions & 2 deletions
@@ -23,8 +23,8 @@ func TestAccServicePrincipalResourceOnAzure(t *testing.T) {
 }
 
 func TestAccServicePrincipalResourceOnAws(t *testing.T) {
-	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "AWS" {
-		t.Skip("Test is only for CLOUD_ENV=AWS")
+	if cloud, ok := os.LookupEnv("CLOUD_ENV"); !ok || cloud != "aws" {
+		t.Skip("Test is only for CLOUD_ENV=aws")
 	}
 	t.Parallel()
 	acceptance.Test(t, []acceptance.Step{

scripts/README.md

Lines changed: 3 additions & 3 deletions
@@ -12,9 +12,9 @@ By default, we don't encourage creation/destruction of infrastructure multiple t
 * `azsp` - Azure authenticated with Service Principal's ID/Secret pairs. Runnable test name prefixes are `TestAcc` and `TestAzureAcc`. Service pricipal must have `Storage Blob Data Contributor` role on ADLS account used. `ARM_SUBSCRIPTION_ID`, `ARM_CLIENT_SECRET`, `ARM_CLIENT_ID`, `ARM_TENANT_ID`, `OWNER` environment vars required. Note that these integration tests will use service principal based auth. Even though it is using a service principal, it will still be generating a personal access token to perform creation of resources.
 
 * `mws` - AWS with Databricks Multiworkspace API. Runnable test name prefix is `TestMws`. Please [check if you're able to use it](https://docs.databricks.com/administration-guide/multiworkspace/new-workspace-aws.html). Required variables are `DATABRICKS_ACCOUNT_ID`, `DATABRICKS_USERNAME`, `DATABRICKS_PASSWORD` (something you use for https://accounts.cloud.databricks.com/), `AWS_REGION`, `TEST_CIDR`, `OWNER`. Only multiworkspace resources are tested.
-* `awsst` - `DATABRICKS_CONFIG_PROFILE` (section within Databricks CLI `~/.databrickscfg` file) & `CLOUD_ENV=AWS`. In case you want to test provider on existing development single-tenant shard. Runnable test name prefixes are `TestAcc` and `TestAwsAcc`.
+* `awsst` - `DATABRICKS_CONFIG_PROFILE` (section within Databricks CLI `~/.databrickscfg` file) & `CLOUD_ENV=aws`. In case you want to test provider on existing development single-tenant shard. Runnable test name prefixes are `TestAcc` and `TestAwsAcc`.
 * `awsmt` - AWS with Databricks Multitenant Workspace. Currently work in progress and the test environment cannot be fully started.
-* most of the tests should aim to be cloud-agnostic. Though, in case of specific branching needed, you can check `CLOUD_ENV` value (possible values are `Azure`, `AWS` & `MWS`).
+* most of the tests should aim to be cloud-agnostic. Though, in case of specific branching needed, you can check `CLOUD_ENV` value (possible values are `Azure`, `aws` & `MWS`).
 * all environment variables are used by *DatabricksClient*, *provider integration tests* and *terraform configuration*.
 * **each `output` becomes an environment variable** with the case changed to upper. This gives an easy way to manage the complexity of the testing environment. This is what gives those variables for `export $(scripts/run.sh azcli --export)` under the hood.
 * `qa.EnvironmentTemplate` must be used to make readable templates with environment variable presence validation.
@@ -94,7 +94,7 @@ func TestAccListClustersIntegration(t *testing.T) {
 		AutoterminationMinutes: 15,
 	}
 
-	if cloud == "AWS" {
+	if cloud == "aws" {
 		cluster.AwsAttributes = &AwsAttributes{
 			EbsVolumeType:  EbsVolumeTypeGeneralPurposeSsd,
 			EbsVolumeCount: 1,

scripts/nightly/awsit.tf

Lines changed: 2 additions & 2 deletions
@@ -261,7 +261,7 @@ resource "azurerm_container_group" "aws" {
   cpu    = "2"
   memory = "2"
   environment_variables = {
-    CLOUD_ENV = "AWS"
+    CLOUD_ENV = "aws"
     TEST_FILTER = "TestAcc"
     DATABRICKS_HOST = databricks_mws_workspaces.this.workspace_url
     TEST_S3_BUCKET = aws_s3_bucket.ds.bucket
@@ -290,4 +290,4 @@ output "aws_workspace_id" {
 output "aws_workspace_pat" {
   value     = databricks_mws_workspaces.this.token[0].token_value
   sensitive = true
-}
+}
