Commit b6da1f9
direct: Support recreate for pipelines (#3339)
## Changes

- Support recreation of pipelines in direct deployment.
- Fix the Recreate() logic in direct deployment (it was unused before).
- Add RecreateFields to resource settings as a declarative way of configuring recreation.
- Add a RecreateAllowed field to ResourceSettings.
- Refactor to share more diff-related logic between plan and apply.

## Tests

New acceptance tests for pipeline recreation.
1 parent be331f8 commit b6da1f9
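The changes above name two new settings, RecreateFields and RecreateAllowed, on ResourceSettings, but the Go files that carry them are not among the excerpts shown below. As a rough sketch only — the struct shape and the needsRecreate helper are assumptions, with just the two field names taken from the commit message — the recreate decision shared by plan and apply could look like this:

```go
// Illustrative sketch, not the committed code: only the names
// RecreateFields and RecreateAllowed come from the commit message;
// the struct shape and helper below are assumptions.
package deployplan

// ResourceSettings configures deployment behavior for one resource
// type (e.g. pipelines).
type ResourceSettings struct {
	// RecreateFields declares which config paths force a
	// delete-and-create instead of an in-place update when they
	// change, e.g. "catalog" or "ingestion_definition.connection_name".
	RecreateFields []string

	// RecreateAllowed gates whether this resource type may be
	// recreated at all.
	RecreateAllowed bool
}

// needsRecreate reports whether any changed field path is declared
// as recreate-triggering for the resource.
func needsRecreate(s ResourceSettings, changed []string) bool {
	if !s.RecreateAllowed {
		return false
	}
	declared := make(map[string]struct{}, len(s.RecreateFields))
	for _, f := range s.RecreateFields {
		declared[f] = struct{}{}
	}
	for _, f := range changed {
		if _, ok := declared[f]; ok {
			return true
		}
	}
	return false
}
```

Under that assumed shape, declaring "catalog" (or "ingestion_definition.connection_name") in RecreateFields for pipelines would make a diff touching that path plan as `recreate pipelines.my`, which is what the acceptance-test outputs below verify.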

File tree: 24 files changed, +636 -37 lines changed

acceptance/bundle/deploy/pipeline/recreate/test.toml

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@ Local = true
 Cloud = true
 RequiresUnityCatalog = true
 
-EnvMatrix.DATABRICKS_CLI_DEPLOYMENT = ["terraform"] # pipeline recreation and ${resources}
+EnvMatrix.DATABRICKS_CLI_DEPLOYMENT = ["terraform"] # ${resources} support
 
 Ignore = [
   "databricks.yml"
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+trace cat databricks.yml
+touch foo.py
+touch bar.py
+trace $CLI bundle plan # should show 'create'
+trace $CLI bundle deploy
+
+ppid1=`read_id.py pipelines my`
+echo "$ppid1:PIPELINE_ID_1" >> ACC_REPLS
+
+print_requests() {
+  jq --sort-keys 'select(.method != "GET" and (.path | contains("/pipelines")))' < out.requests.txt
+  rm -f out.requests.txt
+}
+
+trace print_requests
+
+trace update_file.py databricks.yml $CONFIG_UPDATE
+trace $CLI bundle plan # should show 'recreate'
+trace $CLI bundle deploy --auto-approve
+trace print_requests
+
+title "Fetch pipeline ID and verify remote state"
+
+ppid2=`read_id.py pipelines my`
+echo "$ppid2:PIPELINE_ID_2" >> ACC_REPLS
+trace $CLI pipelines get $ppid2
+
+title "Verify that original pipeline is gone"
+trace musterr $CLI pipelines get $ppid1
+
+title "Destroy the pipeline and verify that it's removed from the state and from remote"
+trace $CLI bundle destroy --auto-approve
+
+trace print_requests
+trace musterr $CLI pipelines get $ppid2
+rm out.requests.txt
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"]
Lines changed: 147 additions & 0 deletions
@@ -0,0 +1,147 @@
+
+>>> cat databricks.yml
+bundle:
+  name: acc-[UNIQUE_NAME]
+
+resources:
+  pipelines:
+    my:
+      name: test-pipeline-[UNIQUE_NAME]
+      #storage: dbfs:/pipelines/custom
+      catalog: mycatalog1
+      #ingestion_definition: {"connection_name": "my_connection", "objects": [{}]}
+      libraries:
+        - file:
+            path: "./foo.py"
+
+>>> [CLI] bundle plan
+create pipelines.my
+
+>>> [CLI] bundle deploy
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files...
+Deploying resources...
+Updating deployment state...
+Deployment complete!
+
+>>> print_requests
+{
+  "body": {
+    "catalog": "mycatalog1",
+    "channel": "CURRENT",
+    "deployment": {
+      "kind": "BUNDLE",
+      "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition": "ADVANCED",
+    "libraries": [
+      {
+        "file": {
+          "path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name": "test-pipeline-[UNIQUE_NAME]"
+  },
+  "method": "POST",
+  "path": "/api/2.0/pipelines"
+}
+
+>>> update_file.py databricks.yml catalog1 catalog2
+
+>>> [CLI] bundle plan
+recreate pipelines.my
+
+>>> [CLI] bundle deploy --auto-approve
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files...
+
+This action will result in the deletion or recreation of the following DLT Pipelines along with the
+Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will
+restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline
+properties such as the 'catalog' or 'storage' are changed:
+  recreate pipeline my
+Deploying resources...
+Updating deployment state...
+Deployment complete!
+
+>>> print_requests
+{
+  "method": "DELETE",
+  "path": "/api/2.0/pipelines/[PIPELINE_ID_1]"
+}
+{
+  "body": {
+    "catalog": "mycatalog2",
+    "channel": "CURRENT",
+    "deployment": {
+      "kind": "BUNDLE",
+      "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition": "ADVANCED",
+    "libraries": [
+      {
+        "file": {
+          "path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name": "test-pipeline-[UNIQUE_NAME]"
+  },
+  "method": "POST",
+  "path": "/api/2.0/pipelines"
+}
+
+=== Fetch pipeline ID and verify remote state
+>>> [CLI] pipelines get [PIPELINE_ID_2]
+{
+  "creator_user_name":"[USERNAME]",
+  "last_modified":[UNIX_TIME_MILLIS],
+  "name":"test-pipeline-[UNIQUE_NAME]",
+  "pipeline_id":"[PIPELINE_ID_2]",
+  "run_as_user_name":"[USERNAME]",
+  "spec": {
+    "catalog":"mycatalog2",
+    "channel":"CURRENT",
+    "deployment": {
+      "kind":"BUNDLE",
+      "metadata_file_path":"/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition":"ADVANCED",
+    "id":"[PIPELINE_ID_2]",
+    "libraries": [
+      {
+        "file": {
+          "path":"/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name":"test-pipeline-[UNIQUE_NAME]"
+  },
+  "state":"IDLE"
+}
+
+=== Verify that original pipeline is gone
+>>> musterr [CLI] pipelines get [PIPELINE_ID_1]
+Error: The specified pipeline [PIPELINE_ID_1] was not found.
+
+Exit code (musterr): 1
+
+=== Destroy the pipeline and verify that it's removed from the state and from remote
+>>> [CLI] bundle destroy --auto-approve
+The following resources will be deleted:
+  delete pipeline my
+
+All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default
+
+Deleting files...
+Destroy complete!
+
+>>> print_requests
+{
+  "method": "DELETE",
+  "path": "/api/2.0/pipelines/[PIPELINE_ID_2]"
+}
+
+>>> musterr [CLI] pipelines get [PIPELINE_ID_2]
+Error: The specified pipeline [PIPELINE_ID_2] was not found.
+
+Exit code (musterr): 1
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+envsubst < $TESTDIR/../databricks.yml.tmpl > databricks.yml
+source $TESTDIR/../_script
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Env.CONFIG_UPDATE = "catalog1 catalog2"
+Env.CATALOG_KEY = "catalog"
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+Local = true
+Cloud = false
+
+[EnvMatrix]
+DATABRICKS_CLI_DEPLOYMENT = ["terraform", "direct-exp"]
Lines changed: 163 additions & 0 deletions
@@ -0,0 +1,163 @@
+
+>>> cat databricks.yml
+bundle:
+  name: acc-[UNIQUE_NAME]
+
+resources:
+  pipelines:
+    my:
+      name: test-pipeline-[UNIQUE_NAME]
+      #storage: dbfs:/pipelines/custom
+      #catalog: mycatalog1
+      ingestion_definition: {"connection_name": "my_connection", "objects": [{}]}
+      libraries:
+        - file:
+            path: "./foo.py"
+
+>>> [CLI] bundle plan
+create pipelines.my
+
+>>> [CLI] bundle deploy
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files...
+Deploying resources...
+Updating deployment state...
+Deployment complete!
+
+>>> print_requests
+{
+  "body": {
+    "channel": "CURRENT",
+    "deployment": {
+      "kind": "BUNDLE",
+      "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition": "ADVANCED",
+    "ingestion_definition": {
+      "connection_name": "my_connection",
+      "objects": [
+        {}
+      ]
+    },
+    "libraries": [
+      {
+        "file": {
+          "path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name": "test-pipeline-[UNIQUE_NAME]"
+  },
+  "method": "POST",
+  "path": "/api/2.0/pipelines"
+}
+
+>>> update_file.py databricks.yml my_connection my_new_connection
+
+>>> [CLI] bundle plan
+recreate pipelines.my
+
+>>> [CLI] bundle deploy --auto-approve
+Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files...
+
+This action will result in the deletion or recreation of the following DLT Pipelines along with the
+Streaming Tables (STs) and Materialized Views (MVs) managed by them. Recreating the Pipelines will
+restore the defined STs and MVs through full refresh. Note that recreation is necessary when pipeline
+properties such as the 'catalog' or 'storage' are changed:
+  recreate pipeline my
+Deploying resources...
+Updating deployment state...
+Deployment complete!
+
+>>> print_requests
+{
+  "method": "DELETE",
+  "path": "/api/2.0/pipelines/[PIPELINE_ID_1]"
+}
+{
+  "body": {
+    "channel": "CURRENT",
+    "deployment": {
+      "kind": "BUNDLE",
+      "metadata_file_path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition": "ADVANCED",
+    "ingestion_definition": {
+      "connection_name": "my_new_connection",
+      "objects": [
+        {}
+      ]
+    },
+    "libraries": [
+      {
+        "file": {
+          "path": "/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name": "test-pipeline-[UNIQUE_NAME]"
+  },
+  "method": "POST",
+  "path": "/api/2.0/pipelines"
+}
+
+=== Fetch pipeline ID and verify remote state
+>>> [CLI] pipelines get [PIPELINE_ID_2]
+{
+  "creator_user_name":"[USERNAME]",
+  "last_modified":[UNIX_TIME_MILLIS],
+  "name":"test-pipeline-[UNIQUE_NAME]",
+  "pipeline_id":"[PIPELINE_ID_2]",
+  "run_as_user_name":"[USERNAME]",
+  "spec": {
+    "channel":"CURRENT",
+    "deployment": {
+      "kind":"BUNDLE",
+      "metadata_file_path":"/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/state/metadata.json"
+    },
+    "edition":"ADVANCED",
+    "id":"[PIPELINE_ID_2]",
+    "ingestion_definition": {
+      "connection_name":"my_new_connection",
+      "objects": [
+        {}
+      ]
+    },
+    "libraries": [
+      {
+        "file": {
+          "path":"/Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default/files/foo.py"
+        }
+      }
+    ],
+    "name":"test-pipeline-[UNIQUE_NAME]",
+    "storage":"dbfs:/pipelines/[PIPELINE_ID_2]"
+  },
+  "state":"IDLE"
+}
+
+=== Verify that original pipeline is gone
+>>> musterr [CLI] pipelines get [PIPELINE_ID_1]
+Error: The specified pipeline [PIPELINE_ID_1] was not found.
+
+Exit code (musterr): 1
+
+=== Destroy the pipeline and verify that it's removed from the state and from remote
+>>> [CLI] bundle destroy --auto-approve
+The following resources will be deleted:
+  delete pipeline my
+
+All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/acc-[UNIQUE_NAME]/default
+
+Deleting files...
+Destroy complete!
+
+>>> print_requests
+{
+  "method": "DELETE",
+  "path": "/api/2.0/pipelines/[PIPELINE_ID_2]"
+}
+
+>>> musterr [CLI] pipelines get [PIPELINE_ID_2]
+Error: The specified pipeline [PIPELINE_ID_2] was not found.
+
+Exit code (musterr): 1
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+envsubst < $TESTDIR/../databricks.yml.tmpl > databricks.yml
+source $TESTDIR/../_script
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Env.CONFIG_UPDATE = "my_connection my_new_connection"
+Env.ING_KEY = "ingestion_definition"
