Skip to content

Commit 3c597d1

Browse files
Wait for app deletion in DoCreate instead of DoDelete (#4176)
## Description Moves app deletion wait from `DoDelete` to `DoCreate`, and only retries when appropriate. ## Changes - On `RESOURCE_ALREADY_EXISTS` error, check app state via GET before retrying - Only retry if the app is in `DELETING` state or was just deleted (404) - Return hard error if app exists in any other state (e.g., `ACTIVE`) ## Why? - Handles apps deleted externally (manual deletion), not just via `bundle destroy` - Avoids unnecessary latency in destroy operations - Fixes the issue at the point of failure (create, not delete) - Prevents infinite retries when app exists and is not being deleted Follow up to #4102, addresses feedback from #4006. --------- Co-authored-by: Denis Bilenko <[email protected]>
1 parent de5c81f commit 3c597d1

File tree

11 files changed

+223
-35
lines changed

11 files changed

+223
-35
lines changed

.golangci.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ linters:
9393
- path-except: bundle/direct/dresources
9494
linters:
9595
- exhaustruct
96-
- path: bundle/direct/dresources/all_test.go
96+
- path: bundle/direct/dresources/.*_test.go
9797
linters:
9898
- exhaustruct
9999
- path-except: ^cmd

NEXT_CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* Skip non-exportable objects (e.g., `MLFLOW_EXPERIMENT`) during `workspace export-dir` instead of failing ([#4081](https://github.com/databricks/cli/issues/4081))
99

1010
### Bundles
11+
* Fix app deployment failure when app is in `DELETING` state ([#4176](https://github.com/databricks/cli/pull/4176))
1112
* Add `ipykernel` to the `default` template to enable Databricks Connect notebooks in Cursor/VS Code ([#4164](https://github.com/databricks/cli/pull/4164))
1213
* Add interactive SQL warehouse picker to `default-sql` and `dbt-sql` bundle templates ([#4170](https://github.com/databricks/cli/pull/4170))
1314
* Add `name`, `target` and `mode` fields to the deployment metadata file ([#4180](https://github.com/databricks/cli/pull/4180))
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
print("Hello world!")
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
bundle:
2+
name: test-app-already-exists
3+
4+
resources:
5+
apps:
6+
myapp:
7+
name: test-app-already-exists
8+
source_code_path: ./app

acceptance/bundle/resources/apps/create_already_exists/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
>>> [CLI] apps create test-app-already-exists
3+
{
4+
"app_status": {
5+
"message":"Application is running.",
6+
"state":"RUNNING"
7+
},
8+
"compute_status": {
9+
"message":"App compute is active.",
10+
"state":"ACTIVE"
11+
},
12+
"id":"1000",
13+
"name":"test-app-already-exists",
14+
"url":"test-app-already-exists-123.cloud.databricksapps.com"
15+
}
16+
17+
>>> musterr [CLI] bundle deploy
18+
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-app-already-exists/default/files...
19+
Deploying resources...
20+
Error: cannot create resources.apps.myapp: An app with the same name already exists: test-app-already-exists (409 RESOURCE_ALREADY_EXISTS)
21+
22+
Endpoint: POST [DATABRICKS_URL]/api/2.0/apps?no_compute=true
23+
HTTP Status: 409 Conflict
24+
API error_code: RESOURCE_ALREADY_EXISTS
25+
API message: An app with the same name already exists: test-app-already-exists
26+
27+
Updating deployment state...
28+
29+
>>> [CLI] apps delete test-app-already-exists
30+
{
31+
"name":""
32+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Create an app with the same name outside of bundle
2+
trace $CLI apps create test-app-already-exists
3+
4+
# Bundle deploy should fail because app already exists and is not in DELETING state
5+
trace musterr $CLI bundle deploy
6+
7+
# Cleanup: delete the app we created
8+
trace $CLI apps delete test-app-already-exists
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Local = true
2+
Cloud = false
3+
RecordRequests = false
4+
5+
[EnvMatrix]
6+
DATABRICKS_BUNDLE_ENGINE = ["direct"]

bundle/direct/dresources/app.go

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package dresources
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"time"
78

@@ -36,12 +37,36 @@ func (r *ResourceApp) DoCreate(ctx context.Context, config *apps.App) (string, *
3637
NoCompute: true,
3738
ForceSendFields: nil,
3839
}
39-
waiter, err := r.client.Apps.Create(ctx, request)
40+
41+
retrier := retries.New[apps.App](retries.WithTimeout(15*time.Minute), retries.WithRetryFunc(shouldRetry))
42+
app, err := retrier.Run(ctx, func(ctx context.Context) (*apps.App, error) {
43+
waiter, err := r.client.Apps.Create(ctx, request)
44+
if err != nil {
45+
if errors.Is(err, apierr.ErrResourceAlreadyExists) {
46+
// Check if the app is in DELETING state - only then should we retry
47+
existingApp, getErr := r.client.Apps.GetByName(ctx, config.Name)
48+
if getErr != nil {
49+
// If we can't get the app (e.g., it was just deleted), retry the create
50+
if apierr.IsMissing(getErr) {
51+
return nil, retries.Continues("app was deleted, retrying create")
52+
}
53+
return nil, retries.Halt(err)
54+
}
55+
if existingApp.ComputeStatus != nil && existingApp.ComputeStatus.State == apps.ComputeStateDeleting {
56+
return nil, retries.Continues("app is deleting, retrying create")
57+
}
58+
// App exists and is not being deleted - this is a hard error
59+
return nil, retries.Halt(err)
60+
}
61+
return nil, retries.Halt(err)
62+
}
63+
return waiter.Response, nil
64+
})
4065
if err != nil {
4166
return "", nil, err
4267
}
4368

44-
return waiter.Response.Name, nil, nil
69+
return app.Name, nil, nil
4570
}
4671

4772
func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *apps.App, _ *Changes) (*apps.App, error) {
@@ -63,10 +88,7 @@ func (r *ResourceApp) DoUpdate(ctx context.Context, id string, config *apps.App,
6388

6489
func (r *ResourceApp) DoDelete(ctx context.Context, id string) error {
6590
_, err := r.client.Apps.DeleteByName(ctx, id)
66-
if err != nil {
67-
return err
68-
}
69-
return r.waitForDeletion(ctx, id)
91+
return err
7092
}
7193

7294
func (*ResourceApp) FieldTriggers(_ bool) map[string]deployplan.ActionType {
@@ -79,34 +101,6 @@ func (r *ResourceApp) WaitAfterCreate(ctx context.Context, config *apps.App) (*a
79101
return r.waitForApp(ctx, r.client, config.Name)
80102
}
81103

82-
func (r *ResourceApp) waitForDeletion(ctx context.Context, name string) error {
83-
retrier := retries.New[struct{}](retries.WithTimeout(10*time.Minute), retries.WithRetryFunc(shouldRetry))
84-
_, err := retrier.Run(ctx, func(ctx context.Context) (*struct{}, error) {
85-
app, err := r.client.Apps.GetByName(ctx, name)
86-
if err != nil {
87-
if apierr.IsMissing(err) {
88-
return nil, nil
89-
}
90-
return nil, retries.Halt(err)
91-
}
92-
93-
if app.ComputeStatus == nil {
94-
return nil, retries.Continues("waiting for compute status")
95-
}
96-
97-
switch app.ComputeStatus.State {
98-
case apps.ComputeStateDeleting:
99-
return nil, retries.Continues("app is deleting")
100-
case apps.ComputeStateActive, apps.ComputeStateStopped, apps.ComputeStateError:
101-
err := fmt.Errorf("app %s was not deleted, current state: %s", name, app.ComputeStatus.State)
102-
return nil, retries.Halt(err)
103-
default:
104-
return nil, retries.Continues(fmt.Sprintf("app is in %s state", app.ComputeStatus.State))
105-
}
106-
})
107-
return err
108-
}
109-
110104
// waitForApp waits for the app to reach the target state. The target state is either ACTIVE or STOPPED.
111105
// Apps with no_compute set to true will reach the STOPPED state, otherwise they will reach the ACTIVE state.
112106
// We can't use the default waiter from SDK because it only waits on ACTIVE state but we need also STOPPED state.
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
package dresources
2+
3+
import (
4+
"context"
5+
"testing"
6+
7+
"github.com/databricks/cli/libs/testserver"
8+
"github.com/databricks/databricks-sdk-go"
9+
"github.com/databricks/databricks-sdk-go/service/apps"
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
// TestAppDoCreate_RetriesWhenAppIsDeleting verifies that DoCreate retries when
15+
// an app already exists but is in DELETING state.
16+
func TestAppDoCreate_RetriesWhenAppIsDeleting(t *testing.T) {
17+
server := testserver.New(t)
18+
19+
createCallCount := 0
20+
getCallCount := 0
21+
22+
server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any {
23+
createCallCount++
24+
if createCallCount == 1 {
25+
return testserver.Response{
26+
StatusCode: 409,
27+
Body: map[string]string{
28+
"error_code": "RESOURCE_ALREADY_EXISTS",
29+
"message": "An app with the same name already exists.",
30+
},
31+
}
32+
}
33+
return apps.App{
34+
Name: "test-app",
35+
ComputeStatus: &apps.ComputeStatus{
36+
State: apps.ComputeStateActive,
37+
},
38+
}
39+
})
40+
41+
server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any {
42+
getCallCount++
43+
return apps.App{
44+
Name: req.Vars["name"],
45+
ComputeStatus: &apps.ComputeStatus{
46+
State: apps.ComputeStateDeleting,
47+
},
48+
}
49+
})
50+
51+
testserver.AddDefaultHandlers(server)
52+
53+
client, err := databricks.NewWorkspaceClient(&databricks.Config{
54+
Host: server.URL,
55+
Token: "testtoken",
56+
})
57+
require.NoError(t, err)
58+
59+
r := (&ResourceApp{}).New(client)
60+
ctx := context.Background()
61+
name, _, err := r.DoCreate(ctx, &apps.App{Name: "test-app"})
62+
63+
require.NoError(t, err)
64+
assert.Equal(t, "test-app", name)
65+
assert.Equal(t, 2, createCallCount, "expected Create to be called twice (1 retry)")
66+
assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state")
67+
}
68+
69+
// TestAppDoCreate_RetriesWhenGetReturnsNotFound verifies that DoCreate retries
70+
// when the app was just deleted between the create call and the get call.
71+
func TestAppDoCreate_RetriesWhenGetReturnsNotFound(t *testing.T) {
72+
server := testserver.New(t)
73+
74+
createCallCount := 0
75+
getCallCount := 0
76+
77+
server.Handle("POST", "/api/2.0/apps", func(req testserver.Request) any {
78+
createCallCount++
79+
if createCallCount == 1 {
80+
return testserver.Response{
81+
StatusCode: 409,
82+
Body: map[string]string{
83+
"error_code": "RESOURCE_ALREADY_EXISTS",
84+
"message": "An app with the same name already exists.",
85+
},
86+
}
87+
}
88+
return apps.App{
89+
Name: "test-app",
90+
ComputeStatus: &apps.ComputeStatus{
91+
State: apps.ComputeStateActive,
92+
},
93+
}
94+
})
95+
96+
server.Handle("GET", "/api/2.0/apps/{name}", func(req testserver.Request) any {
97+
getCallCount++
98+
return testserver.Response{
99+
StatusCode: 404,
100+
Body: map[string]string{
101+
"error_code": "RESOURCE_DOES_NOT_EXIST",
102+
"message": "App not found.",
103+
},
104+
}
105+
})
106+
107+
testserver.AddDefaultHandlers(server)
108+
109+
client, err := databricks.NewWorkspaceClient(&databricks.Config{
110+
Host: server.URL,
111+
Token: "testtoken",
112+
})
113+
require.NoError(t, err)
114+
115+
r := (&ResourceApp{}).New(client)
116+
ctx := context.Background()
117+
name, _, err := r.DoCreate(ctx, &apps.App{Name: "test-app"})
118+
119+
require.NoError(t, err)
120+
assert.Equal(t, "test-app", name)
121+
assert.Equal(t, 2, createCallCount, "expected Create to be called twice")
122+
assert.Equal(t, 1, getCallCount, "expected Get to be called once to check app state")
123+
}

0 commit comments

Comments
 (0)