Skip to content

Commit 3023134

Browse files
address comments
1 parent 48bd034 commit 3023134

File tree

10 files changed

+135
-29
lines changed

10 files changed

+135
-29
lines changed

acceptance/bundle/deploy/signal-cleanup/out.test.toml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

acceptance/bundle/deploy/signal-cleanup/output.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ Deployment in progress, sending interrupt signal...
77

88
>>> wait [PID]
99
Operation interrupted. Gracefully shutting down...
10+
Error: terraform apply: exit status 1
11+
12+
Updating deployment state...
13+
Error: failed in rate limiter: context canceled
14+
1015

1116
Exit code: 130
1217

@@ -38,3 +43,15 @@ Exit code: 130
3843
}
3944
}
4045
}
46+
47+
>>> [CLI] bundle debug plan
48+
{
49+
"plan": {
50+
"resources.jobs.job1": {
51+
"action": "create"
52+
},
53+
"resources.jobs.job2": {
54+
"action": "create"
55+
}
56+
}
57+
}

acceptance/bundle/deploy/signal-cleanup/script

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,6 @@ trace cat out.requests.txt | jq 'select(.method == "POST" and (.path | contains(
2727
title "A creation request for job1 should be recorded in the requests file. No request for job2 should exist since the process was terminated mid deployment."
2828
trace cat out.requests.txt | jq 'select(.method == "POST" and (.path | contains("/api/2.2/jobs/create")))'
2929

30+
trace $CLI bundle debug plan
31+
3032
rm out.requests.txt

acceptance/bundle/deploy/signal-cleanup/test.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ Local = true
22
Cloud = false
33
RecordRequests = true
44

5+
# Test only terraform engine (signal handling is the same for direct)
6+
EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform"]
7+
58
# Add delay to first job creation to ensure we can interrupt during deployment
69
[[Server]]
710
Pattern = "POST /api/2.2/jobs/create"

acceptance/bundle/destroy/signal-cleanup/output.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ Destroy in progress, sending interrupt signal...
1515

1616
>>> wait [PID]
1717
Operation interrupted. Gracefully shutting down...
18+
Error: terraform apply: exit status 1
19+
1820

1921
Exit code: 130
2022

@@ -30,4 +32,14 @@ Exit code: 130
3032
}
3133
}
3234

33-
=== [CLI]
35+
>>> [CLI] bundle debug plan
36+
{
37+
"plan": {
38+
"resources.jobs.job1": {
39+
"action": "skip"
40+
},
41+
"resources.jobs.job2": {
42+
"action": "skip"
43+
}
44+
}
45+
}

acceptance/bundle/destroy/signal-cleanup/script

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,6 @@ trace cat out.requests.txt | jq 'select(.method == "POST" and (.path | contains(
2929
title "A deletion request for job2 should be recorded in the requests file. No request for job1 should exist since the process was terminated mid destroy."
3030
trace cat out.requests.txt | jq 'select(.method == "POST" and (.path | contains("/api/2.2/jobs/delete")))'
3131

32+
trace $CLI bundle debug plan
33+
3234
rm out.requests.txt

bundle/bundle.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ type Bundle struct {
123123
// Stores the locker responsible for acquiring/releasing a deployment lock.
124124
Locker *locker.Locker
125125

126+
// LockReleaseOnce ensures the lock is only released once
127+
LockReleaseOnce sync.Once
128+
126129
// TerraformPlanPath is the path to the plan from the terraform CLI
127130
TerraformPlanPath string
128131

bundle/deploy/lock/release.go

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,22 @@ func (m *release) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics
4444
return nil
4545
}
4646

47-
log.Infof(ctx, "Releasing deployment lock")
48-
switch m.goal {
49-
case GoalDeploy:
50-
return diag.FromErr(b.Locker.Unlock(ctx))
51-
case GoalBind, GoalUnbind:
52-
return diag.FromErr(b.Locker.Unlock(ctx))
53-
case GoalDestroy:
54-
return diag.FromErr(b.Locker.Unlock(ctx, locker.AllowLockFileNotExist))
55-
default:
56-
return diag.Errorf("unknown goal for lock release: %s", m.goal)
57-
}
47+
// Make lock release idempotent using sync.Once to prevent race conditions
48+
// This ensures the lock is only released once even if called multiple times
49+
var diags diag.Diagnostics
50+
b.LockReleaseOnce.Do(func() {
51+
log.Infof(ctx, "Releasing deployment lock")
52+
switch m.goal {
53+
case GoalDeploy:
54+
diags = diag.FromErr(b.Locker.Unlock(ctx))
55+
case GoalBind, GoalUnbind:
56+
diags = diag.FromErr(b.Locker.Unlock(ctx))
57+
case GoalDestroy:
58+
diags = diag.FromErr(b.Locker.Unlock(ctx, locker.AllowLockFileNotExist))
59+
default:
60+
diags = diag.Errorf("unknown goal for lock release: %s", m.goal)
61+
}
62+
})
63+
64+
return diags
5865
}

bundle/phases/deploy.go

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -136,40 +136,98 @@ func uploadLibraries(ctx context.Context, b *bundle.Bundle, libs map[string][]li
136136
}
137137

138138
// registerGracefulCleanup sets up signal handlers to release the lock
139-
// before the process terminates. Returns a cleanup function for the normal exit path.
139+
// before the process terminates. Returns a new context that will be cancelled
140+
// when a signal is received, and a cleanup function for the exit path.
141+
//
142+
// This follows idiomatic Go patterns for graceful shutdown:
143+
// 1. Use context cancellation to signal shutdown to the main routine
144+
// 2. Use a done channel to wait for the main routine to complete
145+
// 3. Only exit after confirming the main routine has terminated
140146
//
141147
// Catches SIGINT (Ctrl+C), SIGTERM, SIGHUP, and SIGQUIT.
142148
// Note: SIGKILL and SIGSTOP cannot be caught - the kernel terminates the process directly.
143-
func registerGracefulCleanup(ctx context.Context, b *bundle.Bundle, goal lock.Goal) func() {
149+
func registerGracefulCleanup(ctx context.Context, b *bundle.Bundle, goal lock.Goal) (context.Context, func()) {
150+
// Create a cancellable context to propagate cancellation to the main routine
151+
ctx, cancel := context.WithCancel(ctx)
152+
153+
// Channel to signal that the main routine has completed
154+
done := make(chan struct{})
155+
156+
// Channel to signal that a signal was received and handled
157+
signalReceived := make(chan struct{})
158+
159+
// Channel to receive OS signals
144160
sigChan := make(chan os.Signal, 1)
145161
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP, syscall.SIGQUIT)
146162

147-
// Start goroutine to handle signals
148-
go func() {
163+
signalHandler := func() {
164+
// Wait for a signal to be received.
149165
sig := <-sigChan
150-
// Stop listening for more signals
166+
167+
// Stop listening for more signals. This allows for multiple interrupts
168+
// to cause the program to force exit.
151169
signal.Stop(sigChan)
152170

171+
// Signal that we received an interrupt
172+
close(signalReceived)
173+
153174
cmdio.LogString(ctx, "Operation interrupted. Gracefully shutting down...")
154175

155-
// Release the lock.
156-
bundle.ApplyContext(ctx, b, lock.Release(goal))
176+
// Cancel the context to signal the main routine to stop
177+
cancel()
178+
179+
// Wait for the main routine to complete before releasing the lock
180+
// This ensures we don't exit while operations are still in progress
181+
<-done
157182

158-
// Exit immediately with standard signal exit code (128 + signal number).
159-
// The deferred cleanup function returned below won't run because we exit here.
183+
// Release the lock using a context without cancellation to avoid cancellation issues
184+
// We use context.WithoutCancel to preserve context values (like user agent)
185+
// but remove the cancellation signal so the lock release can complete
186+
releaseCtx := context.WithoutCancel(ctx)
187+
bundle.ApplyContext(releaseCtx, b, lock.Release(goal))
188+
189+
// Calculate exit code (128 + signal number)
160190
exitCode := 128
161191
if s, ok := sig.(syscall.Signal); ok {
162192
exitCode += int(s)
163193
}
194+
195+
// Exit with the appropriate signal exit code
164196
os.Exit(exitCode)
165-
}()
197+
}
198+
199+
// Start goroutine to handle signals
200+
go signalHandler()
166201

167-
// Return cleanup function for normal exit path
168-
return func() {
202+
// Return cleanup function for the exit path
203+
// This should be called via defer to ensure it runs even if there's a panic
204+
cleanup := func() {
205+
// Stop listening for signals
169206
signal.Stop(sigChan)
170-
// Don't close the channel - it causes the goroutine to receive nil
171-
bundle.ApplyContext(ctx, b, lock.Release(goal))
207+
208+
// Release the lock (idempotent thanks to sync.Once in lock.Release)
209+
// Use context.WithoutCancel to preserve context values but remove cancellation
210+
releaseCtx := context.WithoutCancel(ctx)
211+
bundle.ApplyContext(releaseCtx, b, lock.Release(goal))
212+
213+
// Signal that the main routine has completed.
214+
// Once the signal is recieved,
215+
// This must be done AFTER all cleanup is complete
216+
close(done)
217+
218+
// If a signal was received, wait indefinitely for the signal handler to exit
219+
// This prevents the main function from returning and exiting with a different code
220+
// If no signal was received, signalReceived will never be closed, so we just return
221+
select {
222+
case <-signalReceived:
223+
// Signal was received, wait forever for os.Exit() in signal handler
224+
select {}
225+
default:
226+
// No signal received, proceed with normal exit
227+
}
172228
}
229+
230+
return ctx, cleanup
173231
}
174232

175233
// The deploy phase deploys artifacts and resources.
@@ -189,7 +247,8 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand
189247
}
190248

191249
// lock is acquired here - set up signal handlers and defer cleanup
192-
defer registerGracefulCleanup(ctx, b, lock.GoalDeploy)()
250+
ctx, cleanup := registerGracefulCleanup(ctx, b, lock.GoalDeploy)
251+
defer cleanup()
193252

194253
libs := deployPrepare(ctx, b, false, directDeployment)
195254
if logdiag.HasError(ctx) {

bundle/phases/destroy.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ func Destroy(ctx context.Context, b *bundle.Bundle, directDeployment bool) {
130130
}
131131

132132
// lock is acquired here - set up signal handlers and defer cleanup
133-
defer registerGracefulCleanup(ctx, b, lock.GoalDestroy)()
133+
ctx, cleanup := registerGracefulCleanup(ctx, b, lock.GoalDestroy)
134+
defer cleanup()
134135

135136
if !directDeployment {
136137
bundle.ApplySeqContext(ctx, b,

0 commit comments

Comments
 (0)