Skip to content

Commit a0aa88c

Browse files
Add KillCaller feature to test server for CLI crash recovery testing (#4146)
Closes: #4140

## Changes

Add a `KillCaller` option to the test server that terminates the CLI process when specific endpoints are hit. This enables testing crash recovery scenarios like `bundle deploy` interrupted mid-execution.

`KillCaller` specifies how many times to kill the caller before allowing normal responses, enabling retry/recovery testing.

```toml
[[Server]]
Pattern = "GET /api/2.0/workspace/list"
KillCaller = 2 # Kill first 2 calls, then allow
```

## Why

Enables testing CLI crash recovery scenarios (e.g., `bundle deploy` interrupted mid-execution).

## Tests

Added `acceptance/selftest/kill_caller/` tests.

---------

Signed-off-by: Varun Deep Saini <[email protected]>
Co-authored-by: Denis Bilenko <[email protected]>
1 parent 9747a93 commit a0aa88c

File tree

23 files changed

+292
-2
lines changed

23 files changed

+292
-2
lines changed

acceptance/internal/config.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,12 @@ type ServerStub struct {
142142
// Configure as "1ms", "2s", "3m", etc.
143143
// See [time.ParseDuration] for details.
144144
Delay time.Duration
145+
146+
// Number of times to kill the caller process before returning normal responses.
147+
// 0 = never kill (default), 1 = kill once then allow, 2 = kill twice then allow, etc.
148+
// Useful for testing crash recovery scenarios where first deploy crashes but retry succeeds.
149+
// Requires DATABRICKS_CLI_TEST_PID=1 to be set in the test environment.
150+
KillCaller int
145151
}
146152

147153
// FindConfigs finds all the config relevant for this test,

acceptance/internal/prepare_server.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,13 +184,23 @@ func startLocalServer(t *testing.T,
184184
s.ResponseCallback = logResponseCallback(t)
185185
}
186186

187+
// Track remaining kill counts per pattern (for KillCaller > 0)
188+
killCounters := make(map[string]int)
189+
killCountersMu := &sync.Mutex{}
190+
187191
for ind := range stubs {
188192
// We want later stubs to take precedence, because then leaf configs take precedence over parent directory configs
189193
// In gorilla/mux earlier handlers take precedence, so we need to reverse the order
190194
stub := stubs[len(stubs)-1-ind]
191195
require.NotEmpty(t, stub.Pattern)
192196
items := strings.Split(stub.Pattern, " ")
193197
require.Len(t, items, 2)
198+
199+
// Initialize kill counter for this pattern
200+
if stub.KillCaller > 0 {
201+
killCounters[stub.Pattern] = stub.KillCaller
202+
}
203+
194204
s.Handle(items[0], items[1], func(req testserver.Request) any {
195205
if stub.Delay > 0 {
196206
ctx := req.Context
@@ -209,6 +219,11 @@ func startLocalServer(t *testing.T,
209219
}
210220
}
211221

222+
if shouldKillCaller(stub, killCounters, killCountersMu) {
223+
killCaller(t, stub.Pattern, req.Headers)
224+
return testserver.Response{StatusCode: http.StatusOK}
225+
}
226+
212227
return stub.Response
213228
})
214229
}
@@ -218,6 +233,42 @@ func startLocalServer(t *testing.T,
218233
return s.URL
219234
}
220235

236+
// shouldKillCaller reports whether the current request handled by stub should
// kill the calling process, consuming one unit of the remaining kill budget
// when it does.
//
// It returns false immediately (without taking mu) when stub.KillCaller <= 0.
// Otherwise, while holding mu, it returns false if the counter stored under
// stub.Pattern is already <= 0; if the counter is positive it is decremented
// and true is returned.
func shouldKillCaller(stub ServerStub, killCounters map[string]int, mu *sync.Mutex) bool {
237+
if stub.KillCaller <= 0 {
238+
return false
239+
}
240+
mu.Lock()
241+
defer mu.Unlock()
// The counter is looked up by stub.Pattern, so stubs with the same Pattern
// share one budget; a pattern with no entry reads as 0 and never kills.
242+
if killCounters[stub.Pattern] <= 0 {
243+
return false
244+
}
245+
killCounters[stub.Pattern]--
246+
return true
247+
}
248+
249+
// killCaller kills the process whose PID is obtained from the request headers
// via testserver.ExtractPidFromHeaders, and logs the PID and pattern on success.
//
// Each failure is reported through t.Errorf, after which the function returns
// without going further: an extracted PID of 0, an error from os.FindProcess,
// or an error from process.Kill. pattern is used only in the success log line.
//
// NOTE(review): the error message below implies the PID travels in the
// User-Agent header; confirm against ExtractPidFromHeaders.
func killCaller(t *testing.T, pattern string, headers http.Header) {
250+
pid := testserver.ExtractPidFromHeaders(headers)
251+
// A PID of 0 is treated as "no PID present in the headers".
if pid == 0 {
252+
t.Errorf("KillCaller configured but test-pid not found in User-Agent")
253+
return
254+
}
255+
256+
process, err := os.FindProcess(pid)
257+
if err != nil {
258+
t.Errorf("Failed to find process %d: %s", pid, err)
259+
return
260+
}
261+
262+
// Use process.Kill() for cross-platform compatibility.
263+
// On Unix, this sends SIGKILL. On Windows, this calls TerminateProcess.
264+
if err := process.Kill(); err != nil {
265+
t.Errorf("Failed to kill process %d: %s", pid, err)
266+
return
267+
}
268+
269+
t.Logf("KillCaller: killed PID %d (pattern: %s)", pid, pattern)
270+
}
271+
221272
func startProxyServer(t *testing.T,
222273
logRequests bool,
223274
includeHeaders []string,

acceptance/selftest/kill_caller/currentuser/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
>>> errcode [CLI] current-user me
3+
[PROCESS_KILLED]
4+
5+
Exit code: [KILLED]
6+
Script continued after kill
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
trace errcode $CLI current-user me
2+
echo "Script continued after kill"
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Kill the CLI when it calls /Me endpoint (once, then allow)
2+
[[Server]]
3+
Pattern = "GET /api/2.0/preview/scim/v2/Me"
4+
KillCaller = 1

acceptance/selftest/kill_caller/multi_pattern/out.test.toml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
2+
>>> errcode [CLI] current-user me
3+
[PROCESS_KILLED]
4+
5+
Exit code: [KILLED]
6+
Me attempt 1 done
7+
8+
>>> errcode [CLI] current-user me
9+
[PROCESS_KILLED]
10+
11+
Exit code: [KILLED]
12+
Me attempt 2 done
13+
14+
>>> [CLI] current-user me
15+
{
16+
"id":"123",
17+
"userName":"[email protected]"
18+
}
19+
Me attempt 3 done - success!
20+
21+
>>> errcode [CLI] workspace list /
22+
[PROCESS_KILLED]
23+
24+
Exit code: [KILLED]
25+
Workspace attempt 1 done
26+
27+
>>> [CLI] workspace list /
28+
ID Type Language Path
29+
Workspace attempt 2 done - success!
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Test pattern 1: /Me endpoint (kills first 2, then allows)
2+
trace errcode $CLI current-user me
3+
echo "Me attempt 1 done"
4+
5+
trace errcode $CLI current-user me
6+
echo "Me attempt 2 done"
7+
8+
trace $CLI current-user me
9+
echo "Me attempt 3 done - success!"
10+
11+
# Test pattern 2: /workspace/list endpoint (kills first 1, then allows)
12+
trace errcode $CLI workspace list /
13+
echo "Workspace attempt 1 done"
14+
15+
trace $CLI workspace list /
16+
echo "Workspace attempt 2 done - success!"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Test that multiple patterns can have independent KillCaller counts
2+
# Pattern 1: Kill first 2 requests to /Me endpoint
3+
# Pattern 2: Kill first 1 request to /workspace/list endpoint
4+
5+
[[Server]]
6+
Pattern = "GET /api/2.0/preview/scim/v2/Me"
7+
KillCaller = 2
8+
Response.Body = '''
9+
{
10+
"id": "123",
11+
"userName": "[email protected]"
12+
}
13+
'''
14+
15+
[[Server]]
16+
Pattern = "GET /api/2.0/workspace/list"
17+
KillCaller = 1

0 commit comments

Comments
 (0)