Skip to content

Commit 4cd82d6

Browse files
committed
test: add Debian 12 exec test to catch init script issues
This test would have caught the bug fixed in this PR: the VM crashed (kernel panic) when the entrypoint exited immediately, which killed exec-agent before any exec command could connect.

The test:
- Uses debian:12-slim (a minimal image)
- Waits for exec-agent to start (not the app, which exits immediately)
- Verifies that exec commands still work after the main app has exited
- Confirms that we are running on Debian 12 (bookworm)

Also fixes a pre-existing bug in api_test.go, where the call to NewManager was missing the volumeManager argument.
1 parent 4f20f52 commit 4cd82d6

File tree

2 files changed

+182
-15
lines changed

2 files changed

+182
-15
lines changed

cmd/api/api/api_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ func newTestService(t *testing.T) *ApiService {
3030

3131
systemMgr := system.NewManager(p)
3232
networkMgr := network.NewManager(p, cfg)
33-
maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB for tests
34-
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, maxOverlaySize)
3533
volumeMgr := volumes.NewManager(p)
34+
maxOverlaySize := int64(100 * 1024 * 1024 * 1024) // 100GB for tests
35+
instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, maxOverlaySize)
3636

3737
// Register cleanup for orphaned Cloud Hypervisor processes
3838
t.Cleanup(func() {

cmd/api/api/exec_test.go

Lines changed: 180 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
6363
Name: "docker.io/library/nginx:alpine",
6464
})
6565
require.NoError(t, err)
66-
66+
6767
img, ok := imgResp.(oapi.GetImage200JSONResponse)
6868
if ok && img.Status == "ready" {
6969
imageReady = true
@@ -82,8 +82,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
8282
Name: "exec-test",
8383
Image: "docker.io/library/nginx:alpine",
8484
Network: &struct {
85-
Enabled *bool `json:"enabled,omitempty"`
86-
Name *string `json:"name,omitempty"`
85+
Enabled *bool `json:"enabled,omitempty"`
8786
}{
8887
Enabled: &networkDisabled,
8988
},
@@ -108,8 +107,8 @@ func TestExecInstanceNonTTY(t *testing.T) {
108107
case <-nginxTimeout:
109108
t.Fatal("Timeout waiting for nginx to start")
110109
case <-nginxTicker.C:
111-
logs, err := svc.InstanceManager.GetInstanceLogs(ctx(), inst.Id, false, 100)
112-
if err == nil && strings.Contains(logs, "start worker processes") {
110+
logs := collectTestLogs(t, svc, inst.Id, 100)
111+
if strings.Contains(logs, "start worker processes") {
113112
nginxReady = true
114113
t.Log("Nginx is ready")
115114
}
@@ -132,7 +131,7 @@ func TestExecInstanceNonTTY(t *testing.T) {
132131
consolePath := paths.New(svc.Config.DataDir).InstanceConsoleLog(inst.Id)
133132
if consoleData, err := os.ReadFile(consolePath); err == nil {
134133
lines := strings.Split(string(consoleData), "\n")
135-
134+
136135
// Print exec-agent specific logs
137136
t.Logf("=== Exec Agent Logs ===")
138137
for _, line := range lines {
@@ -155,40 +154,39 @@ func TestExecInstanceNonTTY(t *testing.T) {
155154
var exit *exec.ExitStatus
156155
var stdout, stderr outputBuffer
157156
var execErr error
158-
157+
159158
t.Log("Testing exec command: whoami")
160159
maxRetries := 10
161160
for i := 0; i < maxRetries; i++ {
162161
stdout = outputBuffer{}
163162
stderr = outputBuffer{}
164-
163+
165164
exit, execErr = exec.ExecIntoInstance(ctx(), actualInst.VsockSocket, exec.ExecOptions{
166165
Command: []string{"/bin/sh", "-c", "whoami"},
167166
Stdin: nil,
168167
Stdout: &stdout,
169168
Stderr: &stderr,
170169
TTY: false,
171170
})
172-
171+
173172
if execErr == nil {
174173
break
175174
}
176-
175+
177176
t.Logf("Exec attempt %d/%d failed, retrying: %v", i+1, maxRetries, execErr)
178177
time.Sleep(1 * time.Second)
179178
}
180-
179+
181180
// Assert exec worked
182181
require.NoError(t, execErr, "exec should succeed after retries")
183182
require.NotNil(t, exit, "exit status should be returned")
184183
require.Equal(t, 0, exit.Code, "whoami should exit with code 0")
185-
186184

187185
// Verify output
188186
outStr := stdout.String()
189187
t.Logf("Command output: %q", outStr)
190188
require.Contains(t, outStr, "root", "whoami should return root user")
191-
189+
192190
// Cleanup
193191
t.Log("Cleaning up instance...")
194192
delResp, err := svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{
@@ -199,6 +197,175 @@ func TestExecInstanceNonTTY(t *testing.T) {
199197
require.True(t, ok, "expected 204 response")
200198
}
201199

200+
// TestExecWithDebianMinimal tests exec with a minimal Debian image.
201+
// This test specifically catches issues that wouldn't appear with Alpine-based images:
202+
// 1. Debian's default entrypoint (bash) exits immediately without a TTY
203+
// 2. exec-agent must keep running even after the main app exits
204+
// 3. The VM must not kernel panic when the entrypoint exits
205+
func TestExecWithDebianMinimal(t *testing.T) {
206+
// Require KVM access for VM creation
207+
if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) {
208+
t.Fatal("/dev/kvm not available - ensure KVM is enabled and user is in 'kvm' group (sudo usermod -aG kvm $USER)")
209+
}
210+
211+
if testing.Short() {
212+
t.Skip("Skipping integration test in short mode")
213+
}
214+
215+
svc := newTestService(t)
216+
217+
// Ensure system files (kernel and initrd) are available
218+
t.Log("Ensuring system files...")
219+
systemMgr := system.NewManager(paths.New(svc.Config.DataDir))
220+
err := systemMgr.EnsureSystemFiles(ctx())
221+
require.NoError(t, err)
222+
t.Log("System files ready")
223+
224+
// Create Debian 12 slim image (minimal, no iproute2)
225+
t.Log("Creating debian:12-slim image...")
226+
imgResp, err := svc.CreateImage(ctx(), oapi.CreateImageRequestObject{
227+
Body: &oapi.CreateImageRequest{
228+
Name: "docker.io/library/debian:12-slim",
229+
},
230+
})
231+
require.NoError(t, err)
232+
imgCreated, ok := imgResp.(oapi.CreateImage202JSONResponse)
233+
require.True(t, ok, "expected 202 response")
234+
assert.Equal(t, "docker.io/library/debian:12-slim", imgCreated.Name)
235+
236+
// Wait for image to be ready
237+
t.Log("Waiting for image to be ready...")
238+
timeout := time.After(60 * time.Second)
239+
ticker := time.NewTicker(1 * time.Second)
240+
defer ticker.Stop()
241+
242+
imageReady := false
243+
for !imageReady {
244+
select {
245+
case <-timeout:
246+
t.Fatal("Timeout waiting for image to be ready")
247+
case <-ticker.C:
248+
imgResp, err := svc.GetImage(ctx(), oapi.GetImageRequestObject{
249+
Name: "docker.io/library/debian:12-slim",
250+
})
251+
require.NoError(t, err)
252+
253+
img, ok := imgResp.(oapi.GetImage200JSONResponse)
254+
if ok && img.Status == "ready" {
255+
imageReady = true
256+
t.Log("Image is ready")
257+
} else if ok {
258+
t.Logf("Image status: %s", img.Status)
259+
}
260+
}
261+
}
262+
263+
// Create instance (network disabled in test environment)
264+
t.Log("Creating Debian instance...")
265+
networkDisabled := false
266+
instResp, err := svc.CreateInstance(ctx(), oapi.CreateInstanceRequestObject{
267+
Body: &oapi.CreateInstanceRequest{
268+
Name: "debian-exec-test",
269+
Image: "docker.io/library/debian:12-slim",
270+
Network: &struct {
271+
Enabled *bool `json:"enabled,omitempty"`
272+
}{
273+
Enabled: &networkDisabled,
274+
},
275+
},
276+
})
277+
require.NoError(t, err)
278+
279+
inst, ok := instResp.(oapi.CreateInstance201JSONResponse)
280+
require.True(t, ok, "expected 201 response")
281+
require.NotEmpty(t, inst.Id)
282+
t.Logf("Instance created: %s", inst.Id)
283+
284+
// Cleanup on exit
285+
t.Cleanup(func() {
286+
t.Log("Cleaning up instance...")
287+
svc.DeleteInstance(ctx(), oapi.DeleteInstanceRequestObject{Id: inst.Id})
288+
})
289+
290+
// Get actual instance to access vsock fields
291+
actualInst, err := svc.InstanceManager.GetInstance(ctx(), inst.Id)
292+
require.NoError(t, err)
293+
require.NotNil(t, actualInst)
294+
295+
// Wait for exec-agent to be ready by checking logs
296+
// This is the key difference: we wait for exec-agent, not the app (which exits immediately)
297+
t.Log("Waiting for exec-agent to start...")
298+
execAgentReady := false
299+
agentTimeout := time.After(15 * time.Second)
300+
agentTicker := time.NewTicker(500 * time.Millisecond)
301+
defer agentTicker.Stop()
302+
303+
var logs string
304+
for !execAgentReady {
305+
select {
306+
case <-agentTimeout:
307+
// Dump logs on failure for debugging
308+
logs = collectTestLogs(t, svc, inst.Id, 200)
309+
t.Logf("Console logs:\n%s", logs)
310+
t.Fatal("Timeout waiting for exec-agent to start")
311+
case <-agentTicker.C:
312+
logs = collectTestLogs(t, svc, inst.Id, 100)
313+
if strings.Contains(logs, "[exec-agent] listening on vsock port 2222") {
314+
execAgentReady = true
315+
t.Log("exec-agent is ready")
316+
}
317+
}
318+
}
319+
320+
// Verify the app exited but VM is still usable (key behavior this test validates)
321+
logs = collectTestLogs(t, svc, inst.Id, 200)
322+
assert.Contains(t, logs, "overlay-init: app exited with code", "App should have exited")
323+
324+
// Test exec commands work even though the main app (bash) has exited
325+
t.Log("Testing exec command: echo")
326+
var stdout, stderr outputBuffer
327+
exit, err := exec.ExecIntoInstance(ctx(), actualInst.VsockSocket, exec.ExecOptions{
328+
Command: []string{"echo", "hello from debian"},
329+
Stdout: &stdout,
330+
Stderr: &stderr,
331+
TTY: false,
332+
})
333+
require.NoError(t, err, "exec should succeed")
334+
require.NotNil(t, exit)
335+
require.Equal(t, 0, exit.Code, "echo should exit with code 0")
336+
assert.Contains(t, stdout.String(), "hello from debian")
337+
338+
// Verify we're actually in Debian
339+
t.Log("Verifying OS release...")
340+
stdout = outputBuffer{}
341+
exit, err = exec.ExecIntoInstance(ctx(), actualInst.VsockSocket, exec.ExecOptions{
342+
Command: []string{"cat", "/etc/os-release"},
343+
Stdout: &stdout,
344+
TTY: false,
345+
})
346+
require.NoError(t, err)
347+
require.Equal(t, 0, exit.Code)
348+
assert.Contains(t, stdout.String(), "Debian", "Should be running Debian")
349+
assert.Contains(t, stdout.String(), "bookworm", "Should be Debian 12 (bookworm)")
350+
t.Logf("OS: %s", strings.Split(stdout.String(), "\n")[0])
351+
352+
}
353+
354+
// collectTestLogs collects logs from an instance (non-streaming)
355+
func collectTestLogs(t *testing.T, svc *ApiService, instanceID string, n int) string {
356+
logChan, err := svc.InstanceManager.StreamInstanceLogs(ctx(), instanceID, n, false)
357+
if err != nil {
358+
return ""
359+
}
360+
361+
var lines []string
362+
for line := range logChan {
363+
lines = append(lines, line)
364+
}
365+
366+
return strings.Join(lines, "\n")
367+
}
368+
202369
// outputBuffer is a simple buffer for capturing exec output
203370
type outputBuffer struct {
204371
buf bytes.Buffer

0 commit comments

Comments
 (0)