Skip to content

Commit 6999211

Browse files
authored
Merge pull request #153 from kzys/exit-error
Report its exit status from Wait() correctly
2 parents ef87884 + c32330d commit 6999211

File tree

3 files changed

+135
-13
lines changed

3 files changed

+135
-13
lines changed

machine.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ type Machine struct {
173173
startOnce sync.Once
174174
// exitCh is a channel which gets closed when the VMM exits
175175
exitCh chan struct{}
176-
// err records any error executing the VMM
177-
err error
176+
// fatalErr records an error that either stops or prevent starting the VMM
177+
fatalErr error
178178

179179
// callbacks that should be run when the machine is being torn down
180180
cleanupOnce sync.Once
@@ -349,7 +349,7 @@ func (m *Machine) Wait(ctx context.Context) error {
349349
case <-ctx.Done():
350350
return ctx.Err()
351351
case <-m.exitCh:
352-
return m.err
352+
return m.fatalErr
353353
}
354354
}
355355

@@ -443,7 +443,10 @@ func (m *Machine) startVMM(ctx context.Context) error {
443443

444444
if err != nil {
445445
m.logger.Errorf("Failed to start VMM: %s", err)
446+
447+
m.fatalErr = err
446448
close(m.exitCh)
449+
447450
return err
448451
}
449452
m.logger.Debugf("VMM started socket path is %s", m.Cfg.SocketPath)
@@ -459,17 +462,19 @@ func (m *Machine) startVMM(ctx context.Context) error {
459462

460463
errCh := make(chan error)
461464
go func() {
462-
if err := m.cmd.Wait(); err != nil {
463-
m.logger.Warnf("firecracker exited: %s", err.Error())
465+
waitErr := m.cmd.Wait()
466+
if waitErr != nil {
467+
m.logger.Warnf("firecracker exited: %s", waitErr.Error())
464468
} else {
465469
m.logger.Printf("firecracker exited: status=0")
466470
}
467471

468-
if err := m.doCleanup(); err != nil {
469-
m.logger.Errorf("failed to cleanup after VM exit: %v", err)
472+
cleanupErr := m.doCleanup()
473+
if cleanupErr != nil {
474+
m.logger.Errorf("failed to cleanup after VM exit: %v", cleanupErr)
470475
}
471476

472-
errCh <- err
477+
errCh <- multierror.Append(waitErr, cleanupErr).ErrorOrNil()
473478

474479
// Notify subscribers that there will be no more values.
475480
// When err is nil, two reads are performed (waitForSocket and close exitCh goroutine),
@@ -495,17 +500,18 @@ func (m *Machine) startVMM(ctx context.Context) error {
495500
// Wait for firecracker to initialize:
496501
err = m.waitForSocket(3*time.Second, errCh)
497502
if err != nil {
498-
msg := fmt.Sprintf("Firecracker did not create API socket %s: %s", m.Cfg.SocketPath, err)
499-
err = errors.New(msg)
503+
err = errors.Wrapf(err, "Firecracker did not create API socket %s", m.Cfg.SocketPath)
504+
m.fatalErr = err
500505
close(m.exitCh)
506+
501507
return err
502508
}
503509
go func() {
504510
select {
505511
case <-ctx.Done():
506-
m.err = ctx.Err()
512+
m.fatalErr = ctx.Err()
507513
case err := <-errCh:
508-
m.err = err
514+
m.fatalErr = err
509515
}
510516

511517
signal.Stop(sigchan)

machine_test.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,3 +1075,115 @@ func TestCaptureFifoToFile_leak(t *testing.T) {
10751075
expected := `io.Copy failed to copy contents of fifo pipe: read testdata/TestCaptureFifoToFileLeak.fifo: file already closed`
10761076
assert.Contains(t, loggerBuffer.String(), expected)
10771077
}
1078+
1079+
func TestWaitWithKill(t *testing.T) {
1080+
fctesting.RequiresRoot(t)
1081+
ctx := context.Background()
1082+
1083+
socketPath := filepath.Join(testDataPath, t.Name())
1084+
defer os.Remove(socketPath)
1085+
1086+
cfg := createValidConfig(t, socketPath)
1087+
cmd := VMCommandBuilder{}.
1088+
WithSocketPath(cfg.SocketPath).
1089+
WithBin(getFirecrackerBinaryPath()).
1090+
Build(ctx)
1091+
m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd))
1092+
require.NoError(t, err)
1093+
1094+
err = m.Start(ctx)
1095+
require.NoError(t, err)
1096+
1097+
go func() {
1098+
pid, err := m.PID()
1099+
require.NoError(t, err)
1100+
1101+
process, err := os.FindProcess(pid)
1102+
require.NoError(t, err)
1103+
1104+
err = process.Kill()
1105+
require.NoError(t, err)
1106+
}()
1107+
1108+
err = m.Wait(ctx)
1109+
require.Error(t, err, "Firecracker was killed and it must be reported")
1110+
}
1111+
1112+
func TestWaitWithInvalidBinary(t *testing.T) {
1113+
ctx := context.Background()
1114+
1115+
socketPath := filepath.Join(testDataPath, t.Name())
1116+
defer os.Remove(socketPath)
1117+
1118+
cfg := createValidConfig(t, socketPath)
1119+
cmd := VMCommandBuilder{}.
1120+
WithSocketPath(socketPath).
1121+
WithBin("invalid-bin").
1122+
Build(ctx)
1123+
m, err := NewMachine(ctx, cfg, WithProcessRunner(cmd))
1124+
require.NoError(t, err)
1125+
1126+
ch := make(chan error)
1127+
1128+
go func() {
1129+
err := m.Wait(ctx)
1130+
require.Error(t, err, "Wait() reports an error")
1131+
ch <- err
1132+
}()
1133+
1134+
err = m.Start(ctx)
1135+
require.Error(t, err, "Start() reports an error")
1136+
1137+
select {
1138+
case errFromWait := <-ch:
1139+
require.Equal(t, errFromWait, err)
1140+
}
1141+
}
1142+
1143+
func TestWaitWithNoSocket(t *testing.T) {
1144+
ctx := context.Background()
1145+
1146+
socketPath := filepath.Join(testDataPath, t.Name())
1147+
defer os.Remove(socketPath)
1148+
cfg := createValidConfig(t, socketPath)
1149+
1150+
m, err := NewMachine(ctx, cfg, WithProcessRunner(exec.Command("sleep", "10")))
1151+
require.NoError(t, err)
1152+
1153+
ch := make(chan error)
1154+
1155+
go func() {
1156+
err := m.Wait(ctx)
1157+
require.Error(t, err, "Wait() reports an error")
1158+
ch <- err
1159+
}()
1160+
1161+
err = m.Start(ctx)
1162+
require.Error(t, err, "Start() reports an error")
1163+
1164+
select {
1165+
case errFromWait := <-ch:
1166+
require.Equal(t, errFromWait, err)
1167+
}
1168+
}
1169+
1170+
func createValidConfig(t *testing.T, socketPath string) Config {
1171+
return Config{
1172+
SocketPath: socketPath,
1173+
KernelImagePath: getVmlinuxPath(t),
1174+
MachineCfg: models.MachineConfiguration{
1175+
VcpuCount: Int64(2),
1176+
CPUTemplate: models.CPUTemplate(models.CPUTemplateT2),
1177+
MemSizeMib: Int64(256),
1178+
HtEnabled: Bool(false),
1179+
},
1180+
Drives: []models.Drive{
1181+
{
1182+
DriveID: String("root"),
1183+
IsRootDevice: Bool(true),
1184+
IsReadOnly: Bool(true),
1185+
PathOnHost: String(testRootfs),
1186+
},
1187+
},
1188+
}
1189+
}

network_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,11 @@ func TestNetworkMachineCNI(t *testing.T) {
316316

317317
require.NoError(t, m.StopVMM(), "failed to stop machine")
318318
waitCtx, waitCancel := context.WithTimeout(ctx, 3*time.Second)
319-
assert.NoError(t, m.Wait(waitCtx), "failed waiting for machine stop")
319+
320+
// Having an error is fine, since StopVM() kills a Firecracker process.
321+
// Shutdown() uses SendCtrAltDel action, which doesn't work with the kernel we are using here.
322+
// https://github.com/firecracker-microvm/firecracker/issues/1095
323+
assert.NotEqual(t, m.Wait(waitCtx), context.DeadlineExceeded, "failed waiting for machine stop")
320324
waitCancel()
321325

322326
_, err := os.Stat(expectedCacheDirPath)

0 commit comments

Comments
 (0)