Skip to content

Commit c26c185

Browse files
committed
machine: make Wait() safe for multiple calls
Prior to this change, multiple calls to Wait() could result in the error being lost, and for subsequent callers to wait until their context was canceled rather than waiting for the VMM to exit. This change makes it such that multiple callers of Wait() will all block for the VMM exit (or their individual context cancellation) and will receive the same error (for non-canceled contexts). Signed-off-by: Samuel Karp <[email protected]>
1 parent aea8573 commit c26c185

File tree

1 file changed

+24
-9
lines changed

1 file changed

+24
-9
lines changed

machine.go

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,12 @@ type Machine struct {
148148
cmd *exec.Cmd
149149
logger *log.Entry
150150
machineConfig models.MachineConfiguration // The actual machine config as reported by Firecracker
151-
errCh chan error
152-
startOnce sync.Once
151+
// startOnce ensures that the machine can only be started once
152+
startOnce sync.Once
153+
// exitCh is a channel which gets closed when the VMM exits
154+
exitCh chan struct{}
155+
// err records any error executing the VMM
156+
err error
153157
}
154158

155159
// Logger returns a logrus logger appropriate for logging hypervisor messages
@@ -201,7 +205,9 @@ func NewMachine(ctx context.Context, cfg Config, opts ...Opt) (*Machine, error)
201205
return nil, err
202206
}
203207

204-
m := &Machine{}
208+
m := &Machine{
209+
exitCh: make(chan struct{}),
210+
}
205211
logger := log.New()
206212

207213
if cfg.Debug {
@@ -251,13 +257,15 @@ func (m *Machine) Start(ctx context.Context) error {
251257
return m.startInstance(ctx)
252258
}
253259

254-
// Wait will wait until the firecracker process has finished
260+
// Wait will wait until the firecracker process has finished. Wait is safe to
261+
// call concurrently, and will deliver the same error to all callers, subject to
262+
// each caller's context cancellation.
255263
func (m *Machine) Wait(ctx context.Context) error {
256264
select {
257265
case <-ctx.Done():
258266
return ctx.Err()
259-
case err := <-m.errCh:
260-
return err
267+
case <-m.exitCh:
268+
return m.err
261269
}
262270
}
263271

@@ -297,11 +305,12 @@ func (m *Machine) attachDrives(ctx context.Context, drives ...models.Drive) erro
297305
func (m *Machine) startVMM(ctx context.Context) error {
298306
m.logger.Printf("Called startVMM(), setting up a VMM on %s", m.cfg.SocketPath)
299307

300-
m.errCh = make(chan error)
308+
errCh := make(chan error)
301309

302310
err := m.cmd.Start()
303311
if err != nil {
304312
m.logger.Errorf("Failed to start VMM: %s", err)
313+
close(m.exitCh)
305314
return err
306315
}
307316
m.logger.Debugf("VMM started socket path is %s", m.cfg.SocketPath)
@@ -316,7 +325,7 @@ func (m *Machine) startVMM(ctx context.Context) error {
316325
os.Remove(m.cfg.SocketPath)
317326
os.Remove(m.cfg.LogFifo)
318327
os.Remove(m.cfg.MetricsFifo)
319-
m.errCh <- err
328+
errCh <- err
320329
}()
321330

322331
// Set up a signal handler and pass INT, QUIT, and TERM through to firecracker
@@ -334,12 +343,18 @@ func (m *Machine) startVMM(ctx context.Context) error {
334343
}()
335344

336345
// Wait for firecracker to initialize:
337-
err = m.waitForSocket(3*time.Second, m.errCh)
346+
err = m.waitForSocket(3*time.Second, errCh)
338347
if err != nil {
339348
msg := fmt.Sprintf("Firecracker did not create API socket %s: %s", m.cfg.SocketPath, err)
340349
err = errors.New(msg)
350+
close(m.exitCh)
341351
return err
342352
}
353+
go func() {
354+
err := <-errCh
355+
m.err = err
356+
close(m.exitCh)
357+
}()
343358

344359
m.logger.Debugf("returning from startVMM()")
345360
return nil

0 commit comments

Comments
 (0)