Skip to content

Commit 170e440

Browse files
authored
Merge pull request moby#3722 from coryb/runc-exec-zombie
fix process termination handling for runc exec
2 parents b5d3de7 + b76f8c0 commit 170e440

File tree

3 files changed

+91
-66
lines changed

3 files changed

+91
-66
lines changed

executor/runcexecutor/executor.go

Lines changed: 78 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ func New(opt Opt, networkProviders map[pb.NetMode]network.Provider) (executor.Ex
9292

9393
root := opt.Root
9494

95-
if err := os.MkdirAll(root, 0711); err != nil {
95+
if err := os.MkdirAll(root, 0o711); err != nil {
9696
return nil, errors.Wrapf(err, "failed to create %s", root)
9797
}
9898

@@ -205,7 +205,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
205205
}
206206
bundle := filepath.Join(w.root, id)
207207

208-
if err := os.Mkdir(bundle, 0711); err != nil {
208+
if err := os.Mkdir(bundle, 0o711); err != nil {
209209
return err
210210
}
211211
defer os.RemoveAll(bundle)
@@ -216,7 +216,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
216216
}
217217

218218
rootFSPath := filepath.Join(bundle, "rootfs")
219-
if err := idtools.MkdirAllAndChown(rootFSPath, 0700, identity); err != nil {
219+
if err := idtools.MkdirAllAndChown(rootFSPath, 0o700, identity); err != nil {
220220
return err
221221
}
222222
if err := mount.All(rootMount, rootFSPath); err != nil {
@@ -270,7 +270,7 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
270270
return errors.Wrapf(err, "working dir %s points to invalid target", newp)
271271
}
272272
if _, err := os.Stat(newp); err != nil {
273-
if err := idtools.MkdirAllAndChown(newp, 0755, identity); err != nil {
273+
if err := idtools.MkdirAllAndChown(newp, 0o755, identity); err != nil {
274274
return errors.Wrapf(err, "failed to create working directory %s", newp)
275275
}
276276
}
@@ -287,50 +287,17 @@ func (w *runcExecutor) Run(ctx context.Context, id string, root executor.Mount,
287287
return err
288288
}
289289

290-
// runCtx/killCtx is used for extra check in case the kill command blocks
291-
runCtx, cancelRun := context.WithCancel(context.Background())
292-
defer cancelRun()
293-
294-
ended := make(chan struct{})
295-
go func() {
296-
for {
297-
select {
298-
case <-ctx.Done():
299-
killCtx, timeout := context.WithTimeout(context.Background(), 7*time.Second)
300-
if err := w.runc.Kill(killCtx, id, int(syscall.SIGKILL), nil); err != nil {
301-
bklog.G(ctx).Errorf("failed to kill runc %s: %+v", id, err)
302-
select {
303-
case <-killCtx.Done():
304-
timeout()
305-
cancelRun()
306-
return
307-
default:
308-
}
309-
}
310-
timeout()
311-
select {
312-
case <-time.After(50 * time.Millisecond):
313-
case <-ended:
314-
return
315-
}
316-
case <-ended:
317-
return
318-
}
319-
}
320-
}()
321-
322290
bklog.G(ctx).Debugf("> creating %s %v", id, meta.Args)
323291

324292
trace.SpanFromContext(ctx).AddEvent("Container created")
325-
err = w.run(runCtx, id, bundle, process, func() {
293+
err = w.run(ctx, id, bundle, process, func() {
326294
startedOnce.Do(func() {
327295
trace.SpanFromContext(ctx).AddEvent("Container started")
328296
if started != nil {
329297
close(started)
330298
}
331299
})
332300
})
333-
close(ended)
334301
return exitError(ctx, err)
335302
}
336303

@@ -462,23 +429,87 @@ func (s *forwardIO) Stderr() io.ReadCloser {
462429
return nil
463430
}
464431

465-
// startingProcess is to track the os process so we can send signals to it.
466-
type startingProcess struct {
467-
Process *os.Process
468-
ready chan struct{}
432+
// procHandle is to track the os process so we can send signals to it.
433+
type procHandle struct {
434+
Process *os.Process
435+
ready chan struct{}
436+
ended chan struct{}
437+
shutdown func()
469438
}
470439

471-
// Release will free resources with a startingProcess.
472-
func (p *startingProcess) Release() {
440+
// runcProcessHandle will create a procHandle that will be monitored, where
441+
// on ctx.Done the process will be killed. If the kill fails, then the cancel
442+
// will be called. This is to allow for runc to go through its normal shutdown
443+
// procedure if the ctx is canceled and to ensure there are no zombie processes
444+
// left by runc.
445+
func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Context) {
446+
runcCtx, cancel := context.WithCancel(context.Background())
447+
p := &procHandle{
448+
ready: make(chan struct{}),
449+
ended: make(chan struct{}),
450+
shutdown: cancel,
451+
}
452+
// preserve the logger on the context used for the runc process handling
453+
runcCtx = bklog.WithLogger(runcCtx, bklog.G(ctx))
454+
455+
go func() {
456+
// Wait for pid
457+
select {
458+
case <-ctx.Done():
459+
return // nothing to kill
460+
case <-p.ready:
461+
}
462+
463+
for {
464+
select {
465+
case <-ctx.Done():
466+
killCtx, timeout := context.WithTimeout(context.Background(), 7*time.Second)
467+
if err := p.Process.Kill(); err != nil {
468+
bklog.G(ctx).Errorf("failed to kill runc %s: %+v", id, err)
469+
select {
470+
case <-killCtx.Done():
471+
timeout()
472+
cancel()
473+
return
474+
default:
475+
}
476+
}
477+
timeout()
478+
select {
479+
case <-time.After(50 * time.Millisecond):
480+
case <-p.ended:
481+
return
482+
}
483+
case <-p.ended:
484+
return
485+
}
486+
}
487+
}()
488+
489+
return p, runcCtx
490+
}
491+
492+
// Release will free resources with a procHandle.
493+
func (p *procHandle) Release() {
494+
close(p.ended)
473495
if p.Process != nil {
474496
p.Process.Release()
475497
}
476498
}
477499

500+
// Shutdown should be called after the runc process has exited. This will allow
501+
// the signal handling and tty resize loops to exit, terminating the
502+
// goroutines.
503+
func (p *procHandle) Shutdown() {
504+
if p.shutdown != nil {
505+
p.shutdown()
506+
}
507+
}
508+
478509
// WaitForReady will wait until the Process has been populated or the
479510
// provided context was cancelled. This should be called before using
480511
// the Process field.
481-
func (p *startingProcess) WaitForReady(ctx context.Context) error {
512+
func (p *procHandle) WaitForReady(ctx context.Context) error {
482513
select {
483514
case <-ctx.Done():
484515
return ctx.Err()
@@ -490,7 +521,7 @@ func (p *startingProcess) WaitForReady(ctx context.Context) error {
490521
// WaitForStart will record the pid reported by Runc via the channel.
491522
// We wait for up to 10s for the runc process to start. If the started
492523
// callback is non-nil it will be called after receiving the pid.
493-
func (p *startingProcess) WaitForStart(ctx context.Context, startedCh <-chan int, started func()) error {
524+
func (p *procHandle) WaitForStart(ctx context.Context, startedCh <-chan int, started func()) error {
494525
startedCtx, timeout := context.WithTimeout(ctx, 10*time.Second)
495526
defer timeout()
496527
var err error
@@ -515,7 +546,7 @@ func (p *startingProcess) WaitForStart(ctx context.Context, startedCh <-chan int
515546

516547
// handleSignals will wait until the runcProcess is ready then will
517548
// send each signal received on the channel to the process.
518-
func handleSignals(ctx context.Context, runcProcess *startingProcess, signals <-chan syscall.Signal) error {
549+
func handleSignals(ctx context.Context, runcProcess *procHandle, signals <-chan syscall.Signal) error {
519550
if signals == nil {
520551
return nil
521552
}

executor/runcexecutor/executor_common.go

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,20 @@ type runcCall func(ctx context.Context, started chan<- int, io runc.IO) error
4949
// is only supported for linux, so this really just handles signal propagation
5050
// to the started runc process.
5151
func (w *runcExecutor) commonCall(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), call runcCall) error {
52-
runcProcess := &startingProcess{
53-
ready: make(chan struct{}),
54-
}
52+
runcProcess, ctx := runcProcessHandle(ctx, id)
5553
defer runcProcess.Release()
5654

57-
var eg errgroup.Group
58-
egCtx, cancel := context.WithCancel(ctx)
55+
eg, ctx := errgroup.WithContext(ctx)
5956
defer eg.Wait()
60-
defer cancel()
57+
defer runcProcess.Shutdown()
6158

6259
startedCh := make(chan int, 1)
6360
eg.Go(func() error {
64-
return runcProcess.WaitForStart(egCtx, startedCh, started)
61+
return runcProcess.WaitForStart(ctx, startedCh, started)
6562
})
6663

6764
eg.Go(func() error {
68-
return handleSignals(egCtx, runcProcess, process.Signal)
65+
return handleSignals(ctx, runcProcess, process.Signal)
6966
})
7067

7168
return call(ctx, startedCh, &forwardIO{stdin: process.Stdin, stdout: process.Stdout, stderr: process.Stderr})

executor/runcexecutor/executor_linux.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,20 @@ func (w *runcExecutor) exec(ctx context.Context, id, bundle string, specsProcess
4444
type runcCall func(ctx context.Context, started chan<- int, io runc.IO) error
4545

4646
func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), call runcCall) error {
47-
runcProcess := &startingProcess{
48-
ready: make(chan struct{}),
49-
}
47+
runcProcess, ctx := runcProcessHandle(ctx, id)
5048
defer runcProcess.Release()
5149

52-
var eg errgroup.Group
53-
egCtx, cancel := context.WithCancel(ctx)
50+
eg, ctx := errgroup.WithContext(ctx)
5451
defer eg.Wait()
55-
defer cancel()
52+
defer runcProcess.Shutdown()
5653

5754
startedCh := make(chan int, 1)
5855
eg.Go(func() error {
59-
return runcProcess.WaitForStart(egCtx, startedCh, started)
56+
return runcProcess.WaitForStart(ctx, startedCh, started)
6057
})
6158

6259
eg.Go(func() error {
63-
return handleSignals(egCtx, runcProcess, process.Signal)
60+
return handleSignals(ctx, runcProcess, process.Signal)
6461
})
6562

6663
if !process.Meta.Tty {
@@ -84,7 +81,7 @@ func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, proces
8481
}
8582
pts.Close()
8683
ptm.Close()
87-
cancel() // this will shutdown resize and signal loops
84+
runcProcess.Shutdown()
8885
err := eg.Wait()
8986
if err != nil {
9087
bklog.G(ctx).Warningf("error while shutting down tty io: %s", err)
@@ -119,13 +116,13 @@ func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, proces
119116
}
120117

121118
eg.Go(func() error {
122-
err := runcProcess.WaitForReady(egCtx)
119+
err := runcProcess.WaitForReady(ctx)
123120
if err != nil {
124121
return err
125122
}
126123
for {
127124
select {
128-
case <-egCtx.Done():
125+
case <-ctx.Done():
129126
return nil
130127
case resize := <-process.Resize:
131128
err = ptm.Resize(console.WinSize{

0 commit comments

Comments
 (0)