Skip to content

Commit 3187d2d

Browse files
authored
Merge pull request moby#3754 from coryb/issue-3751
runc worker: fix sigkill handling
2 parents 86c3b26 + 6073f58 commit 3187d2d

File tree

6 files changed

+282
-45
lines changed

6 files changed

+282
-45
lines changed

executor/runcexecutor/executor.go

Lines changed: 142 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"os"
88
"os/exec"
99
"path/filepath"
10+
"strconv"
1011
"sync"
1112
"syscall"
1213
"time"
@@ -429,25 +430,129 @@ func (s *forwardIO) Stderr() io.ReadCloser {
429430
return nil
430431
}
431432

432-
// procHandle is to track the os process so we can send signals to it.
433+
// newRuncProcKiller returns an abstraction for sending SIGKILL to the
434+
// process inside the container initiated from `runc run`.
435+
func newRunProcKiller(runC *runc.Runc, id string) procKiller {
436+
return procKiller{runC: runC, id: id}
437+
}
438+
439+
// newExecProcKiller returns an abstraction for sending SIGKILL to the
440+
// process inside the container initiated from `runc exec`.
441+
func newExecProcKiller(runC *runc.Runc, id string) (procKiller, error) {
442+
// for `runc exec` we need to create a pidfile and read it later to kill
443+
// the process
444+
tdir, err := os.MkdirTemp("", "runc")
445+
if err != nil {
446+
return procKiller{}, errors.Wrap(err, "failed to create directory for runc pidfile")
447+
}
448+
449+
return procKiller{
450+
runC: runC,
451+
id: id,
452+
pidfile: filepath.Join(tdir, "pidfile"),
453+
cleanup: func() {
454+
os.RemoveAll(tdir)
455+
},
456+
}, nil
457+
}
458+
459+
type procKiller struct {
460+
runC *runc.Runc
461+
id string
462+
pidfile string
463+
cleanup func()
464+
}
465+
466+
// Cleanup will delete any tmp files created for the pidfile allocation
467+
// if this killer was for a `runc exec` process.
468+
func (k procKiller) Cleanup() {
469+
if k.cleanup != nil {
470+
k.cleanup()
471+
}
472+
}
473+
474+
// Kill will send SIGKILL to the process running inside the container.
475+
// If the process was created by `runc run` then we will use `runc kill`,
476+
// otherwise for `runc exec` we will read the pid from a pidfile and then
477+
// send the signal directly that process.
478+
func (k procKiller) Kill(ctx context.Context) (err error) {
479+
bklog.G(ctx).Debugf("sending sigkill to process in container %s", k.id)
480+
defer func() {
481+
if err != nil {
482+
bklog.G(ctx).Errorf("failed to kill process in container id %s: %+v", k.id, err)
483+
}
484+
}()
485+
486+
// this timeout is generally a no-op, the Kill ctx should already have a
487+
// shorter timeout but here as a fail-safe for future refactoring.
488+
ctx, timeout := context.WithTimeout(ctx, 10*time.Second)
489+
defer timeout()
490+
491+
if k.pidfile == "" {
492+
// for `runc run` process we use `runc kill` to terminate the process
493+
return k.runC.Kill(ctx, k.id, int(syscall.SIGKILL), nil)
494+
}
495+
496+
// `runc exec` will write the pidfile a few milliseconds after we
497+
// get the runc pid via the startedCh, so we might need to retry until
498+
// it appears in the edge case where we want to kill a process
499+
// immediately after it was created.
500+
var pidData []byte
501+
for {
502+
pidData, err = os.ReadFile(k.pidfile)
503+
if err != nil {
504+
if os.IsNotExist(err) {
505+
select {
506+
case <-ctx.Done():
507+
return errors.New("context cancelled before runc wrote pidfile")
508+
case <-time.After(10 * time.Millisecond):
509+
continue
510+
}
511+
}
512+
return errors.Wrap(err, "failed to read pidfile from runc")
513+
}
514+
break
515+
}
516+
pid, err := strconv.Atoi(string(pidData))
517+
if err != nil {
518+
return errors.Wrap(err, "read invalid pid from pidfile")
519+
}
520+
process, err := os.FindProcess(pid)
521+
if err != nil {
522+
// error only possible on non-unix hosts
523+
return errors.Wrapf(err, "failed to find process for pid %d from pidfile", pid)
524+
}
525+
defer process.Release()
526+
return process.Signal(syscall.SIGKILL)
527+
}
528+
529+
// procHandle is to track the process so we can send signals to it
530+
// and handle graceful shutdown.
433531
type procHandle struct {
434-
Process *os.Process
435-
ready chan struct{}
436-
ended chan struct{}
437-
shutdown func()
532+
// this is for the runc process (not the process in-container)
533+
monitorProcess *os.Process
534+
ready chan struct{}
535+
ended chan struct{}
536+
shutdown func()
537+
// this this only used when the request context is canceled and we need
538+
// to kill the in-container process.
539+
killer procKiller
438540
}
439541

440542
// runcProcessHandle will create a procHandle that will be monitored, where
441-
// on ctx.Done the process will be killed. If the kill fails, then the cancel
442-
// will be called. This is to allow for runc to go through its normal shutdown
443-
// procedure if the ctx is canceled and to ensure there are no zombie processes
444-
// left by runc.
445-
func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Context) {
543+
// on ctx.Done the in-container process will receive a SIGKILL. The returned
544+
// context should be used for the go-runc.(Run|Exec) invocations. The returned
545+
// context will only be canceled in the case where the request context is
546+
// canceled and we are unable to send the SIGKILL to the in-container process.
547+
// The goal is to allow for runc to gracefully shutdown when the request context
548+
// is cancelled.
549+
func runcProcessHandle(ctx context.Context, killer procKiller) (*procHandle, context.Context) {
446550
runcCtx, cancel := context.WithCancel(context.Background())
447551
p := &procHandle{
448552
ready: make(chan struct{}),
449553
ended: make(chan struct{}),
450554
shutdown: cancel,
555+
killer: killer,
451556
}
452557
// preserve the logger on the context used for the runc process handling
453558
runcCtx = bklog.WithLogger(runcCtx, bklog.G(ctx))
@@ -464,8 +569,7 @@ func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Con
464569
select {
465570
case <-ctx.Done():
466571
killCtx, timeout := context.WithTimeout(context.Background(), 7*time.Second)
467-
if err := p.Process.Kill(); err != nil {
468-
bklog.G(ctx).Errorf("failed to kill runc %s: %+v", id, err)
572+
if err := p.killer.Kill(killCtx); err != nil {
469573
select {
470574
case <-killCtx.Done():
471575
timeout()
@@ -492,8 +596,8 @@ func runcProcessHandle(ctx context.Context, id string) (*procHandle, context.Con
492596
// Release will free resources with a procHandle.
493597
func (p *procHandle) Release() {
494598
close(p.ended)
495-
if p.Process != nil {
496-
p.Process.Release()
599+
if p.monitorProcess != nil {
600+
p.monitorProcess.Release()
497601
}
498602
}
499603

@@ -506,9 +610,9 @@ func (p *procHandle) Shutdown() {
506610
}
507611
}
508612

509-
// WaitForReady will wait until the Process has been populated or the
510-
// provided context was cancelled. This should be called before using
511-
// the Process field.
613+
// WaitForReady will wait until we have received the runc pid via the go-runc
614+
// Started channel, or until the request context is canceled. This should
615+
// return without errors before attempting to send signals to the runc process.
512616
func (p *procHandle) WaitForReady(ctx context.Context) error {
513617
select {
514618
case <-ctx.Done():
@@ -518,34 +622,36 @@ func (p *procHandle) WaitForReady(ctx context.Context) error {
518622
}
519623
}
520624

521-
// WaitForStart will record the pid reported by Runc via the channel.
522-
// We wait for up to 10s for the runc process to start. If the started
625+
// WaitForStart will record the runc pid reported by go-runc via the channel.
626+
// We wait for up to 10s for the runc pid to be reported. If the started
523627
// callback is non-nil it will be called after receiving the pid.
524628
func (p *procHandle) WaitForStart(ctx context.Context, startedCh <-chan int, started func()) error {
525629
startedCtx, timeout := context.WithTimeout(ctx, 10*time.Second)
526630
defer timeout()
527-
var err error
528631
select {
529632
case <-startedCtx.Done():
530-
return errors.New("runc started message never received")
531-
case pid, ok := <-startedCh:
633+
return errors.New("go-runc started message never received")
634+
case runcPid, ok := <-startedCh:
532635
if !ok {
533-
return errors.New("runc process failed to send pid")
636+
return errors.New("go-runc failed to send pid")
534637
}
535638
if started != nil {
536639
started()
537640
}
538-
p.Process, err = os.FindProcess(pid)
641+
var err error
642+
p.monitorProcess, err = os.FindProcess(runcPid)
539643
if err != nil {
540-
return errors.Wrapf(err, "unable to find runc process for pid %d", pid)
644+
// error only possible on non-unix hosts
645+
return errors.Wrapf(err, "failed to find runc process %d", runcPid)
541646
}
542647
close(p.ready)
543648
}
544649
return nil
545650
}
546651

547-
// handleSignals will wait until the runcProcess is ready then will
548-
// send each signal received on the channel to the process.
652+
// handleSignals will wait until the procHandle is ready then will
653+
// send each signal received on the channel to the runc process (not directly
654+
// to the in-container process)
549655
func handleSignals(ctx context.Context, runcProcess *procHandle, signals <-chan syscall.Signal) error {
550656
if signals == nil {
551657
return nil
@@ -559,8 +665,15 @@ func handleSignals(ctx context.Context, runcProcess *procHandle, signals <-chan
559665
case <-ctx.Done():
560666
return nil
561667
case sig := <-signals:
562-
err := runcProcess.Process.Signal(sig)
563-
if err != nil {
668+
if sig == syscall.SIGKILL {
669+
// never send SIGKILL directly to runc, it needs to go to the
670+
// process in-container
671+
if err := runcProcess.killer.Kill(ctx); err != nil {
672+
return err
673+
}
674+
continue
675+
}
676+
if err := runcProcess.monitorProcess.Signal(sig); err != nil {
564677
bklog.G(ctx).Errorf("failed to signal %s to process: %s", sig, err)
565678
return err
566679
}

executor/runcexecutor/executor_common.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88

99
runc "github.com/containerd/go-runc"
1010
"github.com/moby/buildkit/executor"
11+
"github.com/moby/buildkit/util/bklog"
1112
"github.com/opencontainers/runtime-spec/specs-go"
1213
"github.com/pkg/errors"
1314
"golang.org/x/sync/errgroup"
@@ -21,7 +22,8 @@ func (w *runcExecutor) run(ctx context.Context, id, bundle string, process execu
2122
if process.Meta.Tty {
2223
return unsupportedConsoleError
2324
}
24-
return w.commonCall(ctx, id, bundle, process, started, func(ctx context.Context, started chan<- int, io runc.IO) error {
25+
killer := newRunProcKiller(w.runc, id)
26+
return w.commonCall(ctx, id, bundle, process, started, killer, func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error {
2527
_, err := w.runc.Run(ctx, id, bundle, &runc.CreateOpts{
2628
NoPivot: w.noPivot,
2729
Started: started,
@@ -35,25 +37,37 @@ func (w *runcExecutor) exec(ctx context.Context, id, bundle string, specsProcess
3537
if process.Meta.Tty {
3638
return unsupportedConsoleError
3739
}
38-
return w.commonCall(ctx, id, bundle, process, started, func(ctx context.Context, started chan<- int, io runc.IO) error {
40+
41+
killer, err := newExecProcKiller(w.runc, id)
42+
if err != nil {
43+
return errors.Wrap(err, "failed to initialize process killer")
44+
}
45+
defer killer.Cleanup()
46+
47+
return w.commonCall(ctx, id, bundle, process, started, killer, func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error {
3948
return w.runc.Exec(ctx, id, *specsProcess, &runc.ExecOpts{
4049
Started: started,
4150
IO: io,
51+
PidFile: pidfile,
4252
})
4353
})
4454
}
4555

46-
type runcCall func(ctx context.Context, started chan<- int, io runc.IO) error
56+
type runcCall func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error
4757

4858
// commonCall is the common run/exec logic used for non-linux runtimes. A tty
4959
// is only supported for linux, so this really just handles signal propagation
5060
// to the started runc process.
51-
func (w *runcExecutor) commonCall(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), call runcCall) error {
52-
runcProcess, ctx := runcProcessHandle(ctx, id)
61+
func (w *runcExecutor) commonCall(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), killer procKiller, call runcCall) error {
62+
runcProcess, ctx := runcProcessHandle(ctx, killer)
5363
defer runcProcess.Release()
5464

5565
eg, ctx := errgroup.WithContext(ctx)
56-
defer eg.Wait()
66+
defer func() {
67+
if err := eg.Wait(); err != nil && !errors.Is(err, context.Canceled) {
68+
bklog.G(ctx).Errorf("runc process monitoring error: %s", err)
69+
}
70+
}()
5771
defer runcProcess.Shutdown()
5872

5973
startedCh := make(chan int, 1)
@@ -65,5 +79,5 @@ func (w *runcExecutor) commonCall(ctx context.Context, id, bundle string, proces
6579
return handleSignals(ctx, runcProcess, process.Signal)
6680
})
6781

68-
return call(ctx, startedCh, &forwardIO{stdin: process.Stdin, stdout: process.Stdout, stderr: process.Stderr})
82+
return call(ctx, startedCh, &forwardIO{stdin: process.Stdin, stdout: process.Stdout, stderr: process.Stderr}, killer.pidfile)
6983
}

executor/runcexecutor/executor_linux.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ func updateRuncFieldsForHostOS(runtime *runc.Runc) {
2222
}
2323

2424
func (w *runcExecutor) run(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func()) error {
25-
return w.callWithIO(ctx, id, bundle, process, started, func(ctx context.Context, started chan<- int, io runc.IO) error {
25+
killer := newRunProcKiller(w.runc, id)
26+
return w.callWithIO(ctx, id, bundle, process, started, killer, func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error {
2627
_, err := w.runc.Run(ctx, id, bundle, &runc.CreateOpts{
2728
NoPivot: w.noPivot,
2829
Started: started,
@@ -33,22 +34,33 @@ func (w *runcExecutor) run(ctx context.Context, id, bundle string, process execu
3334
}
3435

3536
func (w *runcExecutor) exec(ctx context.Context, id, bundle string, specsProcess *specs.Process, process executor.ProcessInfo, started func()) error {
36-
return w.callWithIO(ctx, id, bundle, process, started, func(ctx context.Context, started chan<- int, io runc.IO) error {
37+
killer, err := newExecProcKiller(w.runc, id)
38+
if err != nil {
39+
return errors.Wrap(err, "failed to initialize process killer")
40+
}
41+
defer killer.Cleanup()
42+
43+
return w.callWithIO(ctx, id, bundle, process, started, killer, func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error {
3744
return w.runc.Exec(ctx, id, *specsProcess, &runc.ExecOpts{
3845
Started: started,
3946
IO: io,
47+
PidFile: pidfile,
4048
})
4149
})
4250
}
4351

44-
type runcCall func(ctx context.Context, started chan<- int, io runc.IO) error
52+
type runcCall func(ctx context.Context, started chan<- int, io runc.IO, pidfile string) error
4553

46-
func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), call runcCall) error {
47-
runcProcess, ctx := runcProcessHandle(ctx, id)
54+
func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, process executor.ProcessInfo, started func(), killer procKiller, call runcCall) error {
55+
runcProcess, ctx := runcProcessHandle(ctx, killer)
4856
defer runcProcess.Release()
4957

5058
eg, ctx := errgroup.WithContext(ctx)
51-
defer eg.Wait()
59+
defer func() {
60+
if err := eg.Wait(); err != nil && !errors.Is(err, context.Canceled) {
61+
bklog.G(ctx).Errorf("runc process monitoring error: %s", err)
62+
}
63+
}()
5264
defer runcProcess.Shutdown()
5365

5466
startedCh := make(chan int, 1)
@@ -61,7 +73,7 @@ func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, proces
6173
})
6274

6375
if !process.Meta.Tty {
64-
return call(ctx, startedCh, &forwardIO{stdin: process.Stdin, stdout: process.Stdout, stderr: process.Stderr})
76+
return call(ctx, startedCh, &forwardIO{stdin: process.Stdin, stdout: process.Stdout, stderr: process.Stderr}, killer.pidfile)
6577
}
6678

6779
ptm, ptsName, err := console.NewPty()
@@ -132,7 +144,9 @@ func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, proces
132144
if err != nil {
133145
bklog.G(ctx).Errorf("failed to resize ptm: %s", err)
134146
}
135-
err = runcProcess.Process.Signal(signal.SIGWINCH)
147+
// SIGWINCH must be sent to the runc monitor process, as
148+
// terminal resizing is done in runc.
149+
err = runcProcess.monitorProcess.Signal(signal.SIGWINCH)
136150
if err != nil {
137151
bklog.G(ctx).Errorf("failed to send SIGWINCH to process: %s", err)
138152
}
@@ -151,5 +165,5 @@ func (w *runcExecutor) callWithIO(ctx context.Context, id, bundle string, proces
151165
runcIO.stderr = pts
152166
}
153167

154-
return call(ctx, startedCh, runcIO)
168+
return call(ctx, startedCh, runcIO, killer.pidfile)
155169
}

0 commit comments

Comments
 (0)