@@ -21,6 +21,7 @@ import (
2121 "golang.org/x/sys/unix"
2222
2323 "github.com/opencontainers/cgroups"
24+ "github.com/opencontainers/runc/internal/linux"
2425 "github.com/opencontainers/runc/libcontainer/configs"
2526 "github.com/opencontainers/runc/libcontainer/exeseal"
2627 "github.com/opencontainers/runc/libcontainer/intelrdt"
@@ -377,9 +378,13 @@ func (c *Container) start(process *Process) (retErr error) {
377378
378379// Signal sends a specified signal to container's init.
379380//
380- // When s is SIGKILL and the container does not have its own PID namespace, all
381- // the container's processes are killed. In this scenario, the libcontainer
381+ // When s is SIGKILL:
382+ // 1. If the container does not have its own PID namespace, all the
383+ // container's processes are killed. In this scenario, the libcontainer
382384// user may be required to implement a proper child reaper.
385+ // 2. Otherwise, we just send the SIGKILL signal to the init process,
386+ // but we don't wait for the init process to disappear. If you want to
387+ // wait, please use c.EnsureKilled instead.
383388func (c * Container ) Signal (s os.Signal ) error {
384389 c .m .Lock ()
385390 defer c .m .Unlock ()
@@ -431,6 +436,82 @@ func (c *Container) signal(s os.Signal) error {
431436 return nil
432437}
433438
439+ func (c * Container ) killViaPidfd () error {
440+ c .m .Lock ()
441+ defer c .m .Unlock ()
442+
443+ // To avoid a PID reuse attack, don't kill non-running container.
444+ if ! c .hasInit () {
445+ return ErrNotRunning
446+ }
447+
448+ pidfd , err := unix .PidfdOpen (c .initProcess .pid (), 0 )
449+ if err != nil {
450+ return err
451+ }
452+ defer unix .Close (pidfd )
453+
454+ epollfd , err := unix .EpollCreate1 (unix .EPOLL_CLOEXEC )
455+ if err != nil {
456+ return err
457+ }
458+ defer unix .Close (epollfd )
459+
460+ event := unix.EpollEvent {
461+ Events : unix .EPOLLIN ,
462+ Fd : int32 (pidfd ),
463+ }
464+ if err := unix .EpollCtl (epollfd , unix .EPOLL_CTL_ADD , pidfd , & event ); err != nil {
465+ return err
466+ }
467+
468+ if err := unix .PidfdSendSignal (pidfd , unix .SIGKILL , nil , 0 ); err != nil {
469+ return err
470+ }
471+
472+ events := make ([]unix.EpollEvent , 1 )
473+ // Set the timeout to 10s, the same as in kill below.
474+ n , err := linux .EpollWait (epollfd , events , 10000 )
475+ if err != nil {
476+ return err
477+ }
478+ if n > 0 {
479+ for i := range n {
480+ event := events [i ]
481+ if event .Fd == int32 (pidfd ) {
482+ return nil
483+ }
484+ }
485+ }
486+ return errors .New ("container init still running" )
487+ }
488+
489+ func (c * Container ) kill () error {
490+ _ = c .Signal (unix .SIGKILL )
491+ for i := 0 ; i < 100 ; i ++ {
492+ time .Sleep (100 * time .Millisecond )
493+ if err := c .Signal (unix .Signal (0 )); err != nil {
494+ return nil
495+ }
496+ }
497+ return errors .New ("container init still running" )
498+ }
499+
500+ // EnsureKilled kills the container and waits for the kernel to finish killing it.
501+ func (c * Container ) EnsureKilled () error {
502+ // When a container doesn't have a private pidns, we have to kill all processes
503+ // in the cgroup, it's more simpler to use `cgroup.kill` or `unix.Kill`.
504+ if c .config .Namespaces .IsPrivate (configs .NEWPID ) {
505+ var err error
506+ if err = c .killViaPidfd (); err == nil {
507+ return nil
508+ }
509+
510+ logrus .Debugf ("pidfd & epoll failed, falling back to unix.Signal: %v" , err )
511+ }
512+ return c .kill ()
513+ }
514+
434515func (c * Container ) createExecFifo () (retErr error ) {
435516 rootuid , err := c .config .HostRootUID ()
436517 if err != nil {
0 commit comments