Skip to content

Commit fa4cfca

Browse files
committed
libct: use pidfd and epoll to wait the init process exit
Signed-off-by: lifubang <[email protected]>
1 parent c3a41d7 commit fa4cfca

File tree

4 files changed

+95
-15
lines changed

4 files changed

+95
-15
lines changed

delete.go

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,16 @@ import (
55
"fmt"
66
"os"
77
"path/filepath"
8-
"time"
98

109
"github.com/opencontainers/runc/libcontainer"
1110
"github.com/urfave/cli"
12-
13-
"golang.org/x/sys/unix"
1411
)
1512

16-
func killContainer(container *libcontainer.Container) error {
17-
_ = container.Signal(unix.SIGKILL)
18-
for range 100 {
19-
time.Sleep(100 * time.Millisecond)
20-
if err := container.Signal(unix.Signal(0)); err != nil {
21-
return container.Destroy()
22-
}
13+
// killAndDestroy first calls container.EnsureKilled and returns its error
// unchanged if that fails; only after a successful EnsureKilled does it call
// container.Destroy and return that result.
func killAndDestroy(container *libcontainer.Container) error {
14+
if err := container.EnsureKilled(); err != nil {
15+
return err
2316
}
24-
return errors.New("container init still running")
17+
return container.Destroy()
2518
}
2619

2720
var deleteCommand = cli.Command{
@@ -71,7 +64,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
7164
// namespace) there may be some leftover processes in the
7265
// container's cgroup.
7366
if force {
74-
return killContainer(container)
67+
return killAndDestroy(container)
7568
}
7669
s, err := container.Status()
7770
if err != nil {
@@ -81,7 +74,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
8174
case libcontainer.Stopped:
8275
return container.Destroy()
8376
case libcontainer.Created:
84-
return killContainer(container)
77+
return killAndDestroy(container)
8578
default:
8679
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
8780
}

internal/linux/linux.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,14 @@ func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7272
})
7373
return os.NewSyscallError("sendmsg", err)
7474
}
75+
76+
// EpollWait wraps [unix.EpollWait]. The call is issued through retryOnEINTR2;
// on failure it returns a zero count and the error wrapped by
// os.NewSyscallError under the name "epollwait", otherwise it returns the
// count reported by unix.EpollWait.
77+
func EpollWait(epfd int, events []unix.EpollEvent, msec int) (n int, err error) {
78+
n, err = retryOnEINTR2(func() (int, error) {
79+
return unix.EpollWait(epfd, events, msec)
80+
})
81+
if err != nil {
82+
return 0, os.NewSyscallError("epollwait", err)
83+
}
84+
return n, nil
85+
}

libcontainer/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ container.Resume()
230230
// send signal to container's init process.
231231
container.Signal(signal)
232232

233+
// kill container's init process (SIGKILL) and wait for the kernel to finish killing it.
234+
container.EnsureKilled()
235+
233236
// update container resource constraints.
234237
container.Set(config)
235238

libcontainer/container_linux.go

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"golang.org/x/sys/unix"
2222

2323
"github.com/opencontainers/cgroups"
24+
"github.com/opencontainers/runc/internal/linux"
2425
"github.com/opencontainers/runc/libcontainer/configs"
2526
"github.com/opencontainers/runc/libcontainer/exeseal"
2627
"github.com/opencontainers/runc/libcontainer/intelrdt"
@@ -377,9 +378,13 @@ func (c *Container) start(process *Process) (retErr error) {
377378

378379
// Signal sends a specified signal to container's init.
379380
//
380-
// When s is SIGKILL and the container does not have its own PID namespace, all
381-
// the container's processes are killed. In this scenario, the libcontainer
381+
// When s is SIGKILL:
382+
// 1. If the container does not have its own PID namespace, all the
383+
// container's processes are killed. In this scenario, the libcontainer
382384
// user may be required to implement a proper child reaper.
385+
// 2. Otherwise, we just send the SIGKILL signal to the init process,
386+
// but we don't wait for the init process to disappear. If you want to
387+
// wait, please use c.EnsureKilled instead.
383388
func (c *Container) Signal(s os.Signal) error {
384389
c.m.Lock()
385390
defer c.m.Unlock()
@@ -431,6 +436,74 @@ func (c *Container) signal(s os.Signal) error {
431436
return nil
432437
}
433438

439+
// killViaPidfd sends SIGKILL to the container's init process through a pidfd
// and waits for that pidfd to report an event via epoll.
//
// It returns nil once epoll reports an event for the pidfd, the underlying
// error if opening the pidfd, setting up epoll, sending the signal or waiting
// fails, and a "container init still running" error if the 10s wait ends
// without an event for the pidfd.
//
// NOTE(review): c.initProcess is read here without holding c.m and without a
// nil check — confirm that callers guarantee an init process exists.
func (c *Container) killViaPidfd() error {
440+
pidfd, err := unix.PidfdOpen(c.initProcess.pid(), 0)
441+
if err != nil {
442+
return err
443+
}
444+
defer unix.Close(pidfd)
445+
446+
epollfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
447+
if err != nil {
448+
return err
449+
}
450+
defer unix.Close(epollfd)
451+
452+
// Register the pidfd for readability; pidfd_open(2) documents that a pidfd
// polls readable once the process it refers to has terminated.
event := unix.EpollEvent{
453+
Events: unix.EPOLLIN,
454+
Fd: int32(pidfd),
455+
}
456+
if err := unix.EpollCtl(epollfd, unix.EPOLL_CTL_ADD, pidfd, &event); err != nil {
457+
return err
458+
}
459+
460+
// The pidfd is already registered with epoll at this point, so the wait
// below observes the exit even if it happens right after the signal.
if err := unix.PidfdSendSignal(pidfd, unix.SIGKILL, nil, 0); err != nil {
461+
return err
462+
}
463+
464+
events := make([]unix.EpollEvent, 1)
465+
// Set the timeout to 10s, the same as in kill below.
466+
n, err := linux.EpollWait(epollfd, events, 10000)
467+
if err != nil {
468+
return err
469+
}
470+
if n > 0 {
471+
for i := range n {
472+
event := events[i]
473+
if event.Fd == int32(pidfd) {
474+
return nil
475+
}
476+
}
477+
}
478+
// No event for the pidfd was reported before the timeout elapsed.
return errors.New("container init still running")
479+
}
480+
481+
// kill sends SIGKILL through c.Signal and then polls with signal 0, up to 100
// times at 100ms intervals (about 10s in total). It returns nil as soon as a
// signal-0 probe returns an error, and a "container init still running" error
// if every probe succeeds.
func (c *Container) kill() error {
482+
// The result of the SIGKILL itself is deliberately discarded; the outcome
// is decided by the probes below.
_ = c.Signal(unix.SIGKILL)
483+
for i := 0; i < 100; i++ {
484+
time.Sleep(100 * time.Millisecond)
485+
// Any error from the probe is treated as "init is gone"; presumably this
// is the signal failing because the process no longer exists — confirm
// against c.Signal's error cases.
if err := c.Signal(unix.Signal(0)); err != nil {
486+
return nil
487+
}
488+
}
489+
return errors.New("container init still running")
490+
}
491+
492+
// EnsureKilled kills the container and waits for the kernel to finish killing it.
//
// For a container with a private PID namespace it first tries killViaPidfd;
// if that fails, the error is logged at debug level and it falls back to
// kill. Containers without a private PID namespace go straight to kill.
493+
func (c *Container) EnsureKilled() error {
494+
// When a container doesn't have a private pidns, we have to kill all processes
495+
// in the cgroup, so it's simpler to use `cgroup.kill` or `unix.Kill`.
496+
if c.config.Namespaces.IsPrivate(configs.NEWPID) {
497+
err := c.killViaPidfd()
498+
if err == nil {
499+
return nil
500+
}
501+
502+
logrus.Debugf("pidfd & epoll failed, falling back to unix.Signal: %v", err)
503+
}
504+
return c.kill()
505+
}
506+
434507
func (c *Container) createExecFifo() (retErr error) {
435508
rootuid, err := c.config.HostRootUID()
436509
if err != nil {

0 commit comments

Comments
 (0)