Skip to content

Commit 1fe4ed9

Browse files
committed
libct: use pidfd and epoll to wait the init process exit
Signed-off-by: lfbzhm <[email protected]>
1 parent 08525df commit 1fe4ed9

File tree

3 files changed

+89
-15
lines changed

3 files changed

+89
-15
lines changed

delete.go

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,16 @@ import (
55
"fmt"
66
"os"
77
"path/filepath"
8-
"time"
98

109
"github.com/opencontainers/runc/libcontainer"
1110
"github.com/urfave/cli"
12-
13-
"golang.org/x/sys/unix"
1411
)
1512

16-
func killContainer(container *libcontainer.Container) error {
17-
_ = container.Signal(unix.SIGKILL)
18-
for i := 0; i < 100; i++ {
19-
time.Sleep(100 * time.Millisecond)
20-
if err := container.Signal(unix.Signal(0)); err != nil {
21-
return container.Destroy()
22-
}
13+
func killAndDestroy(container *libcontainer.Container) error {
14+
if err := container.KillAndWaitExit(); err != nil {
15+
return err
2316
}
24-
return errors.New("container init still running")
17+
return container.Destroy()
2518
}
2619

2720
var deleteCommand = cli.Command{
@@ -71,7 +64,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
7164
// namespace) there may be some leftover processes in the
7265
// container's cgroup.
7366
if force {
74-
return killContainer(container)
67+
return killAndDestroy(container)
7568
}
7669
s, err := container.Status()
7770
if err != nil {
@@ -81,7 +74,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
8174
case libcontainer.Stopped:
8275
return container.Destroy()
8376
case libcontainer.Created:
84-
return killContainer(container)
77+
return killAndDestroy(container)
8578
default:
8679
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
8780
}

libcontainer/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ container.Resume()
230230
// send signal to container's init process.
231231
container.Signal(signal)
232232

233+
// kill container's init process and wait for it to exit.
234+
container.KillAndWaitExit()
235+
233236
// update container resource constraints.
234237
container.Set(config)
235238

libcontainer/container_linux.go

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,9 +369,13 @@ func (c *Container) start(process *Process) (retErr error) {
369369

370370
// Signal sends a specified signal to container's init.
371371
//
372-
// When s is SIGKILL and the container does not have its own PID namespace, all
373-
// the container's processes are killed. In this scenario, the libcontainer
372+
// When s is SIGKILL:
373+
// 1. If the container does not have its own PID namespace, all the
374+
// container's processes are killed. In this scenario, the libcontainer
374375
// user may be required to implement a proper child reaper.
376+
// 2. Otherwise, we just send the SIGKILL signal to the init process,
377+
// but we don't wait for the init process to exit. If you want to wait for it,
378+
// please use c.KillAndWaitExit instead.
375379
func (c *Container) Signal(s os.Signal) error {
376380
c.m.Lock()
377381
defer c.m.Unlock()
@@ -423,6 +427,80 @@ func (c *Container) signal(s os.Signal) error {
423427
return nil
424428
}
425429

430+
func (c *Container) killViaPidfd() error {
431+
pidfd, err := unix.PidfdOpen(c.initProcess.pid(), 0)
432+
if err != nil {
433+
return err
434+
}
435+
defer unix.Close(pidfd)
436+
437+
epollfd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC)
438+
if err != nil {
439+
return err
440+
}
441+
defer unix.Close(epollfd)
442+
443+
event := unix.EpollEvent{
444+
Events: unix.EPOLLIN,
445+
Fd: int32(pidfd),
446+
}
447+
if err := unix.EpollCtl(epollfd, unix.EPOLL_CTL_ADD, pidfd, &event); err != nil {
448+
return err
449+
}
450+
451+
// We don't need unix.PidfdSendSignal because the Go runtime will use it if possible.
452+
_ = c.Signal(unix.SIGKILL)
453+
454+
events := make([]unix.EpollEvent, 1)
455+
for {
456+
// Set the timeout to 10s, the same as in kill below.
457+
n, err := unix.EpollWait(epollfd, events, 10000)
458+
if err != nil {
459+
if err == unix.EINTR {
460+
continue
461+
}
462+
return err
463+
}
464+
465+
if n == 0 {
466+
return errors.New("container init still running")
467+
}
468+
469+
if n > 0 {
470+
event := events[0]
471+
if event.Fd == int32(pidfd) {
472+
return nil
473+
}
474+
}
475+
}
476+
}
477+
478+
func (c *Container) kill() error {
479+
_ = c.Signal(unix.SIGKILL)
480+
for i := 0; i < 100; i++ {
481+
time.Sleep(100 * time.Millisecond)
482+
if err := c.Signal(unix.Signal(0)); err != nil {
483+
return nil
484+
}
485+
}
486+
return errors.New("container init still running")
487+
}
488+
489+
// KillAndWaitExit kills the container and waits for the init process to exit.
490+
func (c *Container) KillAndWaitExit() error {
491+
// When a container doesn't have a private pidns, we have to kill all processes
492+
// in the cgroup; it's simpler to use `cgroup.kill` or `unix.Kill`.
493+
if c.config.Namespaces.IsPrivate(configs.NEWPID) {
494+
err := c.killViaPidfd()
495+
if err == nil {
496+
return nil
497+
}
498+
499+
logrus.Debugf("pidfd & epoll failed, falling back to unix.Signal: %v", err)
500+
}
501+
return c.kill()
502+
}
503+
426504
func (c *Container) createExecFifo() (retErr error) {
427505
rootuid, err := c.Config().HostRootUID()
428506
if err != nil {

0 commit comments

Comments
 (0)