Skip to content

Commit c9019ea

Browse files
committed
merge #4102 into opencontainers/runc:main
Kir Kolyshkin (8): libct: replace runType with hasInit libct: Signal: slight refactor runc kill: fix sending KILL to non-pidns container runc delete -f: fix for no pidns + no init case libct/cg: improve cgroup removal logic runc delete: do not ignore error from destroy runc delete, container.Destroy: kill all processes libct: Destroy: don't proceed in case of errors LGTMs: lifubang cyphar
2 parents eb24430 + a6f4081 commit c9019ea

File tree

9 files changed

+244
-122
lines changed

9 files changed

+244
-122
lines changed

checkpoint.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ checkpointed.`,
7070
err = container.Checkpoint(options)
7171
if err == nil && !(options.LeaveRunning || options.PreDump) {
7272
// Destroy the container unless we tell CRIU to keep it.
73-
destroy(container)
73+
if err := container.Destroy(); err != nil {
74+
logrus.Warn(err)
75+
}
7476
}
7577
return err
7678
},

delete.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@ func killContainer(container *libcontainer.Container) error {
1818
for i := 0; i < 100; i++ {
1919
time.Sleep(100 * time.Millisecond)
2020
if err := container.Signal(unix.Signal(0)); err != nil {
21-
destroy(container)
22-
return nil
21+
return container.Destroy()
2322
}
2423
}
2524
return errors.New("container init still running")
@@ -66,22 +65,25 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
6665
}
6766
return err
6867
}
68+
// When --force is given, we kill all container processes and
69+
// then destroy the container. This is done even for a stopped
70+
// container, because (in case it does not have its own PID
71+
// namespace) there may be some leftover processes in the
72+
// container's cgroup.
73+
if force {
74+
return killContainer(container)
75+
}
6976
s, err := container.Status()
7077
if err != nil {
7178
return err
7279
}
7380
switch s {
7481
case libcontainer.Stopped:
75-
destroy(container)
82+
return container.Destroy()
7683
case libcontainer.Created:
7784
return killContainer(container)
7885
default:
79-
if force {
80-
return killContainer(container)
81-
}
8286
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
8387
}
84-
85-
return nil
8688
},
8789
}

libcontainer/cgroups/utils.go

Lines changed: 32 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -217,79 +217,69 @@ func PathExists(path string) bool {
217217
return true
218218
}
219219

220-
func rmdir(path string) error {
220+
// rmdir tries to remove a directory, optionally retrying on EBUSY.
221+
func rmdir(path string, retry bool) error {
222+
delay := time.Millisecond
223+
tries := 10
224+
225+
again:
221226
err := unix.Rmdir(path)
222-
if err == nil || err == unix.ENOENT {
227+
switch err { // nolint:errorlint // unix errors are bare
228+
case nil, unix.ENOENT:
223229
return nil
230+
case unix.EINTR:
231+
goto again
232+
case unix.EBUSY:
233+
if retry && tries > 0 {
234+
time.Sleep(delay)
235+
delay *= 2
236+
tries--
237+
goto again
238+
239+
}
224240
}
225241
return &os.PathError{Op: "rmdir", Path: path, Err: err}
226242
}
227243

228244
// RemovePath aims to remove cgroup path. It does so recursively,
229245
// by removing any subdirectories (sub-cgroups) first.
230246
func RemovePath(path string) error {
231-
// try the fast path first
232-
if err := rmdir(path); err == nil {
247+
// Try the fast path first.
248+
if err := rmdir(path, false); err == nil {
233249
return nil
234250
}
235251

236252
infos, err := os.ReadDir(path)
237-
if err != nil {
238-
if os.IsNotExist(err) {
239-
err = nil
240-
}
253+
if err != nil && !os.IsNotExist(err) {
241254
return err
242255
}
243256
for _, info := range infos {
244257
if info.IsDir() {
245-
// We should remove subcgroups dir first
258+
// We should remove subcgroup first.
246259
if err = RemovePath(filepath.Join(path, info.Name())); err != nil {
247260
break
248261
}
249262
}
250263
}
251264
if err == nil {
252-
err = rmdir(path)
265+
err = rmdir(path, true)
253266
}
254267
return err
255268
}
256269

257270
// RemovePaths iterates over the provided paths removing them.
258-
// We trying to remove all paths five times with increasing delay between tries.
259-
// If after all there are not removed cgroups - appropriate error will be
260-
// returned.
261271
func RemovePaths(paths map[string]string) (err error) {
262-
const retries = 5
263-
delay := 10 * time.Millisecond
264-
for i := 0; i < retries; i++ {
265-
if i != 0 {
266-
time.Sleep(delay)
267-
delay *= 2
268-
}
269-
for s, p := range paths {
270-
if err := RemovePath(p); err != nil {
271-
// do not log intermediate iterations
272-
switch i {
273-
case 0:
274-
logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)")
275-
case retries - 1:
276-
logrus.WithError(err).Error("Failed to remove cgroup")
277-
}
278-
}
279-
_, err := os.Stat(p)
280-
// We need this strange way of checking cgroups existence because
281-
// RemoveAll almost always returns error, even on already removed
282-
// cgroups
283-
if os.IsNotExist(err) {
284-
delete(paths, s)
285-
}
286-
}
287-
if len(paths) == 0 {
288-
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
289-
paths = make(map[string]string)
290-
return nil
272+
for s, p := range paths {
273+
if err := RemovePath(p); err == nil {
274+
delete(paths, s)
291275
}
292276
}
277+
if len(paths) == 0 {
278+
//nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506
279+
// TODO: switch to clear once Go < 1.21 is not supported.
280+
paths = make(map[string]string)
281+
return nil
282+
}
293283
return fmt.Errorf("Failed to remove paths: %v", paths)
294284
}
295285

libcontainer/container_linux.go

Lines changed: 33 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func (c *Container) ignoreCgroupError(err error) error {
115115
if err == nil {
116116
return nil
117117
}
118-
if errors.Is(err, os.ErrNotExist) && c.runType() == Stopped && !c.cgroupManager.Exists() {
118+
if errors.Is(err, os.ErrNotExist) && !c.hasInit() && !c.cgroupManager.Exists() {
119119
return nil
120120
}
121121
return err
@@ -364,37 +364,35 @@ func (c *Container) start(process *Process) (retErr error) {
364364
func (c *Container) Signal(s os.Signal) error {
365365
c.m.Lock()
366366
defer c.m.Unlock()
367-
status, err := c.currentStatus()
368-
if err != nil {
369-
return err
370-
}
371-
// To avoid a PID reuse attack, don't kill non-running container.
372-
switch status {
373-
case Running, Created, Paused:
374-
default:
375-
return ErrNotRunning
376-
}
377367

378368
// When a container has its own PID namespace, inside it the init PID
379369
// is 1, and thus it is handled specially by the kernel. In particular,
380370
// killing init with SIGKILL from an ancestor namespace will also kill
381371
// all other processes in that PID namespace (see pid_namespaces(7)).
382372
//
383373
// OTOH, if PID namespace is shared, we should kill all pids to avoid
384-
// leftover processes.
374+
// leftover processes. Handle this special case here.
385375
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
386-
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
387-
} else {
388-
err = c.initProcess.signal(s)
376+
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
377+
return fmt.Errorf("unable to kill all processes: %w", err)
378+
}
379+
return nil
389380
}
390-
if err != nil {
381+
382+
// To avoid a PID reuse attack, don't kill non-running container.
383+
if !c.hasInit() {
384+
return ErrNotRunning
385+
}
386+
if err := c.initProcess.signal(s); err != nil {
391387
return fmt.Errorf("unable to signal init: %w", err)
392388
}
393-
if status == Paused && s == unix.SIGKILL {
389+
if s == unix.SIGKILL {
394390
// For cgroup v1, killing a process in a frozen cgroup
395391
// does nothing until it's thawed. Only thaw the cgroup
396392
// for SIGKILL.
397-
_ = c.cgroupManager.Freeze(configs.Thawed)
393+
if paused, _ := c.isPaused(); paused {
394+
_ = c.cgroupManager.Freeze(configs.Thawed)
395+
}
398396
}
399397
return nil
400398
}
@@ -876,7 +874,10 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
876874
func (c *Container) Destroy() error {
877875
c.m.Lock()
878876
defer c.m.Unlock()
879-
return c.state.destroy()
877+
if err := c.state.destroy(); err != nil {
878+
return fmt.Errorf("unable to destroy container: %w", err)
879+
}
880+
return nil
880881
}
881882

882883
// Pause pauses the container, if its state is RUNNING or CREATED, changing
@@ -1006,34 +1007,31 @@ func (c *Container) refreshState() error {
10061007
if paused {
10071008
return c.state.transition(&pausedState{c: c})
10081009
}
1009-
t := c.runType()
1010-
switch t {
1011-
case Created:
1010+
if !c.hasInit() {
1011+
return c.state.transition(&stoppedState{c: c})
1012+
}
1013+
// The presence of exec fifo helps to distinguish between
1014+
// the created and the running states.
1015+
if _, err := os.Stat(filepath.Join(c.stateDir, execFifoFilename)); err == nil {
10121016
return c.state.transition(&createdState{c: c})
1013-
case Running:
1014-
return c.state.transition(&runningState{c: c})
10151017
}
1016-
return c.state.transition(&stoppedState{c: c})
1018+
return c.state.transition(&runningState{c: c})
10171019
}
10181020

1019-
func (c *Container) runType() Status {
1021+
// hasInit tells whether the container init process exists.
1022+
func (c *Container) hasInit() bool {
10201023
if c.initProcess == nil {
1021-
return Stopped
1024+
return false
10221025
}
10231026
pid := c.initProcess.pid()
10241027
stat, err := system.Stat(pid)
10251028
if err != nil {
1026-
return Stopped
1029+
return false
10271030
}
10281031
if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead {
1029-
return Stopped
1030-
}
1031-
// We'll create exec fifo and blocking on it after container is created,
1032-
// and delete it after start container.
1033-
if _, err := os.Stat(filepath.Join(c.stateDir, execFifoFilename)); err == nil {
1034-
return Created
1032+
return false
10351033
}
1036-
return Running
1034+
return true
10371035
}
10381036

10391037
func (c *Container) isPaused() (bool, error) {

libcontainer/init_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,9 @@ func setupPersonality(config *configs.Config) error {
673673
// signalAllProcesses freezes then iterates over all the processes inside the
674674
// manager's cgroups sending the signal s to them.
675675
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
676+
if !m.Exists() {
677+
return ErrNotRunning
678+
}
676679
// Use cgroup.kill, if available.
677680
if s == unix.SIGKILL {
678681
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.

libcontainer/state_linux.go

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,30 @@ type containerState interface {
3535
}
3636

3737
func destroy(c *Container) error {
38-
err := c.cgroupManager.Destroy()
38+
// Usually, when a container init is gone, all other processes in its
39+
// cgroup are killed by the kernel. This is not the case for a shared
40+
// PID namespace container, which may have some processes left after
41+
// its init is killed or exited.
42+
//
43+
// As the container without init process running is considered stopped,
44+
// and destroy is supposed to remove all the container resources, we need
45+
// to kill those processes here.
46+
if !c.config.Namespaces.IsPrivate(configs.NEWPID) {
47+
_ = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
48+
}
49+
if err := c.cgroupManager.Destroy(); err != nil {
50+
return fmt.Errorf("unable to remove container's cgroup: %w", err)
51+
}
3952
if c.intelRdtManager != nil {
40-
if ierr := c.intelRdtManager.Destroy(); err == nil {
41-
err = ierr
53+
if err := c.intelRdtManager.Destroy(); err != nil {
54+
return fmt.Errorf("unable to remove container's IntelRDT group: %w", err)
4255
}
4356
}
44-
if rerr := os.RemoveAll(c.stateDir); err == nil {
45-
err = rerr
57+
if err := os.RemoveAll(c.stateDir); err != nil {
58+
return fmt.Errorf("unable to remove container state dir: %w", err)
4659
}
4760
c.initProcess = nil
48-
if herr := runPoststopHooks(c); err == nil {
49-
err = herr
50-
}
61+
err := runPoststopHooks(c)
5162
c.state = &stoppedState{c: c}
5263
return err
5364
}
@@ -103,7 +114,7 @@ func (r *runningState) status() Status {
103114
func (r *runningState) transition(s containerState) error {
104115
switch s.(type) {
105116
case *stoppedState:
106-
if r.c.runType() == Running {
117+
if r.c.hasInit() {
107118
return ErrRunning
108119
}
109120
r.c.state = s
@@ -118,7 +129,7 @@ func (r *runningState) transition(s containerState) error {
118129
}
119130

120131
func (r *runningState) destroy() error {
121-
if r.c.runType() == Running {
132+
if r.c.hasInit() {
122133
return ErrRunning
123134
}
124135
return destroy(r.c)
@@ -170,14 +181,13 @@ func (p *pausedState) transition(s containerState) error {
170181
}
171182

172183
func (p *pausedState) destroy() error {
173-
t := p.c.runType()
174-
if t != Running && t != Created {
175-
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
176-
return err
177-
}
178-
return destroy(p.c)
184+
if p.c.hasInit() {
185+
return ErrPaused
186+
}
187+
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
188+
return err
179189
}
180-
return ErrPaused
190+
return destroy(p.c)
181191
}
182192

183193
// restoredState is the same as the running state but also has associated checkpoint

0 commit comments

Comments
 (0)