Skip to content

Commit ad99e59

Browse files
committed
runc exec: use CLONE_INTO_CGROUP when available
This is based on work done in [1]. Since the functionality requires a recent kernel and may be unavailable or fail at runtime, implement a fallback that retries without CLONE_INTO_CGROUP.

[1]: https://go-review.googlesource.com/c/go/+/417695

Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 6a1d0a5 commit ad99e59

File tree

2 files changed

+35
-3
lines changed

2 files changed

+35
-3
lines changed

libcontainer/process_linux.go

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import (
1313
"runtime"
1414
"strconv"
1515
"sync"
16+
"syscall"
1617
"time"
1718

1819
"github.com/opencontainers/runtime-spec/specs-go"
@@ -229,9 +230,38 @@ func (p *setnsProcess) addIntoCgroup() error {
229230
func (p *setnsProcess) start() (retErr error) {
230231
defer p.comm.closeParent()
231232

233+
useCgroupFD := false
234+
if cg, ok := p.cgroupPaths[""]; ok && len(p.cgroupPaths) == 1 {
235+
// Try using clone3(CLONE_INTO_CGROUP).
236+
fd, err := os.OpenFile(cg, unix.O_PATH|unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
237+
if err != nil {
238+
if !p.rootlessCgroups {
239+
return fmt.Errorf("can't open cgroup: %w", err)
240+
}
241+
} else {
242+
defer fd.Close()
243+
244+
useCgroupFD = true
245+
if p.cmd.SysProcAttr == nil {
246+
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
247+
}
248+
p.cmd.SysProcAttr.UseCgroupFD = true
249+
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())
250+
}
251+
}
252+
232253
// Get the "before" value of oom kill count.
233254
oom, _ := p.manager.OOMKillCount()
255+
234256
err := p.startWithCPUAffinity()
257+
if useCgroupFD && (errors.Is(err, errors.ErrUnsupported) || errors.Is(err, unix.EBUSY)) {
258+
logrus.Debugf("exec(CLONE_INTO_CGROUP) failed with %v, retrying", err)
259+
// Older kernel? Try again without CLONE_INTO_CGROUP.
260+
useCgroupFD = false
261+
p.cmd.SysProcAttr.UseCgroupFD = false
262+
err = p.startWithCPUAffinity()
263+
}
264+
235265
// Close the child-side of the pipes (controlled by child).
236266
p.comm.closeChild()
237267
if err != nil {
@@ -259,8 +289,10 @@ func (p *setnsProcess) start() (retErr error) {
259289
if err := p.execSetns(); err != nil {
260290
return fmt.Errorf("error executing setns process: %w", err)
261291
}
262-
if err := p.addIntoCgroup(); err != nil {
263-
return err
292+
if !useCgroupFD {
293+
if err := p.addIntoCgroup(); err != nil {
294+
return err
295+
}
264296
}
265297
// Set final CPU affinity right after the process is moved into container's cgroup.
266298
if err := p.setFinalCPUAffinity(); err != nil {

tests/integration/exec.bats

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,7 @@ function check_exec_debug() {
282282
# Check we can't join non-existing subcgroup.
283283
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
284284
[ "$status" -ne 0 ]
285-
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
285+
[[ "$output" == *" can't open cgroup:"*"/nonexistent: no such file "* ]]
286286

287287
# Check we can join top-level cgroup (implicit).
288288
runc exec test_busybox grep '^0::/$' /proc/self/cgroup

0 commit comments

Comments
 (0)