Skip to content

Commit 1df1ec2

Browse files
committed
libct: setns recreate cmd object after the first call
Signed-off-by: Efim Verzakov <efimverzakov@gmail.com>
1 parent e0adafb commit 1df1ec2

File tree

3 files changed

+192
-48
lines changed

3 files changed

+192
-48
lines changed

libcontainer/container_linux.go

Lines changed: 67 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -479,52 +479,30 @@ func (c *Container) deleteExecFifo() {
479479
// container cannot access the statedir (and the FIFO itself remains
480480
// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited
481481
// fd, with _LIBCONTAINER_FIFOFD set to its fd number.
482-
func (c *Container) includeExecFifo(cmd *exec.Cmd) error {
482+
func (c *Container) includeExecFifo() error {
483483
fifoName := filepath.Join(c.stateDir, execFifoFilename)
484484
fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0)
485485
if err != nil {
486486
return err
487487
}
488488
c.fifo = fifo
489489

490-
cmd.ExtraFiles = append(cmd.ExtraFiles, fifo)
491-
cmd.Env = append(cmd.Env,
492-
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
493490
return nil
494491
}
495492

496-
func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
497-
comm, err := newProcessComm()
498-
if err != nil {
499-
return nil, err
500-
}
501-
502-
// Make sure we use a new safe copy of /proc/self/exe binary each time, this
503-
// is called to make sure that if a container manages to overwrite the file,
504-
// it cannot affect other containers on the system. For runc, this code will
505-
// only ever be called once, but libcontainer users might call this more than
506-
// once.
507-
p.closeClonedExes()
493+
// After https://go-review.googlesource.com/c/go/+/728642 will be merged, we need to recreate
494+
// cmd Object to retry it.
495+
func (c *Container) createCmdObject(p *Process, comm *processComm, cgroupFD *os.File) *exec.Cmd {
508496
var (
509497
exePath string
510498
safeExe *os.File
511499
)
512-
if exeseal.IsSelfExeCloned() {
513-
// /proc/self/exe is already a cloned binary -- no need to do anything
514-
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
515-
// We don't need to use /proc/thread-self here because the exe mm of a
516-
// thread-group is guaranteed to be the same for all threads by
517-
// definition. This lets us avoid having to do runtime.LockOSThread.
518-
exePath = "/proc/self/exe"
519-
} else {
520-
var err error
521-
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
522-
if err != nil {
523-
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
524-
}
500+
501+
if len(p.clonedExes) > 0 {
502+
safeExe = p.clonedExes[0]
525503
exePath = "/proc/self/fd/" + strconv.Itoa(int(safeExe.Fd()))
526-
p.clonedExes = append(p.clonedExes, safeExe)
527-
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
504+
} else {
505+
exePath = "/proc/self/exe"
528506
}
529507

530508
cmd := exec.Command(exePath, "init")
@@ -602,22 +580,72 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
602580
cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal)
603581
}
604582

583+
if c.fifo != nil {
584+
cmd.ExtraFiles = append(cmd.ExtraFiles, c.fifo)
585+
cmd.Env = append(cmd.Env,
586+
"_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1))
587+
}
588+
589+
if p.Init {
590+
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
591+
} else {
592+
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
593+
}
594+
595+
if cgroupFD != nil {
596+
cmd.SysProcAttr.UseCgroupFD = true
597+
cmd.SysProcAttr.CgroupFD = int(cgroupFD.Fd())
598+
}
599+
600+
return cmd
601+
}
602+
603+
func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
604+
comm, err := newProcessComm()
605+
if err != nil {
606+
return nil, err
607+
}
608+
609+
// Make sure we use a new safe copy of /proc/self/exe binary each time, this
610+
// is called to make sure that if a container manages to overwrite the file,
611+
// it cannot affect other containers on the system. For runc, this code will
612+
// only ever be called once, but libcontainer users might call this more than
613+
// once.
614+
p.closeClonedExes()
615+
var (
616+
safeExe *os.File
617+
)
618+
if exeseal.IsSelfExeCloned() {
619+
// /proc/self/exe is already a cloned binary -- no need to do anything
620+
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
621+
// We don't need to use /proc/thread-self here because the exe mm of a
622+
// thread-group is guaranteed to be the same for all threads by
623+
// definition. This lets us avoid having to do runtime.LockOSThread.
624+
} else {
625+
var err error
626+
safeExe, err = exeseal.CloneSelfExe(c.stateDir)
627+
if err != nil {
628+
return nil, fmt.Errorf("unable to create safe /proc/self/exe clone for runc init: %w", err)
629+
}
630+
p.clonedExes = append(p.clonedExes, safeExe)
631+
logrus.Debug("runc exeseal: using /proc/self/exe clone") // used for tests
632+
}
633+
605634
if p.Init {
606635
// We only set up fifoFd if we're not doing a `runc exec`. The historic
607636
// reason for this is that previously we would pass a dirfd that allowed
608637
// for container rootfs escape (and not doing it in `runc exec` avoided
609638
// that problem), but we no longer do that. However, there's no need to do
610639
// this for `runc exec` so we just keep it this way to be safe.
611-
if err := c.includeExecFifo(cmd); err != nil {
640+
if err := c.includeExecFifo(); err != nil {
612641
return nil, fmt.Errorf("unable to setup exec fifo: %w", err)
613642
}
614-
return c.newInitProcess(p, cmd, comm)
643+
return c.newInitProcess(p, comm)
615644
}
616-
return c.newSetnsProcess(p, cmd, comm)
645+
return c.newSetnsProcess(p, comm)
617646
}
618647

619-
func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*initProcess, error) {
620-
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))
648+
func (c *Container) newInitProcess(p *Process, comm *processComm) (*initProcess, error) {
621649
nsMaps := make(map[configs.NamespaceType]string)
622650
for _, ns := range c.config.Namespaces {
623651
if ns.Path != "" {
@@ -629,6 +657,8 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
629657
return nil, err
630658
}
631659

660+
cmd := c.createCmdObject(p, comm, nil)
661+
632662
init := &initProcess{
633663
containerProcess: containerProcess{
634664
cmd: cmd,
@@ -645,8 +675,7 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
645675
return init, nil
646676
}
647677

648-
func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm) (*setnsProcess, error) {
649-
cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns))
678+
func (c *Container) newSetnsProcess(p *Process, comm *processComm) (*setnsProcess, error) {
650679
state := c.currentState()
651680
// for setns process, we don't have to set cloneflags as the process namespaces
652681
// will only be set via setns syscall
@@ -656,7 +685,6 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
656685
}
657686
proc := &setnsProcess{
658687
containerProcess: containerProcess{
659-
cmd: cmd,
660688
comm: comm,
661689
manager: c.cgroupManager,
662690
config: c.newInitConfig(p),

libcontainer/integration/exec_test.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,17 @@ import (
1313
"strings"
1414
"syscall"
1515
"testing"
16+
"time"
1617

18+
securejoin "github.com/cyphar/filepath-securejoin"
1719
"github.com/opencontainers/cgroups"
1820
"github.com/opencontainers/cgroups/systemd"
1921
"github.com/opencontainers/runc/internal/linux"
2022
"github.com/opencontainers/runc/internal/pathrs"
2123
"github.com/opencontainers/runc/libcontainer"
2224
"github.com/opencontainers/runc/libcontainer/configs"
2325
"github.com/opencontainers/runc/libcontainer/internal/userns"
26+
"github.com/opencontainers/runc/libcontainer/utils"
2427
"github.com/opencontainers/runtime-spec/specs-go"
2528

2629
"golang.org/x/sys/unix"
@@ -230,6 +233,125 @@ func TestEnter(t *testing.T) {
230233
}
231234
}
232235

236+
237+
const stateFilename = "state.json"
238+
239+
// We need to dump changed state into state file.
240+
func saveState(stateDir string, s *libcontainer.State) (retErr error) {
241+
tmpFile, err := os.CreateTemp(stateDir, "state-")
242+
if err != nil {
243+
return err
244+
}
245+
246+
defer func() {
247+
if retErr != nil {
248+
tmpFile.Close()
249+
os.Remove(tmpFile.Name())
250+
}
251+
}()
252+
253+
err = utils.WriteJSON(tmpFile, s)
254+
if err != nil {
255+
return err
256+
}
257+
err = tmpFile.Close()
258+
if err != nil {
259+
return err
260+
}
261+
262+
stateFilePath := filepath.Join(stateDir, stateFilename)
263+
return os.Rename(tmpFile.Name(), stateFilePath)
264+
}
265+
266+
func TestCmdRetry(t *testing.T) {
267+
if testing.Short() {
268+
return
269+
}
270+
271+
if !cgroups.IsCgroup2UnifiedMode() {
272+
t.Skip("CLONE_INTO_CGROUP works only with cgroup v2")
273+
}
274+
275+
// Create dir to "pseudo" cgroup path
276+
pseudoPath := t.TempDir()
277+
278+
config := newTemplateConfig(t, nil)
279+
280+
name := strings.ReplaceAll(t.Name(), "/", "_") + strconv.FormatInt(-int64(time.Now().Nanosecond()), 35)
281+
root := t.TempDir()
282+
283+
// Container State Dir
284+
stateDir, err := securejoin.SecureJoin(root, name)
285+
ok(t, err)
286+
287+
// Create Container
288+
container, err := libcontainer.Create(root, name, config)
289+
290+
ok(t, err)
291+
defer destroyContainer(container)
292+
293+
// Execute a first process in the container
294+
stdinR, stdinW, err := os.Pipe()
295+
ok(t, err)
296+
297+
var stdout, stdout2 bytes.Buffer
298+
299+
pconfig := libcontainer.Process{
300+
Cwd: "/",
301+
Args: []string{"sh", "-c", "cat"},
302+
Env: standardEnvironment,
303+
Stdin: stdinR,
304+
Stdout: &stdout,
305+
Init: true,
306+
}
307+
308+
err = container.Run(&pconfig)
309+
_ = stdinR.Close()
310+
defer stdinW.Close()
311+
ok(t, err)
312+
313+
state, err := container.State()
314+
ok(t, err)
315+
316+
// Dump our "pseudo" cgroup path as dirPath
317+
state.CgroupPaths[""] = pseudoPath
318+
err = saveState(stateDir, state)
319+
ok(t, err)
320+
321+
// Load container from dumped state
322+
container2, err := libcontainer.Load(root, name)
323+
ok(t, err)
324+
325+
// Execute another process in the container
326+
stdinR2, stdinW2, err := os.Pipe()
327+
ok(t, err)
328+
pconfig2 := libcontainer.Process{
329+
Cwd: "/",
330+
Env: standardEnvironment,
331+
}
332+
pconfig2.Args = []string{"sh", "-c", "cat"}
333+
pconfig2.Stdin = stdinR2
334+
pconfig2.Stdout = &stdout2
335+
336+
// Turn on TestMode in cgroups library to not check path mode and create all necessary files.
337+
cgroups.TestMode = true
338+
defer func() {
339+
cgroups.TestMode = false
340+
}()
341+
342+
err = container2.Run(&pconfig2)
343+
_ = stdinR2.Close()
344+
defer stdinW2.Close()
345+
ok(t, err)
346+
347+
// Wait processes
348+
_ = stdinW2.Close()
349+
waitProcess(&pconfig2, t)
350+
351+
_ = stdinW.Close()
352+
waitProcess(&pconfig, t)
353+
}
354+
233355
func TestProcessEnv(t *testing.T) {
234356
if testing.Short() {
235357
return

libcontainer/process_linux.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"runtime"
1616
"strconv"
1717
"strings"
18-
"syscall"
1918
"time"
2019

2120
"github.com/opencontainers/runtime-spec/specs-go"
@@ -357,12 +356,6 @@ func (p *setnsProcess) prepareCgroupFD() (*os.File, error) {
357356
}
358357

359358
logrus.Debugf("using CLONE_INTO_CGROUP %q", cgroup)
360-
if p.cmd.SysProcAttr == nil {
361-
p.cmd.SysProcAttr = &syscall.SysProcAttr{}
362-
}
363-
p.cmd.SysProcAttr.UseCgroupFD = true
364-
p.cmd.SysProcAttr.CgroupFD = int(fd.Fd())
365-
366359
return fd, nil
367360
}
368361

@@ -380,11 +373,12 @@ func (p *setnsProcess) startWithCgroupFD() error {
380373
defer fd.Close()
381374
}
382375

376+
p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, fd)
383377
err = p.startWithCPUAffinity()
384378
if err != nil && p.cmd.SysProcAttr.UseCgroupFD {
379+
// We need to recreate the exec.Cmd object
385380
logrus.Debugf("exec with CLONE_INTO_CGROUP failed: %v; retrying without", err)
386-
// SysProcAttr.CgroupFD is never used when UseCgroupFD is unset.
387-
p.cmd.SysProcAttr.UseCgroupFD = false
381+
p.cmd = p.containerProcess.container.createCmdObject(p.process, p.comm, nil)
388382
err = p.startWithCPUAffinity()
389383
}
390384

0 commit comments

Comments
 (0)