Skip to content
Closed
4 changes: 2 additions & 2 deletions contrib/cmd/recvtty/recvtty.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func handleSingle(path string, noStdin bool) error {
defer socket.Close()

// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
master, err := utils.RecvFile(socket)
if err != nil {
return err
}
Expand Down Expand Up @@ -171,7 +171,7 @@ func handleNull(path string) error {
defer socket.Close()

// Get the master file descriptor from runC.
master, err := utils.RecvFd(socket)
master, err := utils.RecvFile(socket)
if err != nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/criu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ func (c *Container) criuNotifications(resp *criurpc.CriuResp, process *Process,
defer master.Close()

// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil {
if err := utils.SendFile(process.ConsoleSocket, master); err != nil {
return err
}
case "status-ready":
Expand Down
62 changes: 26 additions & 36 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"encoding/json"
"errors"
"fmt"
"io"
"net"
"os"
"runtime"
"runtime/debug"
"strconv"
"strings"
Expand Down Expand Up @@ -102,11 +102,8 @@ func StartInitialization() (retErr error) {
defer func() {
// We have an error during the initialization of the container's init,
// send it back to the parent process in the form of an initError.
if err := writeSync(pipe, procError); err != nil {
fmt.Fprintln(os.Stderr, retErr)
return
}
if err := utils.WriteJSON(pipe, &initError{Message: retErr.Error()}); err != nil {
ierr := initError{Message: retErr.Error()}
if err := writeSyncArg(pipe, procError, ierr); err != nil {
fmt.Fprintln(os.Stderr, retErr)
return
}
Expand Down Expand Up @@ -317,7 +314,6 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
if err != nil {
return err
}

// After we return from here, we don't need the console anymore.
defer pty.Close()

Expand All @@ -339,67 +335,61 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error {
}
}
// While we can access console.master, using the API is a good idea.
if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
if err := utils.SendRawFd(socket, pty.Name(), pty.Fd()); err != nil {
return err
}
runtime.KeepAlive(pty)

// Now, dup over all the things.
return dupStdio(slavePath)
}

// syncParentReady sends to the given pipe a JSON payload which indicates that
// the init is ready to Exec the child process. It then waits for the parent to
// indicate that it is cleared to Exec.
func syncParentReady(pipe io.ReadWriter) error {
func syncParentReady(pipe *os.File) error {
// Tell parent.
if err := writeSync(pipe, procReady); err != nil {
return err
}

// Wait for parent to give the all-clear.
return readSync(pipe, procRun)
}

// syncParentHooks sends to the given pipe a JSON payload which indicates that
// the parent should execute pre-start hooks. It then waits for the parent to
// indicate that it is cleared to resume.
func syncParentHooks(pipe io.ReadWriter) error {
func syncParentHooks(pipe *os.File) error {
// Tell parent.
if err := writeSync(pipe, procHooks); err != nil {
return err
}

// Wait for parent to give the all-clear.
return readSync(pipe, procResume)
}

// syncParentSeccomp sends to the given pipe a JSON payload which
// indicates that the parent should pick up the seccomp fd with pidfd_getfd()
// and send it to the seccomp agent over a unix socket. It then waits for
// the parent to indicate that it is cleared to resume and closes the seccompFd.
// If the seccompFd is -1, there isn't anything to sync with the parent, so it
// returns no error.
func syncParentSeccomp(pipe io.ReadWriter, seccompFd int) error {
if seccompFd == -1 {
// syncParentSeccomp sends the fd associated with the seccomp file descriptor
// to the parent, and wait for the parent to do pidfd_getfd() to grab a copy.
func syncParentSeccomp(pipe *os.File, seccompFd int) error {
if seccompFd >= 0 {

This comment was marked as resolved.

This comment was marked as resolved.

return nil
}

// Tell parent.
if err := writeSyncWithFd(pipe, procSeccomp, seccompFd); err != nil {
unix.Close(seccompFd)
defer unix.Close(seccompFd)

// Tell parent to grab our fd.
//
// Notably, we do not use writeSyncFile here because a container might have
// an SCMP_ACT_NOTIFY action on sendmsg(2) so we need to use the smallest
// possible number of system calls here because all of those syscalls
// cannot be used with SCMP_ACT_NOTIFY as a result (any syscall we use here
// before the parent gets the file descriptor would deadlock "runc init" if
// we allowed it for SCMP_ACT_NOTIFY). See seccomp.InitSeccomp() for more
// details.
if err := writeSyncArg(pipe, procSeccomp, seccompFd); err != nil {
return err
}

// Wait for parent to give the all-clear.
if err := readSync(pipe, procSeccompDone); err != nil {
unix.Close(seccompFd)
return fmt.Errorf("sync parent seccomp: %w", err)
}

if err := unix.Close(seccompFd); err != nil {
return fmt.Errorf("close seccomp fd: %w", err)
}

return nil
// Wait for parent to tell us they've grabbed the seccompfd.
return readSync(pipe, procSeccompDone)
}

// setupUser changes the groups, gid, and uid for the user inside the container
Expand Down
4 changes: 2 additions & 2 deletions libcontainer/integration/execin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ func TestExecInTTY(t *testing.T) {

done := make(chan (error))
go func() {
f, err := utils.RecvFd(parent)
f, err := utils.RecvFile(parent)
if err != nil {
done <- fmt.Errorf("RecvFd: %w", err)
done <- fmt.Errorf("RecvFile: %w", err)
return
}
c, err := console.ConsoleFromFile(f)
Expand Down
83 changes: 48 additions & 35 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"time"

Expand Down Expand Up @@ -171,14 +172,27 @@ func (p *setnsProcess) start() (retErr error) {
panic("unexpected procHooks in setns")
case procSeccomp:
if p.config.Config.Seccomp.ListenerPath == "" {
return errors.New("listenerPath is not set")
return errors.New("seccomp listenerPath is not set")
}

seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
if sync.Arg == nil {
return fmt.Errorf("sync %q is missing an argument", sync.Type)
}
var srcFd int
if err := json.Unmarshal(*sync.Arg, &srcFd); err != nil {
return fmt.Errorf("sync %q passed invalid fd arg: %w", sync.Type, err)
}
seccompFd, err := pidGetFd(p.pid(), srcFd)
if err != nil {
return fmt.Errorf("sync %q get fd %d from child failed: %w", sync.Type, srcFd, err)
}
defer seccompFd.Close()
// We have a copy, the child can keep working. We don't need to
// wait for the seccomp notify listener to get the fd before we
// permit the child to continue because the child will happily wait
// for the listener if it hits SCMP_ACT_NOTIFY.
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
Comment on lines +195 to +199

This comment was marked as resolved.

return err
}
defer unix.Close(seccompFd)

bundle, annotations := utils.Annotations(p.config.Config.Labels)
containerProcessState := &specs.ContainerProcessState{
Expand All @@ -199,15 +213,10 @@ func (p *setnsProcess) start() (retErr error) {
containerProcessState, seccompFd); err != nil {
return err
}

// Sync with child.
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
return err
}
return nil
default:
return errors.New("invalid JSON payload from child")
}
return nil
})

if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil {
Expand Down Expand Up @@ -460,14 +469,27 @@ func (p *initProcess) start() (retErr error) {
switch sync.Type {
case procSeccomp:
if p.config.Config.Seccomp.ListenerPath == "" {
return errors.New("listenerPath is not set")
return errors.New("seccomp listenerPath is not set")
}

seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
var srcFd int
if sync.Arg == nil {
return fmt.Errorf("sync %q is missing an argument", sync.Type)
}
if err := json.Unmarshal(*sync.Arg, &srcFd); err != nil {
return fmt.Errorf("sync %q passed invalid fd arg: %w", sync.Type, err)
}
seccompFd, err := pidGetFd(p.pid(), srcFd)
if err != nil {
return fmt.Errorf("sync %q get fd %d from child failed: %w", sync.Type, srcFd, err)
}
defer seccompFd.Close()
// We have a copy, the child can keep working. We don't need to
// wait for the seccomp notify listener to get the fd before we
// permit the child to continue because the child will happily wait
// for the listener if it hits SCMP_ACT_NOTIFY.
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
return err
}
defer unix.Close(seccompFd)

s, err := p.container.currentOCIState()
if err != nil {
Expand All @@ -488,11 +510,6 @@ func (p *initProcess) start() (retErr error) {
containerProcessState, seccompFd); err != nil {
return err
}

// Sync with child.
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
return err
}
case procReady:
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
Expand Down Expand Up @@ -591,7 +608,6 @@ func (p *initProcess) start() (retErr error) {
default:
return errors.New("invalid JSON payload from child")
}

return nil
})

Expand Down Expand Up @@ -684,22 +700,20 @@ func (p *initProcess) forwardChildLogs() chan error {
return logs.ForwardLogs(p.logFilePair.parent)
}

func recvSeccompFd(childPid, childFd uintptr) (int, error) {
pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
if errno != 0 {
return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
func pidGetFd(pid, srcFd int) (*os.File, error) {
pidFd, err := unix.PidfdOpen(pid, 0)
if err != nil {
return nil, os.NewSyscallError("pidfd_open", err)
}
defer unix.Close(int(pidfd))

seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
if errno != 0 {
return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
defer unix.Close(pidFd)
fd, err := unix.PidfdGetfd(pidFd, srcFd, 0)
if err != nil {
return nil, os.NewSyscallError("pidfd_getfd", err)
}

return int(seccompFd), nil
return os.NewFile(uintptr(fd), "[pidfd_getfd]"), nil
}

func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, file *os.File) error {
conn, err := net.Dial("unix", listenerPath)
if err != nil {
return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
Expand All @@ -716,11 +730,10 @@ func sendContainerProcessState(listenerPath string, state *specs.ContainerProces
return fmt.Errorf("cannot marshall seccomp state: %w", err)
}

err = utils.SendFds(socket, b, fd)
if err != nil {
if err := utils.SendRawFd(socket, string(b), file.Fd()); err != nil {
return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
}

runtime.KeepAlive(file)
return nil
}

Expand Down
3 changes: 1 addition & 2 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package libcontainer
import (
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
Expand Down Expand Up @@ -64,7 +63,7 @@ func needsSetupDev(config *configs.Config) bool {
// prepareRootfs sets up the devices, mount points, and filesystems for use
// inside a new mount namespace. It doesn't set anything as ro. You must call
// finalizeRootfs after this function to finish setting up the rootfs.
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig, mountFds mountFds) (err error) {
func prepareRootfs(pipe *os.File, iConfig *initConfig, mountFds mountFds) (err error) {
config := iConfig.Config
if err := prepareRoot(config); err != nil {
return fmt.Errorf("error preparing rootfs: %w", err)
Expand Down
2 changes: 0 additions & 2 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ func (l *linuxSetnsInit) Init() error {
if err != nil {
return err
}

if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
return err
}
Expand All @@ -94,7 +93,6 @@ func (l *linuxSetnsInit) Init() error {
if err != nil {
return fmt.Errorf("unable to init seccomp: %w", err)
}

if err := syncParentSeccomp(l.pipe, seccompFd); err != nil {
return err
}
Expand Down
Loading