Skip to content

Commit 2b025c0

Browse files
alban and rata committed
Implement Seccomp Notify
This commit implements support for the SCMP_ACT_NOTIFY action. It requires libseccomp-2.5.0 to work, but runc still works with older libseccomp if the seccomp policy does not use the SCMP_ACT_NOTIFY action. A new synchronization step between runc[INIT] and runc run is introduced to pass the seccomp fd. runc run fetches the seccomp fd from the runc[INIT] process with pidfd_getfd() and sends it to the seccomp agent using SCM_RIGHTS. As suggested by @kolyshkin, we also make writeSync() a wrapper of writeSyncWithFd() and wrap the error there. To avoid pointless double-wrapped errors, we made some existing code paths just return the error instead of re-wrapping it. If we didn't do that, the error would look like: "writing syncT <act>: writing syncT: <err>". With the adjusted code paths, it now looks like: "writing syncT <act>: <err>". Signed-off-by: Alban Crequy <[email protected]> Signed-off-by: Rodrigo Campos <[email protected]> Co-authored-by: Rodrigo Campos <[email protected]>
1 parent 4e7aeff commit 2b025c0

File tree

12 files changed

+310
-66
lines changed

12 files changed

+310
-66
lines changed

libcontainer/configs/config.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ type IDMap struct {
3131
// for syscalls. Additional architectures can be added by specifying them in
3232
// Architectures.
3333
type Seccomp struct {
34-
DefaultAction Action `json:"default_action"`
35-
Architectures []string `json:"architectures"`
36-
Syscalls []*Syscall `json:"syscalls"`
37-
DefaultErrnoRet *uint `json:"default_errno_ret"`
34+
DefaultAction Action `json:"default_action"`
35+
Architectures []string `json:"architectures"`
36+
Syscalls []*Syscall `json:"syscalls"`
37+
DefaultErrnoRet *uint `json:"default_errno_ret"`
38+
ListenerPath string `json:"listener_path,omitempty"`
39+
ListenerMetadata string `json:"listener_metadata,omitempty"`
3840
}
3941

4042
// Action is taken upon rule match in Seccomp
@@ -47,6 +49,7 @@ const (
4749
Allow
4850
Trace
4951
Log
52+
Notify
5053
)
5154

5255
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp

libcontainer/factory_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
357357
defer func() {
358358
// We have an error during the initialization of the container's init,
359359
// send it back to the parent process in the form of an initError.
360-
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
360+
if werr := writeSync(pipe, procError); werr != nil {
361361
fmt.Fprintln(os.Stderr, err)
362362
return
363363
}

libcontainer/init_linux.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,36 @@ func syncParentHooks(pipe io.ReadWriter) error {
271271
return readSync(pipe, procResume)
272272
}
273273

274+
// syncParentSeccomp sends to the given pipe a JSON payload which
275+
// indicates that the parent should pick up the seccomp fd with pidfd_getfd()
276+
// and send it to the seccomp agent over a unix socket. It then waits for
277+
// the parent to indicate that it is cleared to resume and closes the seccompFd.
278+
// If the seccompFd is -1, there isn't anything to sync with the parent, so it
279+
// returns no error.
280+
func syncParentSeccomp(pipe io.ReadWriter, seccompFd int) error {
281+
if seccompFd == -1 {
282+
return nil
283+
}
284+
285+
// Tell parent.
286+
if err := writeSyncWithFd(pipe, procSeccomp, seccompFd); err != nil {
287+
unix.Close(seccompFd)
288+
return err
289+
}
290+
291+
// Wait for parent to give the all-clear.
292+
if err := readSync(pipe, procSeccompDone); err != nil {
293+
unix.Close(seccompFd)
294+
return fmt.Errorf("sync parent seccomp: %w", err)
295+
}
296+
297+
if err := unix.Close(seccompFd); err != nil {
298+
return fmt.Errorf("close seccomp fd: %w", err)
299+
}
300+
301+
return nil
302+
}
303+
274304
// setupUser changes the groups, gid, and uid for the user inside the container
275305
func setupUser(config *initConfig) error {
276306
// Set up defaults.

libcontainer/process_linux.go

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"io"
8+
"net"
89
"os"
910
"os/exec"
1011
"path/filepath"
@@ -172,6 +173,42 @@ func (p *setnsProcess) start() (retErr error) {
172173
case procHooks:
173174
// This shouldn't happen.
174175
panic("unexpected procHooks in setns")
176+
case procSeccomp:
177+
if p.config.Config.Seccomp.ListenerPath == "" {
178+
return errors.New("listenerPath is not set")
179+
}
180+
181+
seccompFd, err := recvSeccompFd(uintptr(p.pid()), uintptr(sync.Fd))
182+
if err != nil {
183+
return err
184+
}
185+
defer unix.Close(seccompFd)
186+
187+
bundle, annotations := utils.Annotations(p.config.Config.Labels)
188+
containerProcessState := &specs.ContainerProcessState{
189+
Version: specs.Version,
190+
Fds: []string{specs.SeccompFdName},
191+
Pid: p.cmd.Process.Pid,
192+
Metadata: p.config.Config.Seccomp.ListenerMetadata,
193+
State: specs.State{
194+
Version: specs.Version,
195+
ID: p.config.ContainerId,
196+
Status: specs.StateRunning,
197+
Pid: p.initProcessPid,
198+
Bundle: bundle,
199+
Annotations: annotations,
200+
},
201+
}
202+
if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
203+
containerProcessState, seccompFd); err != nil {
204+
return err
205+
}
206+
207+
// Sync with child.
208+
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
209+
return err
210+
}
211+
return nil
175212
default:
176213
return errors.New("invalid JSON payload from child")
177214
}
@@ -426,6 +463,41 @@ func (p *initProcess) start() (retErr error) {
426463

427464
ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error {
428465
switch sync.Type {
466+
case procSeccomp:
467+
if p.config.Config.Seccomp.ListenerPath == "" {
468+
return errors.New("listenerPath is not set")
469+
}
470+
471+
seccompFd, err := recvSeccompFd(uintptr(childPid), uintptr(sync.Fd))
472+
if err != nil {
473+
return err
474+
}
475+
defer unix.Close(seccompFd)
476+
477+
s, err := p.container.currentOCIState()
478+
if err != nil {
479+
return err
480+
}
481+
482+
// initProcessStartTime hasn't been set yet.
483+
s.Pid = p.cmd.Process.Pid
484+
s.Status = specs.StateCreating
485+
containerProcessState := &specs.ContainerProcessState{
486+
Version: specs.Version,
487+
Fds: []string{specs.SeccompFdName},
488+
Pid: s.Pid,
489+
Metadata: p.config.Config.Seccomp.ListenerMetadata,
490+
State: *s,
491+
}
492+
if err := sendContainerProcessState(p.config.Config.Seccomp.ListenerPath,
493+
containerProcessState, seccompFd); err != nil {
494+
return err
495+
}
496+
497+
// Sync with child.
498+
if err := writeSync(p.messageSockPair.parent, procSeccompDone); err != nil {
499+
return err
500+
}
429501
case procReady:
430502
// set rlimits, this has to be done here because we lose permissions
431503
// to raise the limits once we enter a user-namespace
@@ -486,7 +558,7 @@ func (p *initProcess) start() (retErr error) {
486558

487559
// Sync with child.
488560
if err := writeSync(p.messageSockPair.parent, procRun); err != nil {
489-
return fmt.Errorf("error writing syncT 'run': %w", err)
561+
return err
490562
}
491563
sentRun = true
492564
case procHooks:
@@ -518,7 +590,7 @@ func (p *initProcess) start() (retErr error) {
518590
}
519591
// Sync with child.
520592
if err := writeSync(p.messageSockPair.parent, procResume); err != nil {
521-
return fmt.Errorf("error writing syncT 'resume': %w", err)
593+
return err
522594
}
523595
sentResume = true
524596
default:
@@ -621,6 +693,46 @@ func (p *initProcess) forwardChildLogs() chan error {
621693
return logs.ForwardLogs(p.logFilePair.parent)
622694
}
623695

696+
func recvSeccompFd(childPid, childFd uintptr) (int, error) {
697+
pidfd, _, errno := unix.Syscall(unix.SYS_PIDFD_OPEN, childPid, 0, 0)
698+
if errno != 0 {
699+
return -1, fmt.Errorf("performing SYS_PIDFD_OPEN syscall: %w", errno)
700+
}
701+
defer unix.Close(int(pidfd))
702+
703+
seccompFd, _, errno := unix.Syscall(unix.SYS_PIDFD_GETFD, pidfd, childFd, 0)
704+
if errno != 0 {
705+
return -1, fmt.Errorf("performing SYS_PIDFD_GETFD syscall: %w", errno)
706+
}
707+
708+
return int(seccompFd), nil
709+
}
710+
711+
func sendContainerProcessState(listenerPath string, state *specs.ContainerProcessState, fd int) error {
712+
conn, err := net.Dial("unix", listenerPath)
713+
if err != nil {
714+
return fmt.Errorf("failed to connect with seccomp agent specified in the seccomp profile: %w", err)
715+
}
716+
717+
socket, err := conn.(*net.UnixConn).File()
718+
if err != nil {
719+
return fmt.Errorf("cannot get seccomp socket: %w", err)
720+
}
721+
defer socket.Close()
722+
723+
b, err := json.Marshal(state)
724+
if err != nil {
725+
return fmt.Errorf("cannot marshall seccomp state: %w", err)
726+
}
727+
728+
err = utils.SendFds(socket, b, fd)
729+
if err != nil {
730+
return fmt.Errorf("cannot send seccomp fd to %s: %w", listenerPath, err)
731+
}
732+
733+
return nil
734+
}
735+
624736
func getPipeFds(pid int) ([]string, error) {
625737
fds := make([]string, 3)
626738

libcontainer/seccomp/config.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,13 @@ var operators = map[string]configs.Operator{
1717
}
1818

1919
var actions = map[string]configs.Action{
20-
"SCMP_ACT_KILL": configs.Kill,
21-
"SCMP_ACT_ERRNO": configs.Errno,
22-
"SCMP_ACT_TRAP": configs.Trap,
23-
"SCMP_ACT_ALLOW": configs.Allow,
24-
"SCMP_ACT_TRACE": configs.Trace,
25-
"SCMP_ACT_LOG": configs.Log,
20+
"SCMP_ACT_KILL": configs.Kill,
21+
"SCMP_ACT_ERRNO": configs.Errno,
22+
"SCMP_ACT_TRAP": configs.Trap,
23+
"SCMP_ACT_ALLOW": configs.Allow,
24+
"SCMP_ACT_TRACE": configs.Trace,
25+
"SCMP_ACT_LOG": configs.Log,
26+
"SCMP_ACT_NOTIFY": configs.Notify,
2627
}
2728

2829
var archs = map[string]string{

libcontainer/seccomp/patchbpf/enosys_linux.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER;
4343
#endif
4444
const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG;
4545
46+
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
47+
# define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
48+
#endif
49+
const uintptr_t C_FILTER_FLAG_NEW_LISTENER = SECCOMP_FILTER_FLAG_NEW_LISTENER;
50+
4651
// We use the AUDIT_ARCH_* values because those are the ones used by the kernel
4752
// and SCMP_ARCH_* sometimes has fake values (such as SCMP_ARCH_X32). But we
4853
// use <seccomp.h> so we get libseccomp's fallback definitions of AUDIT_ARCH_*.
@@ -582,7 +587,7 @@ func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (
582587
return fprog, nil
583588
}
584589

585-
func filterFlags(filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
590+
func filterFlags(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) {
586591
// Ignore the error since pre-2.4 libseccomp is treated as API level 0.
587592
apiLevel, _ := libseccomp.GetAPI()
588593

@@ -600,26 +605,38 @@ func filterFlags(filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, er
600605
}
601606

602607
// TODO: Support seccomp flags not yet added to libseccomp-golang...
608+
609+
for _, call := range config.Syscalls {
610+
if call.Action == configs.Notify {
611+
flags |= uint(C.C_FILTER_FLAG_NEW_LISTENER)
612+
break
613+
}
614+
}
615+
603616
return
604617
}
605618

606-
func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
619+
func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (fd int, err error) {
607620
fprog := unix.SockFprog{
608621
Len: uint16(len(filter)),
609622
Filter: &filter[0],
610623
}
624+
fd = -1 // only return a valid fd when C_FILTER_FLAG_NEW_LISTENER is set
611625
// If no seccomp flags were requested we can use the old-school prctl(2).
612626
if flags == 0 {
613627
err = unix.Prctl(unix.PR_SET_SECCOMP,
614628
unix.SECCOMP_MODE_FILTER,
615629
uintptr(unsafe.Pointer(&fprog)), 0, 0)
616630
} else {
617-
_, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
631+
fdptr, _, errno := unix.RawSyscall(unix.SYS_SECCOMP,
618632
uintptr(C.C_SET_MODE_FILTER),
619633
uintptr(flags), uintptr(unsafe.Pointer(&fprog)))
620634
if errno != 0 {
621635
err = errno
622636
}
637+
if flags&uint(C.C_FILTER_FLAG_NEW_LISTENER) != 0 {
638+
fd = int(fdptr)
639+
}
623640
}
624641
runtime.KeepAlive(filter)
625642
runtime.KeepAlive(fprog)
@@ -631,31 +648,33 @@ func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) {
631648
// patches said filter to handle -ENOSYS in a much nicer manner than the
632649
// default libseccomp default action behaviour, and loads the patched filter
633650
// into the kernel for the current process.
634-
func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error {
651+
func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) (int, error) {
635652
// Generate a patched filter.
636653
fprog, err := enosysPatchFilter(config, filter)
637654
if err != nil {
638-
return fmt.Errorf("error patching filter: %w", err)
655+
return -1, fmt.Errorf("error patching filter: %w", err)
639656
}
640657

641658
// Get the set of libseccomp flags set.
642-
seccompFlags, noNewPrivs, err := filterFlags(filter)
659+
seccompFlags, noNewPrivs, err := filterFlags(config, filter)
643660
if err != nil {
644-
return fmt.Errorf("unable to fetch seccomp filter flags: %w", err)
661+
return -1, fmt.Errorf("unable to fetch seccomp filter flags: %w", err)
645662
}
646663

647664
// Set no_new_privs if it was requested, though in runc we handle
648665
// no_new_privs separately so warn if we hit this path.
649666
if noNewPrivs {
650667
logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path")
651668
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
652-
return fmt.Errorf("error enabling no_new_privs bit: %w", err)
669+
return -1, fmt.Errorf("error enabling no_new_privs bit: %w", err)
653670
}
654671
}
655672

656673
// Finally, load the filter.
657-
if err := sysSeccompSetFilter(seccompFlags, fprog); err != nil {
658-
return fmt.Errorf("error loading seccomp filter: %w", err)
674+
fd, err := sysSeccompSetFilter(seccompFlags, fprog)
675+
if err != nil {
676+
return -1, fmt.Errorf("error loading seccomp filter: %w", err)
659677
}
660-
return nil
678+
679+
return fd, nil
661680
}

0 commit comments

Comments
 (0)