#include <unistd.h>

extern char **environ;

/*
 * runc-dmz is a tiny trampoline: it replaces itself with the program named by
 * its own argv[0], forwarding the argument vector and the environment
 * unchanged. The caller is expected to place the path of the real program to
 * run in argv[0].
 *
 * Returns 127 if no argv[0] was supplied (there is nothing to execute).
 * Otherwise returns whatever execve(2) returns, which only happens when the
 * exec itself failed.
 */
int main(int argc, char **argv)
{
	/*
	 * An empty argument vector is a caller error; report it as a failure
	 * rather than exiting 0 and pretending the target program ran.
	 */
	if (argc < 1)
		return 127;

	return execve(argv[0], argv, environ);
}
all: runc recvtty sd-helper seccompagent fs-idmap From 7adf5a5eb446f0d2ace0d60037db677a1b4aba83 Mon Sep 17 00:00:00 2001 From: lifubang Date: Tue, 15 Aug 2023 17:54:30 +0800 Subject: [PATCH 3/4] use runc-dmz to defeat CVE-2019-5736 Signed-off-by: lifubang --- libcontainer/container_linux.go | 1 + libcontainer/init_linux.go | 13 +- libcontainer/nsenter/cloned_binary.c | 185 +++------------------------ libcontainer/setns_init_linux.go | 10 +- libcontainer/standard_init_linux.go | 8 +- libcontainer/system/linux.go | 34 +++++ 6 files changed, 78 insertions(+), 173 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 1a2a6fe37ac..12769f5f2b9 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -492,6 +492,7 @@ func (c *Container) commandTemplate(p *Process, childInitPipe *os.File, childLog cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), "_LIBCONTAINER_STATEDIR="+c.root, + "_LIBCONTAINER_DMZFD=-1", ) cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe) diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 2e2d00ff83a..33f3af613be 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -186,6 +186,12 @@ func startInitialization() (retErr error) { return err } + // Get runc-dmz fds. + dmzFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_DMZFD")) + if err != nil { + return fmt.Errorf("dmzFd error: %w", err) + } + // clear the current process's environment to clean any libcontainer // specific env vars. os.Clearenv() @@ -201,10 +207,10 @@ func startInitialization() (retErr error) { }() // If init succeeds, it will not return, hence none of the defers will be called. 
- return containerInit(it, pipe, consoleSocket, fifofd, logFD, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds}) + return containerInit(it, pipe, consoleSocket, fifofd, logFD, dmzFd, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds}) } -func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds mountFds) error { +func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd, dmzFd int, mountFds mountFds) error { var config *initConfig if err := json.NewDecoder(pipe).Decode(&config); err != nil { return err @@ -212,6 +218,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo if err := populateProcessEnvironment(config.Env); err != nil { return err } + switch t { case initSetns: // mount and idmap fds must be nil in this case. We don't mount while doing runc exec. @@ -224,6 +231,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo consoleSocket: consoleSocket, config: config, logFd: logFd, + dmzFd: dmzFd, } return i.Init() case initStandard: @@ -233,6 +241,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo parentPid: unix.Getppid(), config: config, fifoFd: fifoFd, + dmzFd: dmzFd, logFd: logFd, mountFds: mountFds, } diff --git a/libcontainer/nsenter/cloned_binary.c b/libcontainer/nsenter/cloned_binary.c index a7f992fddd7..459e7ab7ab9 100644 --- a/libcontainer/nsenter/cloned_binary.c +++ b/libcontainer/nsenter/cloned_binary.c @@ -132,7 +132,7 @@ int memfd_create(const char *name, unsigned int flags) #endif #define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY" -#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe" +#define RUNC_MEMFD_COMMENT "runc_cloned:runc-dmz" /* * There are newer memfd seals (such as F_SEAL_FUTURE_WRITE and F_SEAL_EXEC), * which we use opportunistically. 
However, this set is the original set of @@ -142,162 +142,6 @@ int memfd_create(const char *name, unsigned int flags) #define RUNC_MEMFD_MIN_SEALS \ (F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE) -static void *must_realloc(void *ptr, size_t size) -{ - void *old = ptr; - do { - ptr = realloc(old, size); - } while (!ptr); - return ptr; -} - -/* - * Verify whether we are currently in a self-cloned program (namely, is - * /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather - * for shmem files), and we want to be sure it's actually sealed. - */ -static int is_self_cloned(void) -{ - int fd, seals = 0, is_cloned = false; - struct stat statbuf = { }; - struct statfs fsbuf = { }; - - fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); - if (fd < 0) { - write_log(ERROR, "cannot open runc binary for reading: open /proc/self/exe: %m"); - return -ENOTRECOVERABLE; - } - - /* - * Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for - * this, because you cannot write to a sealed memfd no matter what. - */ - seals = fcntl(fd, F_GET_SEALS); - if (seals >= 0) { - write_log(DEBUG, "checking /proc/self/exe memfd seals: 0x%x", seals); - is_cloned = (seals & RUNC_MEMFD_MIN_SEALS) == RUNC_MEMFD_MIN_SEALS; - if (is_cloned) - goto out; - } - - /* - * All other forms require CLONED_BINARY_ENV, since they are potentially - * writeable (or we can't tell if they're fully safe) and thus we must - * check the environment as an extra layer of defence. - */ - if (!getenv(CLONED_BINARY_ENV)) { - is_cloned = false; - goto out; - } - - /* - * Is the binary on a read-only filesystem? We can't detect bind-mounts in - * particular (in-kernel they are identical to regular mounts) but we can - * at least be sure that it's read-only. In addition, to make sure that - * it's *our* bind-mount we check CLONED_BINARY_ENV. 
- */ - if (fstatfs(fd, &fsbuf) >= 0) - is_cloned |= (fsbuf.f_flags & MS_RDONLY); - - /* - * Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6 - * which appears to have a borked backport of F_GET_SEALS. Either way, - * having a file which has no hardlinks indicates that we aren't using - * a host-side "runc" binary and this is something that a container - * cannot fake (because unlinking requires being able to resolve the - * path that you want to unlink). - */ - if (fstat(fd, &statbuf) >= 0) - is_cloned |= (statbuf.st_nlink == 0); - -out: - close(fd); - return is_cloned; -} - -/* Read a given file into a new buffer, and providing the length. */ -static char *read_file(char *path, size_t *length) -{ - int fd; - char buf[4096], *copy = NULL; - - if (!length) - return NULL; - - fd = open(path, O_RDONLY | O_CLOEXEC); - if (fd < 0) - return NULL; - - *length = 0; - for (;;) { - ssize_t n; - - n = read(fd, buf, sizeof(buf)); - if (n < 0) - goto error; - if (!n) - break; - - copy = must_realloc(copy, (*length + n) * sizeof(*copy)); - memcpy(copy + *length, buf, n); - *length += n; - } - close(fd); - return copy; - -error: - close(fd); - free(copy); - return NULL; -} - -/* - * A poor-man's version of "xargs -0". Basically parses a given block of - * NUL-delimited data, within the given length and adds a pointer to each entry - * to the array of pointers. - */ -static int parse_xargs(char *data, int data_length, char ***output) -{ - int num = 0; - char *cur = data; - - if (!data || *output != NULL) - return -1; - - while (cur < data + data_length) { - num++; - *output = must_realloc(*output, (num + 1) * sizeof(**output)); - (*output)[num - 1] = cur; - cur += strlen(cur) + 1; - } - (*output)[num] = NULL; - return num; -} - -/* - * "Parse" out argv from /proc/self/cmdline. - * This is necessary because we are running in a context where we don't have a - * main() that we can just get the arguments from. 
- */ -static int fetchve(char ***argv) -{ - char *cmdline = NULL; - size_t cmdline_size; - - cmdline = read_file("/proc/self/cmdline", &cmdline_size); - if (!cmdline) - goto error; - - if (parse_xargs(cmdline, cmdline_size, argv) <= 0) - goto error; - - return 0; - -error: - free(cmdline); - return -EINVAL; -} - enum { EFD_NONE = 0, EFD_MEMFD, @@ -499,12 +343,20 @@ static int clone_binary(void) struct stat statbuf = { }; size_t sent = 0; int fdtype = EFD_NONE; + char runcpath[PATH_MAX] = { 0 }; + char dmzpath[PATH_MAX] = { 0 }; execfd = make_execfd(&fdtype); if (execfd < 0 || fdtype == EFD_NONE) return -ENOTRECOVERABLE; - binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); + if (readlink("/proc/self/exe", runcpath, PATH_MAX) < 1) + goto error; + + if (snprintf(dmzpath, PATH_MAX, "%s%s", runcpath, "-dmz") < 0) + goto error; + + binfd = open(dmzpath, O_RDONLY | O_CLOEXEC); if (binfd < 0) goto error; @@ -543,24 +395,19 @@ extern char **environ; int ensure_cloned_binary(void) { int execfd; - char **argv = NULL; - - /* Check that we're not self-cloned, and if we are then bail. 
*/ - int cloned = is_self_cloned(); - if (cloned > 0 || cloned == -ENOTRECOVERABLE) - return cloned; - - if (fetchve(&argv) < 0) - return -EINVAL; execfd = clone_binary(); if (execfd < 0) return -EIO; - if (putenv(CLONED_BINARY_ENV "=1")) + char envString[PATH_MAX] = { 0 }; + if (sprintf(envString, "%d", execfd) < 0) + goto error; + + if (setenv("_LIBCONTAINER_DMZFD", envString, 1)) goto error; - fexecve(execfd, argv, environ); + return 0; error: close(execfd); return -ENOEXEC; diff --git a/libcontainer/setns_init_linux.go b/libcontainer/setns_init_linux.go index ac58190758a..4c43dc3ba89 100644 --- a/libcontainer/setns_init_linux.go +++ b/libcontainer/setns_init_linux.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "os/exec" "strconv" "github.com/opencontainers/selinux/go-selinux" @@ -23,6 +24,7 @@ type linuxSetnsInit struct { consoleSocket *os.File config *initConfig logFd int + dmzFd int } func (l *linuxSetnsInit) getSessionRingName() string { @@ -100,10 +102,16 @@ func (l *linuxSetnsInit) Init() error { } } logrus.Debugf("setns_init: about to exec") + // Close the log pipe fd so the parent's ForwardLogs can exit. 
if err := unix.Close(l.logFd); err != nil { return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} } - return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) + entryPoint, err := exec.LookPath(l.config.Args[0]) + if err != nil { + return err + } + dmzArgs := []string{entryPoint} + return system.Fexecve(uintptr(l.dmzFd), append(dmzArgs, l.config.Args[1:]...), os.Environ()) } diff --git a/libcontainer/standard_init_linux.go b/libcontainer/standard_init_linux.go index f3d04282362..dc8070ba1cc 100644 --- a/libcontainer/standard_init_linux.go +++ b/libcontainer/standard_init_linux.go @@ -24,6 +24,7 @@ type linuxStandardInit struct { consoleSocket *os.File parentPid int fifoFd int + dmzFd int logFd int mountFds mountFds config *initConfig @@ -262,5 +263,10 @@ func (l *linuxStandardInit) Init() error { return err } - return system.Exec(name, l.config.Args[0:], os.Environ()) + entryPoint, err := exec.LookPath(l.config.Args[0]) + if err != nil { + return err + } + dmzArgs := []string{entryPoint} + return system.Fexecve(uintptr(l.dmzFd), append(dmzArgs, l.config.Args[1:]...), os.Environ()) } diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index e1d6eb18034..88aaf9a1985 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -6,6 +6,7 @@ package system import ( "os" "os/exec" + "syscall" "unsafe" "golang.org/x/sys/unix" @@ -49,6 +50,39 @@ func Exec(cmd string, args []string, env []string) error { } } +func Execveat(fd uintptr, pathname string, args []string, env []string, flags int) error { + pathnamep, err := syscall.BytePtrFromString(pathname) + if err != nil { + return err + } + + argv, err := syscall.SlicePtrFromStrings(args) + if err != nil { + return err + } + + envs, err := syscall.SlicePtrFromStrings(env) + if err != nil { + return err + } + + _, _, errno := syscall.Syscall6( + unix.SYS_EXECVEAT, + fd, + uintptr(unsafe.Pointer(pathnamep)), + 
uintptr(unsafe.Pointer(&argv[0])), + uintptr(unsafe.Pointer(&envs[0])), + uintptr(flags), + 0, + ) + + return errno +} + +func Fexecve(fd uintptr, args []string, env []string) error { + return Execveat(fd, "", args, env, unix.AT_EMPTY_PATH) +} + func SetParentDeathSignal(sig uintptr) error { if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { return err From 7c7e825b06ea7c09a2060175e1846ce5f7eb323e Mon Sep 17 00:00:00 2001 From: lifubang Date: Tue, 15 Aug 2023 22:36:13 +0800 Subject: [PATCH 4/4] let unit test work after importing runc-dmz Signed-off-by: lifubang --- libcontainer/integration/utils_test.go | 19 +++++++++++++++++++ libcontainer/nsenter/nsenter_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/libcontainer/integration/utils_test.go b/libcontainer/integration/utils_test.go index 780288ad02b..84a9a95980a 100644 --- a/libcontainer/integration/utils_test.go +++ b/libcontainer/integration/utils_test.go @@ -27,6 +27,16 @@ func init() { // Figure out path to get-images.sh. Note it won't work // in case the compiled test binary is moved elsewhere. 
// getExeDir returns the directory that contains the currently running
// executable, with all symbolic links in that directory path resolved.
//
// It panics if the executable path cannot be determined or if the directory
// cannot be resolved. Previously a resolution failure was ignored and an
// empty string was returned, which made any path joined onto the result
// silently relative to the current working directory.
func getExeDir() string {
	exePath, err := os.Executable()
	if err != nil {
		panic(err)
	}
	dir, err := filepath.EvalSymlinks(filepath.Dir(exePath))
	if err != nil {
		panic(err)
	}
	return dir
}
+49,7 @@ func TestNsenterValidPaths(t *testing.T) { // join pid ns of the current process fmt.Sprintf("pid:/proc/%d/ns/pid", os.Getpid()), } + fmt.Printf("=========os.Args[0] %s\n", os.Args[0]) cmd := &exec.Cmd{ Path: os.Args[0], Args: args,