|
| 1 | +package dmz |
| 2 | + |
| 3 | +import ( |
| 4 | + "errors" |
| 5 | + "fmt" |
| 6 | + "io" |
| 7 | + "os" |
| 8 | + |
| 9 | + "github.com/sirupsen/logrus" |
| 10 | + "golang.org/x/sys/unix" |
| 11 | + |
| 12 | + "github.com/opencontainers/runc/libcontainer/system" |
| 13 | +) |
| 14 | + |
| 15 | +type SealFunc func(**os.File) error |
| 16 | + |
| 17 | +var ( |
| 18 | + _ SealFunc = sealMemfd |
| 19 | + _ SealFunc = sealFile |
| 20 | +) |
| 21 | + |
| 22 | +const baseMemfdSeals = unix.F_SEAL_SEAL | unix.F_SEAL_SHRINK | unix.F_SEAL_GROW | unix.F_SEAL_WRITE |
| 23 | + |
| 24 | +func sealMemfd(f **os.File) error { |
| 25 | + if err := (*f).Chmod(0o511); err != nil { |
| 26 | + return err |
| 27 | + } |
| 28 | + // Try to set the newer memfd sealing flags, but we ignore |
| 29 | + // errors because they are not needed and we want to continue |
| 30 | + // to work on older kernels. |
| 31 | + fd := (*f).Fd() |
| 32 | + // F_SEAL_FUTURE_WRITE -- Linux 5.1 |
| 33 | + _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, unix.F_SEAL_FUTURE_WRITE) |
| 34 | + // F_SEAL_EXEC -- Linux 6.3 |
| 35 | + const F_SEAL_EXEC = 0x20 //nolint:revive // this matches the unix.* name |
| 36 | + _, _ = unix.FcntlInt(fd, unix.F_ADD_SEALS, F_SEAL_EXEC) |
| 37 | + // Apply all original memfd seals. |
| 38 | + _, err := unix.FcntlInt(fd, unix.F_ADD_SEALS, baseMemfdSeals) |
| 39 | + return os.NewSyscallError("fcntl(F_ADD_SEALS)", err) |
| 40 | +} |
| 41 | + |
| 42 | +// Memfd creates a sealable executable memfd (supported since Linux 3.17). |
| 43 | +func Memfd(comment string) (*os.File, SealFunc, error) { |
| 44 | + file, err := system.ExecutableMemfd("runc_cloned:"+comment, unix.MFD_ALLOW_SEALING|unix.MFD_CLOEXEC) |
| 45 | + return file, sealMemfd, err |
| 46 | +} |
| 47 | + |
| 48 | +func sealFile(f **os.File) error { |
| 49 | + if err := (*f).Chmod(0o511); err != nil { |
| 50 | + return err |
| 51 | + } |
| 52 | + // When sealing an O_TMPFILE-style descriptor we need to |
| 53 | + // re-open the path as O_PATH to clear the existing write |
| 54 | + // handle we have. |
| 55 | + opath, err := os.OpenFile(fmt.Sprintf("/proc/self/fd/%d", (*f).Fd()), unix.O_PATH|unix.O_CLOEXEC, 0) |
| 56 | + if err != nil { |
| 57 | + return fmt.Errorf("reopen tmpfile: %w", err) |
| 58 | + } |
| 59 | + _ = (*f).Close() |
| 60 | + *f = opath |
| 61 | + return nil |
| 62 | +} |
| 63 | + |
| 64 | +// otmpfile creates an open(O_TMPFILE) file in the given directory (supported |
| 65 | +// since Linux 3.11). |
| 66 | +func otmpfile(dir string) (*os.File, SealFunc, error) { |
| 67 | + file, err := os.OpenFile(dir, unix.O_TMPFILE|unix.O_RDWR|unix.O_EXCL|unix.O_CLOEXEC, 0o700) |
| 68 | + if err != nil { |
| 69 | + return nil, nil, fmt.Errorf("O_TMPFILE creation failed: %w", err) |
| 70 | + } |
| 71 | + // Make sure we actually got an unlinked O_TMPFILE descriptor. |
| 72 | + var stat unix.Stat_t |
| 73 | + if err := unix.Fstat(int(file.Fd()), &stat); err != nil { |
| 74 | + file.Close() |
| 75 | + return nil, nil, fmt.Errorf("cannot fstat O_TMPFILE fd: %w", err) |
| 76 | + } else if stat.Nlink != 0 { |
| 77 | + file.Close() |
| 78 | + return nil, nil, errors.New("O_TMPFILE has non-zero nlink") |
| 79 | + } |
| 80 | + return file, sealFile, err |
| 81 | +} |
| 82 | + |
| 83 | +// mktemp creates a classic unlinked file in the given directory. |
| 84 | +func mktemp(dir string) (*os.File, SealFunc, error) { |
| 85 | + file, err := os.CreateTemp(dir, "runc.") |
| 86 | + if err != nil { |
| 87 | + return nil, nil, err |
| 88 | + } |
| 89 | + // Unlink the file and verify it was unlinked. |
| 90 | + if err := os.Remove(file.Name()); err != nil { |
| 91 | + return nil, nil, fmt.Errorf("unlinking classic tmpfile: %w", err) |
| 92 | + } |
| 93 | + var stat unix.Stat_t |
| 94 | + if err := unix.Fstat(int(file.Fd()), &stat); err != nil { |
| 95 | + return nil, nil, fmt.Errorf("cannot fstat classic tmpfile: %w", err) |
| 96 | + } else if stat.Nlink != 0 { |
| 97 | + return nil, nil, fmt.Errorf("classic tmpfile %s has non-zero nlink after unlink", file.Name()) |
| 98 | + } |
| 99 | + return file, sealFile, err |
| 100 | +} |
| 101 | + |
| 102 | +func getSealableFile(comment, tmpDir string) (file *os.File, sealFn SealFunc, err error) { |
| 103 | + // First, try an executable memfd (supported since Linux 3.17). |
| 104 | + file, sealFn, err = Memfd(comment) |
| 105 | + if err == nil { |
| 106 | + return |
| 107 | + } |
| 108 | + logrus.Debugf("memfd cloned binary failed, falling back to O_TMPFILE: %v", err) |
| 109 | + // Try to fallback to O_TMPFILE (supported since Linux 3.11). |
| 110 | + file, sealFn, err = otmpfile(tmpDir) |
| 111 | + if err == nil { |
| 112 | + return |
| 113 | + } |
| 114 | + logrus.Debugf("O_TMPFILE cloned binary failed, falling back to mktemp(): %v", err) |
| 115 | + // Finally, try a classic unlinked temporary file. |
| 116 | + file, sealFn, err = mktemp(tmpDir) |
| 117 | + if err == nil { |
| 118 | + return |
| 119 | + } |
| 120 | + return nil, nil, fmt.Errorf("could not create sealable file for cloned binary: %w", err) |
| 121 | +} |
| 122 | + |
| 123 | +// CloneBinary creates a "sealed" clone of a given binary, which can be used to |
| 124 | +// thwart attempts by the container process to gain access to host binaries |
| 125 | +// through procfs magic-link shenanigans. For more details on why this is |
| 126 | +// necessary, see CVE-2019-5736. |
| 127 | +func CloneBinary(src io.Reader, size int64, name, tmpDir string) (*os.File, error) { |
| 128 | + logrus.Debugf("cloning %s binary (%d bytes)", name, size) |
| 129 | + file, sealFn, err := getSealableFile(name, tmpDir) |
| 130 | + if err != nil { |
| 131 | + return nil, err |
| 132 | + } |
| 133 | + copied, err := io.Copy(file, src) |
| 134 | + if err != nil { |
| 135 | + file.Close() |
| 136 | + return nil, fmt.Errorf("copy binary: %w", err) |
| 137 | + } else if copied != size { |
| 138 | + file.Close() |
| 139 | + return nil, fmt.Errorf("copied binary size mismatch: %d != %d", copied, size) |
| 140 | + } |
| 141 | + if err := sealFn(&file); err != nil { |
| 142 | + file.Close() |
| 143 | + return nil, fmt.Errorf("could not seal fd: %w", err) |
| 144 | + } |
| 145 | + return file, nil |
| 146 | +} |
| 147 | + |
| 148 | +// IsCloned returns whether the given file can be guaranteed to be a safe exe. |
| 149 | +func IsCloned(exe *os.File) bool { |
| 150 | + seals, err := unix.FcntlInt(exe.Fd(), unix.F_GET_SEALS, 0) |
| 151 | + if err != nil { |
| 152 | + // /proc/self/exe is probably not a memfd |
| 153 | + logrus.Debugf("F_GET_SEALS on %s failed: %v", exe.Name(), err) |
| 154 | + return false |
| 155 | + } |
| 156 | + // The memfd must have all of the base seals applied. |
| 157 | + logrus.Debugf("checking %s memfd seals: 0x%x", exe.Name(), seals) |
| 158 | + return seals&baseMemfdSeals == baseMemfdSeals |
| 159 | +} |
| 160 | + |
| 161 | +// CloneSelfExe makes a clone of the current process's binary (through |
| 162 | +// /proc/self/exe). This binary can then be used for "runc init" in order to |
| 163 | +// make sure the container process can never resolve the original runc binary. |
| 164 | +// For more details on why this is necessary, see CVE-2019-5736. |
| 165 | +func CloneSelfExe(tmpDir string) (*os.File, error) { |
| 166 | + selfExe, err := os.Open("/proc/self/exe") |
| 167 | + if err != nil { |
| 168 | + return nil, fmt.Errorf("opening current binary: %w", err) |
| 169 | + } |
| 170 | + defer selfExe.Close() |
| 171 | + |
| 172 | + stat, err := selfExe.Stat() |
| 173 | + if err != nil { |
| 174 | + return nil, fmt.Errorf("checking /proc/self/exe size: %w", err) |
| 175 | + } |
| 176 | + size := stat.Size() |
| 177 | + |
| 178 | + return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir) |
| 179 | +} |
| 180 | + |
| 181 | +// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can |
| 182 | +// be guaranteed to be safe. This means that it must be a sealed memfd. Other |
| 183 | +// types of clones cannot be completely verified as safe. |
| 184 | +func IsSelfExeCloned() bool { |
| 185 | + selfExe, err := os.Open("/proc/self/exe") |
| 186 | + if err != nil { |
| 187 | + logrus.Debugf("open /proc/self/exe failed: %v", err) |
| 188 | + return false |
| 189 | + } |
| 190 | + defer selfExe.Close() |
| 191 | + return IsCloned(selfExe) |
| 192 | +} |
0 commit comments