Skip to content

Commit d40b343

Browse files
committed
rootfs: switch to fd-based handling of mountpoint targets
An attacker could race with us during mount configuration in order to trick us into mounting over an unexpected path. This would bypass checkProcMount() and would allow for security profiles to be left unapplied by mounting over /proc/self/attr/... (or even more serious outcomes such as killing the entire system by tricking runc into writing strings to /proc/sysrq-trigger). This is a larger issue with our current mount infrastructure, and the ideal solution would be to rewrite it all to be fd-based (which would also allow us to support the "new" mount API, which also avoids a bunch of other issues with mount(8)). However, such a rewrite is not really workable as a security fix, so this patch is a bit of a compromise approach to fix the issue while also moving us a bit towards that eventual end-goal. The core issue in CVE-2025-52881 is that we currently use the (insecure) SecureJoin to re-resolve mountpoint target paths multiple times during mounting. Rather than generating a string from createMountpoint(), we instead open an *os.File handle to the target mountpoint directly and then operate on that handle. This will make it easier to remove utils.WithProcfd() and rework mountViaFds() in the future. The only real issue we need to work around is that we need to re-open the mount target after doing the mount in order to get a handle to the mountpoint -- pathrs.Reopen() doesn't work in this case (it just re-opens the inode under the mountpoint) so we need to do a naive re-open using the full path. Note that if we used move_mount(2) this wouldn't be a problem because we would have a handle to the mountpoint itself. Note that this is still somewhat of a temporary solution -- ideally mountViaFds would use *os.File directly to let us avoid some other issues with using bare /proc/... paths, as well as also letting us more easily use the new mount API on modern kernels. Fixes: GHSA-cgrx-mc8f-2prm CVE-2025-52881 Co-developed-by: lifubang <[email protected]> Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 4b37cd9 commit d40b343

File tree

6 files changed

+267
-105
lines changed

6 files changed

+267
-105
lines changed

internal/pathrs/root_pathrslite.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
/*
3+
* Copyright (C) 2024-2025 Aleksa Sarai <[email protected]>
4+
* Copyright (C) 2024-2025 SUSE LLC
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package pathrs
20+
21+
import (
22+
"fmt"
23+
"os"
24+
"path/filepath"
25+
26+
"github.com/cyphar/filepath-securejoin/pathrs-lite"
27+
"golang.org/x/sys/unix"
28+
29+
"github.com/opencontainers/runc/internal/linux"
30+
)
31+
32+
// OpenInRoot opens the given path inside the root with the provided flags. It
33+
// is effectively shorthand for [securejoin.OpenInRoot] followed by
34+
// [securejoin.Reopen].
35+
func OpenInRoot(root, subpath string, flags int) (*os.File, error) {
36+
handle, err := pathrs.OpenInRoot(root, subpath)
37+
if err != nil {
38+
return nil, err
39+
}
40+
defer handle.Close()
41+
return pathrs.Reopen(handle, flags)
42+
}
43+
44+
// CreateInRoot creates a new file inside a root (as well as any missing parent
45+
// directories) and returns a handle to said file. This effectively has
46+
// open(O_CREAT|O_NOFOLLOW) semantics. If you want the creation to use O_EXCL,
47+
// include it in the passed flags. The fileMode argument uses unix.* mode bits,
48+
// *not* os.FileMode.
49+
func CreateInRoot(root, subpath string, flags int, fileMode uint32) (*os.File, error) {
50+
dir, filename := filepath.Split(subpath)
51+
if filepath.Join("/", filename) == "/" {
52+
return nil, fmt.Errorf("create in root subpath %q has bad trailing component %q", subpath, filename)
53+
}
54+
55+
dirFd, err := MkdirAllInRootOpen(root, dir, 0o755)
56+
if err != nil {
57+
return nil, err
58+
}
59+
defer dirFd.Close()
60+
61+
// We know that the filename does not have any "/" components, and that
62+
// dirFd is inside the root. O_NOFOLLOW will stop us from following
63+
// trailing symlinks, so this is safe to do. libpathrs's Root::create_file
64+
// works the same way.
65+
flags |= unix.O_CREAT | unix.O_NOFOLLOW
66+
fd, err := linux.Openat(int(dirFd.Fd()), filename, flags, fileMode)
67+
if err != nil {
68+
return nil, err
69+
}
70+
return os.NewFile(uintptr(fd), root+"/"+subpath), nil
71+
}

libcontainer/criu_linux.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -582,8 +582,12 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
582582
if isOnTmpfs(m.Destination, mounts) {
583583
continue
584584
}
585-
if _, err := createMountpoint(c.config.Rootfs, mountEntry{Mount: m}); err != nil {
586-
return fmt.Errorf("create criu restore mountpoint for %s mount: %w", m.Destination, err)
585+
me := mountEntry{Mount: m}
586+
if err := me.createOpenMountpoint(c.config.Rootfs); err != nil {
587+
return fmt.Errorf("create criu restore mountpoint for %s mount: %w", me.Destination, err)
588+
}
589+
if me.dstFile != nil {
590+
defer me.dstFile.Close()
587591
}
588592
// If the mount point is a bind mount, we need to mount
589593
// it now so that runc can create the necessary mount
@@ -595,13 +599,20 @@ func (c *Container) prepareCriuRestoreMounts(mounts []*configs.Mount) error {
595599
// because during initial container creation mounts are
596600
// set up in the order they are configured.
597601
if m.Device == "bind" {
598-
if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(dstFd string) error {
602+
if err := utils.WithProcfdFile(me.dstFile, func(dstFd string) error {
599603
return mountViaFds(m.Source, nil, m.Destination, dstFd, "", unix.MS_BIND|unix.MS_REC, "")
600604
}); err != nil {
601605
return err
602606
}
603607
umounts = append(umounts, m.Destination)
604608
}
609+
if me.dstFile != nil {
610+
// As this is being done in a loop, the defer earlier will be
611+
// delayed until all mountpoints are handled -- for a config with
612+
// many mountpoints this could result in a lot of open files. So we
613+
// opportunistically close the file as well as deferring it.
614+
_ = me.dstFile.Close()
615+
}
605616
}
606617
return nil
607618
}

0 commit comments

Comments
 (0)