Skip to content

Commit 51b7018

Browse files
ayushr2 and gvisor-bot
authored and committed
Support security.capability xattr for tmpfs.
Within the "security" xattr namespace, this allows writes only to "security.capability", the xattr that stores file capabilities. In goferfs, writes to the "security" namespace are disallowed so that they do not propagate to the underlying host filesystem.
PiperOrigin-RevId: 756009863
1 parent 0bf11c1 commit 51b7018

File tree

11 files changed

+222
-58
lines changed

11 files changed

+222
-58
lines changed

images/basic/filecap/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ FROM alpine
22

33
RUN apk add libcap
44

5-
RUN cp /bin/busybox /mnt/cat && setcap cap_net_admin+ep /mnt/cat
5+
RUN cp /bin/cat /mnt/cat && setcap cap_net_admin+ep /mnt/cat

pkg/abi/linux/capability.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,18 @@ func (c *VfsCapData) Inheritable() uint64 {
271271
return uint64(c.InheritableHi)<<32 | uint64(c.InheritableLo)
272272
}
273273

274+
// IsRevision2 returns true if c is v2.
275+
func (c *VfsCapData) IsRevision2() bool {
276+
return (c.MagicEtc & VFS_CAP_REVISION_MASK) == VFS_CAP_REVISION_2
277+
}
278+
279+
// ToString marshals c into bytes and returns it as a string.
280+
func (c *VfsCapData) ToString() string {
281+
buf := make([]byte, c.SizeBytes())
282+
c.MarshalUnsafe(buf)
283+
return string(buf)
284+
}
285+
274286
// VfsNsCapData is equivalent to Linux's struct vfs_ns_cap_data.
275287
//
276288
// +marshal
@@ -279,6 +291,34 @@ type VfsNsCapData struct {
279291
RootID uint32
280292
}
281293

294+
// ConvertToV3 converts c to v3 file capabilities.
295+
func (c *VfsNsCapData) ConvertToV3(rootid uint32) {
296+
c.RootID = rootid
297+
if c.IsRevision2() {
298+
// Change to v3 while retaining the effective bit.
299+
c.MagicEtc = VFS_CAP_REVISION_3 | c.MagicEtc&VFS_CAP_FLAGS_EFFECTIVE
300+
}
301+
}
302+
303+
// ConvertToV2 converts c to v2 file capabilities.
304+
func (c *VfsNsCapData) ConvertToV2() {
305+
c.RootID = 0
306+
if !c.IsRevision2() {
307+
// Change to v2 while retaining the effective bit.
308+
c.MagicEtc = VFS_CAP_REVISION_2 | c.MagicEtc&VFS_CAP_FLAGS_EFFECTIVE
309+
}
310+
}
311+
312+
// ToString marshals c into bytes and returns it as a string.
313+
func (c *VfsNsCapData) ToString() string {
314+
if c.IsRevision2() {
315+
return c.VfsCapData.ToString()
316+
}
317+
buf := make([]byte, c.SizeBytes())
318+
c.MarshalUnsafe(buf)
319+
return string(buf)
320+
}
321+
282322
// CapUserHeader is equivalent to Linux's cap_user_header_t.
283323
//
284324
// +marshal

pkg/abi/linux/xattr.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ const (
2626
XATTR_SECURITY_PREFIX = "security."
2727
XATTR_SECURITY_PREFIX_LEN = len(XATTR_SECURITY_PREFIX)
2828

29+
XATTR_SECURITY_CAPABILITY = XATTR_SECURITY_PREFIX + "capability"
30+
2931
XATTR_SYSTEM_PREFIX = "system."
3032
XATTR_SYSTEM_PREFIX_LEN = len(XATTR_SYSTEM_PREFIX)
3133

pkg/sentry/fsimpl/gofer/gofer.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,10 @@ func (d *dentry) checkXattrPermissions(creds *auth.Credentials, name string, ats
14561456
if strings.HasPrefix(name, linux.XATTR_SYSTEM_PREFIX) || strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX) {
14571457
return linuxerr.EOPNOTSUPP
14581458
}
1459+
// Do not allow writes to the "security" namespace on the host filesystem.
1460+
if ats.MayWrite() && strings.HasPrefix(name, linux.XATTR_SECURITY_PREFIX) {
1461+
return linuxerr.EOPNOTSUPP
1462+
}
14591463
mode := linux.FileMode(d.mode.Load())
14601464
kuid := auth.KUID(d.uid.Load())
14611465
kgid := auth.KGID(d.gid.Load())

pkg/sentry/fsimpl/tmpfs/tmpfs.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.Virt
193193
allowXattrPrefix := map[string]struct{}{
194194
linux.XATTR_TRUSTED_PREFIX: {},
195195
linux.XATTR_USER_PREFIX: {},
196-
// The "security" namespace is allowed, but it always returns an error.
196+
// Only the "security.capability" xattr is supported.
197197
linux.XATTR_SECURITY_PREFIX: {},
198198
}
199199

@@ -876,7 +876,7 @@ func (i *inode) setXattr(creds *auth.Credentials, opts *vfs.SetXattrOptions) err
876876
if err := vfs.GenericCheckPermissions(creds, vfs.MayWrite, mode, kuid, kgid); err != nil {
877877
return err
878878
}
879-
return i.xattrs.SetXattr(creds, mode, kuid, opts)
879+
return i.xattrs.SetXattr(creds, mode, kuid, kgid, opts)
880880
}
881881

882882
func (i *inode) removeXattr(creds *auth.Credentials, name string) error {

pkg/sentry/kernel/auth/capability_set.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,77 @@ func CapsFromVfsCaps(capData linux.VfsNsCapData, creds *Credentials) (*Credentia
103103
return newCreds, nil
104104
}
105105

106+
// FixupVfsCapDataOnSet may convert the given value to v3 file capabilities. It
107+
// is analogous to security/commoncap.c:cap_convert_nscap().
108+
func FixupVfsCapDataOnSet(creds *Credentials, value string, kuid KUID, kgid KGID) (string, error) {
109+
vfsCaps, err := VfsCapDataOf([]byte(value))
110+
if err != nil {
111+
return "", err
112+
}
113+
if !creds.HasCapabilityOnFile(linux.CAP_SETFCAP, kuid, kgid) {
114+
return "", linuxerr.EPERM
115+
}
116+
if vfsCaps.IsRevision2() && creds.HasCapabilityIn(linux.CAP_SETFCAP, creds.UserNamespace.Root()) {
117+
// The user is privileged, allow the v2 write.
118+
return value, nil
119+
}
120+
// Linux does the following UID gymnastics:
121+
// 1. The userspace-provided rootID is relative to the caller's user
122+
// namespace. So vfsCaps.RootID is mapped down to KUID first.
123+
// 2. If this is an ID-mapped mount, the result is mapped up using the
124+
// ID-map and then down again using the filesystem's owning user
125+
// namespace (inode->i_sb->s_user_ns). We again have a KUID result.
126+
// 3. The result is mapped up using the filesystem's owning user namespace.
127+
//
128+
// The final result is saved in the xattr value at vfs_ns_cap_data->rootid.
129+
// Since gVisor does not support ID-mapped mounts and all filesystems are
130+
// owned by the initial user namespace, we can skip steps 2 and 3.
131+
rootID := creds.UserNamespace.MapToKUID(UID(vfsCaps.RootID))
132+
if !rootID.Ok() {
133+
return "", linuxerr.EINVAL
134+
}
135+
vfsCaps.ConvertToV3(uint32(rootID))
136+
return vfsCaps.ToString(), nil
137+
}
138+
139+
// FixupVfsCapDataOnGet may convert the given value to v2 file capabilities. It
140+
// is analogous to security/commoncap.c:cap_inode_getsecurity().
141+
func FixupVfsCapDataOnGet(creds *Credentials, value string) (string, error) {
142+
vfsCaps, err := VfsCapDataOf([]byte(value))
143+
if err != nil {
144+
return "", err
145+
}
146+
// Linux does the steps mentioned in FixupVfsCapDataOnSet in reverse. But
147+
// since gVisor does not support ID-mapped mounts and all filesystems are
148+
// owned by the initial user namespace, we only need to reverse step 1 here.
149+
rootID := KUID(vfsCaps.RootID)
150+
mappedRoot := creds.UserNamespace.MapFromKUID(rootID)
151+
if mappedRoot.Ok() && mappedRoot != RootUID {
152+
// Return this as v3.
153+
vfsCaps.ConvertToV3(uint32(mappedRoot))
154+
return vfsCaps.ToString(), nil
155+
}
156+
if !rootIDOwnsCurrentUserns(creds, rootID) {
157+
return "", linuxerr.EOVERFLOW
158+
}
159+
// Return this as v2.
160+
vfsCaps.ConvertToV2()
161+
return vfsCaps.ToString(), nil
162+
}
163+
164+
// Analogous to security/commoncap.c:rootid_owns_currentns().
165+
func rootIDOwnsCurrentUserns(creds *Credentials, rootID KUID) bool {
166+
if !rootID.Ok() {
167+
return false
168+
}
169+
for ns := creds.UserNamespace; ns != nil; ns = ns.parent {
170+
if ns.MapFromKUID(rootID) == RootUID {
171+
return true
172+
}
173+
}
174+
return false
175+
}
176+
106177
// TaskCapabilities represents all the capability sets for a task. Each of these
107178
// sets is explained in greater detail in capabilities(7).
108179
type TaskCapabilities struct {

pkg/sentry/kernel/auth/credentials.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,13 @@ func (c *Credentials) HasCapability(cp linux.Capability) bool {
196196
return c.HasCapabilityIn(cp, c.UserNamespace)
197197
}
198198

199+
// HasCapabilityOnFile returns true if creds has the given capability with
200+
// respect to a file with the given owning UID and GID, consistent with Linux's
201+
// kernel/capability.c:capable_wrt_inode_uidgid().
202+
func (c *Credentials) HasCapabilityOnFile(cp linux.Capability, kuid KUID, kgid KGID) bool {
203+
return c.HasCapability(cp) && c.UserNamespace.MapFromKUID(kuid).Ok() && c.UserNamespace.MapFromKGID(kgid).Ok()
204+
}
205+
199206
// UseUID checks that c can use uid in its user namespace, then translates it
200207
// to the root user namespace.
201208
//

pkg/sentry/loader/loader.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,6 @@ import (
3838
"gvisor.dev/gvisor/pkg/usermem"
3939
)
4040

41-
const (
42-
securityCapability = linux.XATTR_SECURITY_PREFIX + "capability"
43-
)
44-
4541
// LoadArgs holds specifications for an executable file to be loaded.
4642
type LoadArgs struct {
4743
// MemoryManager is the memory manager to load the executable into.
@@ -268,7 +264,7 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
268264
return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
269265
}
270266
defer file.DecRef(ctx)
271-
xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: securityCapability, Size: linux.XATTR_CAPS_SZ_3})
267+
xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: linux.XATTR_SECURITY_CAPABILITY, Size: linux.XATTR_CAPS_SZ_3})
272268
switch {
273269
case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.ENOTSUP, err):
274270
xattr = ""

pkg/sentry/vfs/memxattr/xattr.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,14 @@ func (x *SimpleExtendedAttributes) GetXattr(creds *auth.Credentials, mode linux.
5656
if opts.Size != 0 && uint64(len(value)) > opts.Size {
5757
return "", linuxerr.ERANGE
5858
}
59+
if opts.Name == linux.XATTR_SECURITY_CAPABILITY {
60+
return auth.FixupVfsCapDataOnGet(creds, value)
61+
}
5962
return value, nil
6063
}
6164

6265
// SetXattr sets 'value' at 'name'.
63-
func (x *SimpleExtendedAttributes) SetXattr(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, opts *vfs.SetXattrOptions) error {
66+
func (x *SimpleExtendedAttributes) SetXattr(creds *auth.Credentials, mode linux.FileMode, kuid auth.KUID, kgid auth.KGID, opts *vfs.SetXattrOptions) error {
6467
if err := vfs.CheckXattrPermissions(creds, vfs.MayWrite, mode, kuid, opts.Name); err != nil {
6568
return err
6669
}
@@ -82,6 +85,13 @@ func (x *SimpleExtendedAttributes) SetXattr(creds *auth.Credentials, mode linux.
8285
return linuxerr.ENODATA
8386
}
8487

88+
if opts.Name == linux.XATTR_SECURITY_CAPABILITY {
89+
var err error
90+
opts.Value, err = auth.FixupVfsCapDataOnSet(creds, opts.Value, kuid, kgid)
91+
if err != nil {
92+
return err
93+
}
94+
}
8595
x.xattrs[opts.Name] = opts.Value
8696
return nil
8797
}

pkg/sentry/vfs/permissions.go

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@ func CheckSetStat(ctx context.Context, creds *auth.Credentials, opts *SetStatOpt
209209
}
210210
if stat.Mask&linux.STATX_UID != 0 {
211211
if !((creds.EffectiveKUID == kuid && auth.KUID(stat.UID) == kuid) ||
212-
HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) {
212+
creds.HasCapabilityOnFile(linux.CAP_CHOWN, kuid, kgid)) {
213213
return linuxerr.EPERM
214214
}
215215
}
216216
if stat.Mask&linux.STATX_GID != 0 {
217217
if !((creds.EffectiveKUID == kuid && creds.InGroup(auth.KGID(stat.GID))) ||
218-
HasCapabilityOnFile(creds, linux.CAP_CHOWN, kuid, kgid)) {
218+
creds.HasCapabilityOnFile(linux.CAP_CHOWN, kuid, kgid)) {
219219
return linuxerr.EPERM
220220
}
221221
}
@@ -249,7 +249,7 @@ func CheckDeleteSticky(creds *auth.Credentials, parentMode linux.FileMode, paren
249249
}
250250
if creds.EffectiveKUID == childKUID ||
251251
creds.EffectiveKUID == parentKUID ||
252-
HasCapabilityOnFile(creds, linux.CAP_FOWNER, childKUID, childKGID) {
252+
creds.HasCapabilityOnFile(linux.CAP_FOWNER, childKUID, childKGID) {
253253
return nil
254254
}
255255
return linuxerr.EPERM
@@ -265,13 +265,6 @@ func CanActAsOwner(creds *auth.Credentials, kuid auth.KUID) bool {
265265
return creds.HasCapability(linux.CAP_FOWNER) && creds.UserNamespace.MapFromKUID(kuid).Ok()
266266
}
267267

268-
// HasCapabilityOnFile returns true if creds has the given capability with
269-
// respect to a file with the given owning UID and GID, consistent with Linux's
270-
// kernel/capability.c:capable_wrt_inode_uidgid().
271-
func HasCapabilityOnFile(creds *auth.Credentials, cp linux.Capability, kuid auth.KUID, kgid auth.KGID) bool {
272-
return creds.HasCapability(cp) && creds.UserNamespace.MapFromKUID(kuid).Ok() && creds.UserNamespace.MapFromKGID(kgid).Ok()
273-
}
274-
275268
// CheckLimit enforces file size rlimits. It returns error if the write
276269
// operation must not proceed. Otherwise it returns the max length allowed to
277270
// without violating the limit.
@@ -297,6 +290,8 @@ func CheckLimit(ctx context.Context, offset, size int64) (int64, error) {
297290
// must be returned by filesystem implementations.
298291
// - Does not do inode permission checks. Filesystem implementations should
299292
// handle inode permission checks as they may differ across implementations.
293+
// - Writes in the security namespace are not supported, except for writes to
294+
// "security.capability", which are required to support file capabilities.
300295
func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.FileMode, kuid auth.KUID, name string) error {
301296
switch {
302297
case strings.HasPrefix(name, linux.XATTR_TRUSTED_PREFIX):
@@ -327,6 +322,9 @@ func CheckXattrPermissions(creds *auth.Credentials, ats AccessTypes, mode linux.
327322
if ats.MayRead() {
328323
return nil
329324
}
325+
if name == linux.XATTR_SECURITY_CAPABILITY {
326+
return nil
327+
}
330328
return linuxerr.EOPNOTSUPP
331329
}
332330
return nil

0 commit comments

Comments
 (0)