Skip to content

Commit a178e09

Browse files
ayushr2gvisor-bot
authored and committed
Fix handling of file capabilities.
All the following changes are compatible with what Linux does. - Do not attempt to apply file capabilities when they are not present. - If the rootID specified in the v3 file capability struct does not own the current userns, then file capabilities are not applied. - While applying file capabilities, EPERM is only returned if VFS_CAP_FLAGS_EFFECTIVE is set. - Re-raise capabilities of root user as per capabilities(7). Compare security/commoncap.c:handle_privileged_root(). PiperOrigin-RevId: 761725759
1 parent 2f687a2 commit a178e09

File tree

13 files changed

+308
-220
lines changed

13 files changed

+308
-220
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ iptables-tests: load-iptables $(RUNTIME_BIN)
417417
@$(call install_runtime,$(RUNTIME),--net-raw)
418418
@$(call test_runtime,$(RUNTIME),--test_env=TEST_NET_RAW=true //test/iptables:iptables_test)
419419
@$(call install_runtime,$(RUNTIME)-nftables,--net-raw --reproduce-nftables)
420-
@$(call test_runtime,$(RUNTIME)-nftables, --test_output=all //test/iptables:nftables_test --test_arg=$(RUNTIME)-nftables)
420+
@$(call test_runtime,$(RUNTIME)-nftables,--test_env=TEST_NET_RAW=true --test_output=all //test/iptables:nftables_test)
421421
.PHONY: iptables-tests
422422

423423
packetdrill-tests: load-packetdrill $(RUNTIME_BIN)

pkg/sentry/kernel/auth/BUILD

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,10 @@ go_library(
8888
"user_namespace_mutex.go",
8989
],
9090
marshal = True,
91-
visibility = ["//pkg/sentry:internal"],
91+
visibility = [
92+
"//pkg/sentry:internal",
93+
"//test/e2e:__subpackages__",
94+
],
9295
deps = [
9396
"//pkg/abi/linux",
9497
"//pkg/bits",

pkg/sentry/kernel/auth/capability_set.go

Lines changed: 77 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,16 @@ func CapabilitySetOfMany(cps []linux.Capability) CapabilitySet {
4343
return CapabilitySet(cs)
4444
}
4545

46+
// Add adds the given capability to the CapabilitySet.
47+
func (cs *CapabilitySet) Add(cp linux.Capability) {
48+
*cs |= CapabilitySetOf(cp)
49+
}
50+
51+
// Clear removes the given capability from the CapabilitySet.
52+
func (cs *CapabilitySet) Clear(cp linux.Capability) {
53+
*cs &= ^CapabilitySetOf(cp)
54+
}
55+
4656
// VfsCapDataOf returns a VfsCapData containing the file capabilities for the given slice of bytes.
4757
// For each field of the cap data, which are in the structure of either vfs_cap_data or vfs_ns_cap_data,
4858
// the bytes are ordered in little endian.
@@ -68,39 +78,28 @@ func VfsCapDataOf(data []byte) (linux.VfsNsCapData, error) {
6878
return capData, nil
6979
}
7080

71-
// CapsFromVfsCaps returns a copy of the given creds with new capability sets
72-
// by applying the file capability that is specified by capData.
73-
func CapsFromVfsCaps(capData linux.VfsNsCapData, creds *Credentials) (*Credentials, error) {
74-
// If the real or effective user ID of the process is root,
75-
// the file inheritable and permitted sets are ignored from
76-
// `Capabilities and execution of programs by root` at capabilities(7).
77-
if root := creds.UserNamespace.MapToKUID(RootUID); creds.EffectiveKUID == root || creds.RealKUID == root {
78-
return creds, nil
79-
}
80-
effective := (capData.MagicEtc & linux.VFS_CAP_FLAGS_EFFECTIVE) > 0
81-
permittedCaps := (CapabilitySet(capData.Permitted()) & creds.BoundingCaps) |
82-
(CapabilitySet(capData.Inheritable()) & creds.InheritableCaps)
83-
// P'(effective) = effective ? P'(permitted) : P'(ambient).
84-
// The ambient capabilities has not supported yet in gVisor,
85-
// set effective capabilities to 0 when effective bit is false.
86-
effectiveCaps := CapabilitySet(0)
87-
if effective {
88-
effectiveCaps = permittedCaps
89-
}
90-
// Insufficient to execute correctly.
91-
if (CapabilitySet(capData.Permitted()) & ^permittedCaps) != 0 {
92-
return nil, linuxerr.EPERM
93-
}
94-
// If the capabilities don't change, it will return the creds'
95-
// original copy.
96-
if creds.PermittedCaps == permittedCaps && creds.EffectiveCaps == effectiveCaps {
97-
return creds, nil
98-
}
99-
// The credentials object is immutable.
100-
newCreds := creds.Fork()
101-
newCreds.PermittedCaps = permittedCaps
102-
newCreds.EffectiveCaps = effectiveCaps
103-
return newCreds, nil
81+
// HandleVfsCaps updates creds based on the given vfsCaps. It returns two
82+
// booleans; the first indicates whether the effective flag is set, and the second
83+
// indicates whether the file capability is applied.
84+
func HandleVfsCaps(vfsCaps linux.VfsNsCapData, creds *Credentials) (bool, bool, error) {
85+
// gVisor does not support ID-mapped mounts and all filesystems are owned by
86+
// the initial user namespace. So we can directly cast the root ID to KUID.
87+
rootID := KUID(vfsCaps.RootID)
88+
if !rootIDOwnsCurrentUserns(creds, rootID) {
89+
// Linux skips vfs caps in this situation.
90+
return false, false, nil
91+
}
92+
// Note that ambient capabilities are not yet supported in gVisor.
93+
// P'(permitted) = (P(inheritable) & F(inheritable)) | (F(permitted) & P(bounding)) | P'(ambient)
94+
creds.PermittedCaps = (CapabilitySet(vfsCaps.Permitted()) & creds.BoundingCaps) |
95+
(CapabilitySet(vfsCaps.Inheritable()) & creds.InheritableCaps)
96+
effective := (vfsCaps.MagicEtc & linux.VFS_CAP_FLAGS_EFFECTIVE) > 0
97+
// Insufficient to execute correctly. Linux only returns EPERM when effective
98+
// flag is set.
99+
if effective && (CapabilitySet(vfsCaps.Permitted()) & ^creds.PermittedCaps) != 0 {
100+
return effective, true, linuxerr.EPERM
101+
}
102+
return effective, true, nil
104103
}
105104

106105
// FixupVfsCapDataOnSet may convert the given value to v3 file capabilities. It
@@ -174,6 +173,51 @@ func rootIDOwnsCurrentUserns(creds *Credentials, rootID KUID) bool {
174173
return false
175174
}
176175

176+
// HandlePrivilegedRoot updates creds for a privileged root user as per
177+
// `Capabilities and execution of programs by root` in capabilities(7).
178+
// It returns true if the file effective bit should be considered set.
179+
func HandlePrivilegedRoot(creds *Credentials, hasVFSCaps bool, filename string) bool {
180+
// gVisor currently does not support SECURE_NOROOT secure bit since
181+
// PR_SET_SECUREBITS is not supported. So no need to check here.
182+
root := creds.UserNamespace.MapToKUID(RootUID)
183+
if hasVFSCaps && creds.RealKUID != root && creds.EffectiveKUID == root {
184+
log.Warningf("File %q has both SUID bit and file capabilities set, not raising all capabilities.", filename)
185+
return false
186+
}
187+
if creds.RealKUID == root || creds.EffectiveKUID == root {
188+
// P'(permitted) = P(inheritable) | P(bounding)
189+
creds.PermittedCaps = creds.BoundingCaps | creds.InheritableCaps
190+
}
191+
// Linux only sets the effective bit if the effective KUID is root.
192+
return creds.EffectiveKUID == root
193+
}
194+
195+
// UpdateCredsForNewTask updates creds for a new task as per capabilities(7).
196+
func UpdateCredsForNewTask(creds *Credentials, fileCaps string, filename string) error {
197+
// Clear the permitted capability set. It is initialized below via
198+
// HandleVfsCaps() and HandlePrivilegedRoot().
199+
creds.PermittedCaps = 0
200+
hasVFSCaps := false
201+
setEffective := false
202+
if len(fileCaps) != 0 {
203+
vfsCaps, err := VfsCapDataOf([]byte(fileCaps))
204+
if err != nil {
205+
return err
206+
}
207+
setEffective, hasVFSCaps, err = HandleVfsCaps(vfsCaps, creds)
208+
if err != nil {
209+
return err
210+
}
211+
}
212+
setEffective = HandlePrivilegedRoot(creds, hasVFSCaps, filename) || setEffective
213+
// P'(effective) = effective ? P'(permitted) : P'(ambient).
214+
creds.EffectiveCaps = 0
215+
if setEffective {
216+
creds.EffectiveCaps = creds.PermittedCaps
217+
}
218+
return nil
219+
}
220+
177221
// TaskCapabilities represents all the capability sets for a task. Each of these
178222
// sets is explained in greater detail in capabilities(7).
179223
type TaskCapabilities struct {

pkg/sentry/kernel/auth/capability_set_test.go

Lines changed: 35 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,14 @@ import (
2121
"gvisor.dev/gvisor/pkg/errors/linuxerr"
2222
)
2323

24-
// capsEquals returns true when the given creds' capabilities match the given caps.
25-
func capsEquals(creds *Credentials, caps TaskCapabilities) bool {
26-
return creds.PermittedCaps == caps.PermittedCaps &&
27-
creds.InheritableCaps == caps.InheritableCaps &&
28-
creds.EffectiveCaps == caps.EffectiveCaps &&
29-
creds.BoundingCaps == caps.BoundingCaps
30-
}
31-
32-
// credentialsWithCaps returns a copy of creds with the given capabilities.
33-
func credentialsWithCaps(creds *Credentials, permittedCaps, inheritableCaps, effectiveCaps, boundingCaps CapabilitySet) *Credentials {
34-
newCreds := creds.Fork()
35-
newCreds.PermittedCaps = permittedCaps
36-
newCreds.InheritableCaps = inheritableCaps
37-
newCreds.EffectiveCaps = effectiveCaps
38-
newCreds.BoundingCaps = boundingCaps
39-
return newCreds
24+
// credentialsWithCaps creates a credentials object with the given capabilities.
25+
func credentialsWithCaps(inheritable, bounding CapabilitySet) *Credentials {
26+
creds := NewRootCredentials(NewRootUserNamespace())
27+
creds.PermittedCaps = 0
28+
creds.InheritableCaps = inheritable
29+
creds.EffectiveCaps = 0
30+
creds.BoundingCaps = bounding
31+
return creds
4032
}
4133

4234
func vfsNsCapDataFrom(effective bool, rootid uint32, permitted, inheritable CapabilitySet) linux.VfsNsCapData {
@@ -64,102 +56,68 @@ func vfsCapDataFrom(effective bool, permitted, inheritable CapabilitySet) linux.
6456

6557
func TestCapsFromVfsCaps(t *testing.T) {
6658
for _, tst := range []struct {
67-
name string
68-
capData linux.VfsNsCapData
69-
creds *Credentials
70-
wantCaps TaskCapabilities
71-
wantErr error
59+
name string
60+
capData linux.VfsNsCapData
61+
creds *Credentials
62+
wantPermitted CapabilitySet
63+
wantEffective bool
64+
wantErr error
7265
}{
7366
{
74-
name: "TestRootCredential",
67+
name: "TestSamePermittedAndInheritableCaps",
7568
capData: vfsCapDataFrom(
7669
true, // effective
7770
CapabilitySetOf(linux.CAP_NET_ADMIN), // permitted
7871
CapabilitySetOf(linux.CAP_NET_ADMIN)), // inheritable
79-
creds: credentialsWithCaps(
80-
NewRootCredentials(NewRootUserNamespace()),
81-
AllCapabilities,
82-
CapabilitySetOf(linux.CAP_NET_RAW),
83-
AllCapabilities,
84-
CapabilitySetOf(linux.CAP_SYSLOG)),
85-
wantCaps: TaskCapabilities{
86-
PermittedCaps: AllCapabilities,
87-
InheritableCaps: CapabilitySetOf(linux.CAP_NET_RAW),
88-
EffectiveCaps: AllCapabilities,
89-
BoundingCaps: CapabilitySetOf(linux.CAP_SYSLOG),
90-
},
72+
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
73+
wantPermitted: CapabilitySetOf(linux.CAP_NET_ADMIN),
74+
wantEffective: true,
9175
},
9276
{
93-
name: "TestPermittedAndInheritableCaps",
77+
name: "TestDifferentPermittedAndInheritableCaps",
9478
capData: vfsCapDataFrom(
9579
true, // effective
9680
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
9781
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETGID})), // inheritable
98-
creds: credentialsWithCaps(
99-
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
100-
AllCapabilities,
101-
AllCapabilities,
102-
AllCapabilities,
103-
AllCapabilities),
104-
wantCaps: TaskCapabilities{
105-
PermittedCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
106-
InheritableCaps: AllCapabilities,
107-
EffectiveCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
108-
BoundingCaps: AllCapabilities,
109-
},
82+
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
83+
wantPermitted: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
84+
wantEffective: true,
11085
},
11186
{
11287
name: "TestEffectiveBitOff",
11388
capData: vfsCapDataFrom(
11489
false, // effective
11590
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
11691
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETGID})), // inheritable
117-
creds: credentialsWithCaps(
118-
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
119-
AllCapabilities,
120-
AllCapabilities,
121-
AllCapabilities,
122-
AllCapabilities),
123-
wantCaps: TaskCapabilities{
124-
PermittedCaps: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
125-
InheritableCaps: AllCapabilities,
126-
EffectiveCaps: 0,
127-
BoundingCaps: AllCapabilities,
128-
},
92+
creds: credentialsWithCaps(AllCapabilities, AllCapabilities),
93+
wantPermitted: CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID, linux.CAP_SETGID}),
94+
wantEffective: false,
12995
},
13096
{
13197
name: "TestInsufficientCaps",
13298
capData: vfsCapDataFrom(
13399
true, // effective
134100
CapabilitySetOfMany([]linux.Capability{linux.CAP_CHOWN, linux.CAP_SETUID}), // permitted
135101
CapabilitySetOf(linux.CAP_CHOWN)), // inheritable
136-
creds: credentialsWithCaps(
137-
NewUserCredentials(123, 321, nil, nil, NewRootUserNamespace()),
138-
AllCapabilities,
139-
AllCapabilities,
140-
AllCapabilities,
141-
CapabilitySetOf(linux.CAP_CHOWN)),
102+
creds: credentialsWithCaps(AllCapabilities, CapabilitySetOf(linux.CAP_CHOWN)),
142103
wantErr: linuxerr.EPERM,
143104
},
144105
} {
145106
t.Run(tst.name, func(t *testing.T) {
146-
newCreds, err := CapsFromVfsCaps(tst.capData, tst.creds)
107+
setEff, _, err := HandleVfsCaps(tst.capData, tst.creds)
147108
if err == nil {
148109
if tst.wantErr != nil {
149-
t.Errorf("CapsFromVfsCaps(%v, %v) returned unexpected error %v", tst.capData, tst.creds, tst.wantErr)
110+
t.Errorf("CapsFromVfsCaps(%v) returned unexpected error %v", tst.capData, tst.wantErr)
111+
}
112+
if tst.creds.PermittedCaps != tst.wantPermitted {
113+
t.Errorf("CapsFromVfsCaps(%v) set PermittedCaps to: %#x, want capabilities: %#x",
114+
tst.capData, tst.creds.PermittedCaps, tst.wantPermitted)
150115
}
151-
if !capsEquals(newCreds, tst.wantCaps) {
152-
t.Errorf("CapsFromVfsCaps(%v, %v) returned capabilities: %v, want capabilities: %v",
153-
tst.capData, tst.creds,
154-
TaskCapabilities{
155-
PermittedCaps: newCreds.PermittedCaps,
156-
InheritableCaps: newCreds.InheritableCaps,
157-
EffectiveCaps: newCreds.EffectiveCaps,
158-
BoundingCaps: newCreds.BoundingCaps,
159-
}, tst.wantCaps)
116+
if setEff != tst.wantEffective {
117+
t.Errorf("CapsFromVfsCaps(%v) returned effective=%t, want: %t", tst.capData, setEff, tst.wantEffective)
160118
}
161119
} else if tst.wantErr == nil || tst.wantErr.Error() != err.Error() {
162-
t.Errorf("CapsFromVfsCaps(%v, %v) returned error %v, wantErr: %v", tst.capData, tst.creds, err, tst.wantErr)
120+
t.Errorf("CapsFromVfsCaps(%v) returned error %v, wantErr: %v", tst.capData, err, tst.wantErr)
163121
}
164122
})
165123
}

pkg/sentry/kernel/kernel.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,16 +1088,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
10881088
if se != nil {
10891089
return nil, 0, errors.New(se.String())
10901090
}
1091-
var vfsCaps linux.VfsNsCapData
1092-
if len(image.FileCaps()) != 0 {
1093-
var err error
1094-
vfsCaps, err = auth.VfsCapDataOf([]byte(image.FileCaps()))
1095-
if err != nil {
1096-
return nil, 0, err
1097-
}
1098-
}
1099-
creds, err := auth.CapsFromVfsCaps(vfsCaps, args.Credentials)
1100-
if err != nil {
1091+
if err := auth.UpdateCredsForNewTask(args.Credentials, image.FileCaps(), args.Filename); err != nil {
11011092
return nil, 0, err
11021093
}
11031094
args.FDTable.IncRef()
@@ -1109,7 +1100,7 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
11091100
TaskImage: image,
11101101
FSContext: fsContext,
11111102
FDTable: args.FDTable,
1112-
Credentials: creds,
1103+
Credentials: args.Credentials,
11131104
NetworkNamespace: k.RootNetworkNamespace(),
11141105
AllowedCPUMask: sched.NewFullCPUSet(k.applicationCores),
11151106
UTSNamespace: args.UTSNamespace,

pkg/sentry/loader/loader.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,10 +264,13 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
264264
return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to load %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
265265
}
266266
defer file.DecRef(ctx)
267-
xattr, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: linux.XATTR_SECURITY_CAPABILITY, Size: linux.XATTR_CAPS_SZ_3})
267+
fileCaps, err := file.GetXattr(ctx, &vfs.GetXattrOptions{Name: linux.XATTR_SECURITY_CAPABILITY, Size: linux.XATTR_CAPS_SZ_3})
268268
switch {
269-
case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.ENOTSUP, err):
270-
xattr = ""
269+
case linuxerr.Equals(linuxerr.ENODATA, err), linuxerr.Equals(linuxerr.EOPNOTSUPP, err):
270+
// Linux converts EOPNOTSUPP to ENODATA in
271+
// security/commoncap.c:get_vfs_caps_from_disk(). We communicate the lack
272+
// of file capabilities by an empty string.
273+
fileCaps = ""
271274
case err != nil:
272275
return ImageInfo{}, syserr.NewDynamic(fmt.Sprintf("failed to read file capabilities of %s: %v", args.Filename, err), syserr.FromError(err).ToLinux())
273276
}
@@ -355,6 +358,6 @@ func Load(ctx context.Context, args LoadArgs, extraAuxv []arch.AuxEntry, vdso *V
355358
OS: loaded.os,
356359
Arch: ac,
357360
Name: name,
358-
FileCaps: xattr,
361+
FileCaps: fileCaps,
359362
}, nil
360363
}

runsc/container/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ go_test(
7777
tags = ["requires-kvm"],
7878
deps = [
7979
"//pkg/abi/linux",
80-
"//pkg/bits",
8180
"//pkg/cleanup",
8281
"//pkg/log",
8382
"//pkg/sentry/control",

runsc/container/container_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ import (
3737
specs "github.com/opencontainers/runtime-spec/specs-go"
3838
"golang.org/x/sys/unix"
3939
"gvisor.dev/gvisor/pkg/abi/linux"
40-
"gvisor.dev/gvisor/pkg/bits"
4140
"gvisor.dev/gvisor/pkg/cleanup"
4241
"gvisor.dev/gvisor/pkg/log"
4342
"gvisor.dev/gvisor/pkg/sentry/control"
@@ -2482,7 +2481,7 @@ func TestMountSymlink(t *testing.T) {
24822481

24832482
// Check that --net-raw disables the CAP_NET_RAW capability.
24842483
func TestNetRaw(t *testing.T) {
2485-
capNetRaw := strconv.FormatUint(bits.MaskOf64(int(linux.CAP_NET_RAW)), 10)
2484+
capNetRaw := strconv.FormatUint(uint64(auth.CapabilitySetOf(linux.CAP_NET_RAW)), 10)
24862485
app, err := testutil.FindFile("test/cmd/test_app/test_app")
24872486
if err != nil {
24882487
t.Fatal("error finding test_app:", err)

runsc/specutils/BUILD

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ go_library(
1818
visibility = ["//:sandbox"],
1919
deps = [
2020
"//pkg/abi/linux",
21-
"//pkg/bits",
2221
"//pkg/log",
2322
"//pkg/sentry/devices/nvproxy/nvconf",
2423
"//pkg/sentry/kernel/auth",

0 commit comments

Comments
 (0)