@@ -103,6 +103,77 @@ func CapsFromVfsCaps(capData linux.VfsNsCapData, creds *Credentials) (*Credentia
103103 return newCreds , nil
104104}
105105
106+ // FixupVfsCapDataOnSet may convert the given value to v3 file capabilities. It
107+ // is analogous to security/commoncap.c:cap_convert_nscap().
108+ func FixupVfsCapDataOnSet (creds * Credentials , value string , kuid KUID , kgid KGID ) (string , error ) {
109+ vfsCaps , err := VfsCapDataOf ([]byte (value ))
110+ if err != nil {
111+ return "" , err
112+ }
113+ if ! creds .HasCapabilityOnFile (linux .CAP_SETFCAP , kuid , kgid ) {
114+ return "" , linuxerr .EPERM
115+ }
116+ if vfsCaps .IsRevision2 () && creds .HasCapabilityIn (linux .CAP_SETFCAP , creds .UserNamespace .Root ()) {
117+ // The user is privileged, allow the v2 write.
118+ return value , nil
119+ }
120+ // Linux does the following UID gymnastics:
121+ // 1. The userspace-provided rootID is relative to the caller's user
122+ // namespace. So vfsCaps.RootID is mapped down to KUID first.
123+ // 2. If this is an ID-mapped mount, the result is mapped up using the
124+ // ID-map and then down again using the filesystem's owning user
125+ // namespace (inode->i_sb->s_user_ns). We again have a KUID result.
126+ // 3. The result is mapped up using the filesystem's owning user namespace.
127+ //
128+ // The final result is saved in the xattr value at vfs_ns_cap_data->rootid.
129+ // Since gVisor does not support ID-mapped mounts and all filesystems are
130+ // owned by the initial user namespace, we can skip steps 2 and 3.
131+ rootID := creds .UserNamespace .MapToKUID (UID (vfsCaps .RootID ))
132+ if ! rootID .Ok () {
133+ return "" , linuxerr .EINVAL
134+ }
135+ vfsCaps .ConvertToV3 (uint32 (rootID ))
136+ return vfsCaps .ToString (), nil
137+ }
138+
139+ // FixupVfsCapDataOnGet may convert the given value to v2 file capabilities. It
140+ // is analogous to security/commoncap.c:cap_inode_getsecurity().
141+ func FixupVfsCapDataOnGet (creds * Credentials , value string ) (string , error ) {
142+ vfsCaps , err := VfsCapDataOf ([]byte (value ))
143+ if err != nil {
144+ return "" , err
145+ }
146+ // Linux does the steps mentioned in FixupVfsCapDataOnSet in reverse. But
147+ // since gVisor does not support ID-mapped mounts and all filesystems are
148+ // owned by the initial user namespace, we only need to reverse step 1 here.
149+ rootID := KUID (vfsCaps .RootID )
150+ mappedRoot := creds .UserNamespace .MapFromKUID (rootID )
151+ if mappedRoot .Ok () && mappedRoot != RootUID {
152+ // Return this as v3.
153+ vfsCaps .ConvertToV3 (uint32 (mappedRoot ))
154+ return vfsCaps .ToString (), nil
155+ }
156+ if ! rootIDOwnsCurrentUserns (creds , rootID ) {
157+ return "" , linuxerr .EOVERFLOW
158+ }
159+ // Return this as v2.
160+ vfsCaps .ConvertToV2 ()
161+ return vfsCaps .ToString (), nil
162+ }
163+
164+ // Analogous to security/commoncap.c:rootid_owns_currentns().
165+ func rootIDOwnsCurrentUserns (creds * Credentials , rootID KUID ) bool {
166+ if ! rootID .Ok () {
167+ return false
168+ }
169+ for ns := creds .UserNamespace ; ns != nil ; ns = ns .parent {
170+ if ns .MapFromKUID (rootID ) == RootUID {
171+ return true
172+ }
173+ }
174+ return false
175+ }
176+
106177// TaskCapabilities represents all the capability sets for a task. Each of these
107178// sets is explained in greater detail in capabilities(7).
108179type TaskCapabilities struct {
0 commit comments