11use std:: fs:: File ;
2+ use std:: ops:: Deref ;
23use std:: os:: fd:: AsFd ;
34use std:: path:: Path ;
45
@@ -38,23 +39,17 @@ impl IdMap {
3839 }
3940}
4041
41- pub struct MntNamespace {
42- mnt_fd : File ,
42+ pub struct UserNamespace {
4343 uid_map : IdMap ,
4444 gid_map : IdMap ,
4545}
4646
47- impl MntNamespace {
47+ impl UserNamespace {
4848 /// Read the user namespace ID mappings (`uid_map` and `gid_map`) of a process.
49- pub fn of_pid ( pid : Pid ) -> Result < MntNamespace > {
50- let mnt_fd = File :: open ( format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ) ?;
49+ pub fn of_pid ( pid : Pid ) -> Result < Self > {
5150 let uid_map = IdMap :: read ( format ! ( "/proc/{}/uid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
5251 let gid_map = IdMap :: read ( format ! ( "/proc/{}/gid_map" , pid. as_raw_nonzero( ) ) . as_ref ( ) ) ?;
53- Ok ( MntNamespace {
54- mnt_fd,
55- uid_map,
56- gid_map,
57- } )
52+ Ok ( Self { uid_map, gid_map } )
5853 }
5954
6055 /// Check if we're in a user namespace.
@@ -72,48 +67,85 @@ impl MntNamespace {
7267 Ok ( self . gid_map . translate ( gid) . context ( "GID overflows" ) ?)
7368 }
7469
70+ /// "Enter" the user namespace.
71+ ///
72+ /// This operation is not reversible.
73+ ///
74+ /// This does not actually enter the user namespace; it only switches to the root user
75+ /// inside the namespace (the IDs that `self.uid(0)` and `self.gid(0)` yield).
76+ ///
77+ /// Entering the user namespace turns out to be problematic.
78+ /// The reason seems to be the capability check at [1],
79+ /// which means the `CAP_MKNOD` capability of the *init* namespace is needed.
80+ /// However, a task's associated security context is all relative to its current
81+ /// user namespace [2], so once you enter a user namespace there's no way of getting
82+ /// back `CAP_MKNOD` of the init namespace anymore.
83+ /// (Yes, this means that even if `CAP_MKNOD` is granted to the container, you cannot
84+ /// create device nodes within it.)
85+ ///
86+ /// [1]: https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073
87+ /// [2]: https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111
88+ pub fn enter ( & self ) -> Result < ( ) > {
89+ // By default `setuid` drops capabilities when transitioning from root to a
90+ // non-root user. Setting this bit prevents that, so our code keeps its capabilities.
91+ rustix:: thread:: set_capabilities_secure_bits ( CapabilitiesSecureBits :: NO_SETUID_FIXUP ) ?;
92+
93+ rustix:: thread:: set_thread_uid ( Uid :: from_raw ( self . uid ( 0 ) ?) ) ?;
94+ rustix:: thread:: set_thread_gid ( Gid :: from_raw ( self . gid ( 0 ) ?) ) ?;
95+ Ok ( ( ) )
96+ }
97+ }
98+
99+ pub struct MntNamespace { // A process's mount namespace handle plus its user-namespace ID maps.
100+ mnt_fd : File , // Open `/proc/<pid>/ns/mnt` file; passed to `move_into_link_name_space` by `enter`.
101+ user_ns : UserNamespace , // UID/GID maps loaded for the same process in `of_pid`.
102+ }
103+
104+ impl Deref for MntNamespace { // Lets `UserNamespace` methods be called directly on a `MntNamespace`.
105+ type Target = UserNamespace ;
106+
107+ fn deref ( & self ) -> & UserNamespace { // NOTE(review): both types define `enter`; on a `MntNamespace` the inherent `MntNamespace::enter` is the one that gets called.
108+ & self . user_ns
109+ }
110+ }
111+
112+ impl MntNamespace {
113+ /// Open the mount namespace of a process and load its user-namespace ID maps.
114+ pub fn of_pid ( pid : Pid ) -> Result < MntNamespace > {
115+ let mnt_fd = File :: open ( format ! ( "/proc/{}/ns/mnt" , pid. as_raw_nonzero( ) ) ) ?;
116+ let user_ns = UserNamespace :: of_pid ( pid) ?;
117+ Ok ( MntNamespace { mnt_fd, user_ns } )
118+ }
119+
75120 /// Enter the mount namespace, then switch to the root user inside its user namespace.
76- pub fn enter < T : Send , F : FnOnce ( ) -> T + Send > ( & self , f : F ) -> Result < T > {
121+ ///
122+ /// This operation is not reversible.
123+ pub fn enter ( & self ) -> Result < ( ) > {
124+ // Unshare FS for this specific thread so that it can switch to another namespace.
125+ // Without this, switching namespaces fails with EINVAL.
126+ rustix:: thread:: unshare ( UnshareFlags :: FS ) ?;
127+
128+ // Switch this particular thread to the container's mount namespace.
129+ rustix:: thread:: move_into_link_name_space (
130+ self . mnt_fd . as_fd ( ) ,
131+ Some ( LinkNameSpaceType :: Mount ) ,
132+ ) ?;
133+
134+ // If a user namespace is used, we must act like the root user *inside* the
135+ // namespace to be able to create files properly (otherwise EOVERFLOW
136+ // will be returned when creating files).
137+ self . user_ns . enter ( ) ?;
138+ Ok ( ( ) )
139+ }
140+
141+ /// Execute inside the mount namespace.
142+ pub fn with < T : Send , F : FnOnce ( ) -> T + Send > ( & self , f : F ) -> Result < T > {
77143 // To avoid messing with the rest of the process, we do everything in a new thread.
78144 // Use scoped thread to avoid 'static bound (we need to access fd).
79145 std:: thread:: scope ( |scope| {
80146 scope
81147 . spawn ( || -> Result < T > {
82- // Unshare FS for this specific thread so we can switch to another namespace.
83- // Not doing this will cause EINVAL when switching to namespaces.
84- rustix:: thread:: unshare ( UnshareFlags :: FS ) ?;
85-
86- // Switch this particular thread to the container's mount namespace.
87- rustix:: thread:: move_into_link_name_space (
88- self . mnt_fd . as_fd ( ) ,
89- Some ( LinkNameSpaceType :: Mount ) ,
90- ) ?;
91-
92- // If user namespace is used, we must act like the root user *inside*
93- // namespace to be able to create files properly (otherwise EOVERFLOW
94- // will be returned when creating file).
95- //
96- // Entering the user namespace turns out to be problematic.
97- // The reason seems to be this line [1]:
98- // which means `CAP_MKNOD` capability of the *init* namespace is needed.
99- // However task's associated security context is all relative to its current
100- // user namespace [2], so once you enter a user namespace there's no way of getting
101- // back `CAP_MKNOD` of the init namespace anymore.
102- // (Yes this means that even if CAP_MKNOD is granted to the container, you cannot
103- // create device nodes within it.)
104- //
105- // [1]: https://elixir.bootlin.com/linux/v6.11.1/source/fs/namei.c#L4073
106- // [2]: https://elixir.bootlin.com/linux/v6.11.1/source/include/linux/cred.h#L111
107-
108- // By default `setuid` will drop capabilities when transitioning from root
109- // to non-root user. This bit prevents it so our code still have superpower.
110- rustix:: thread:: set_capabilities_secure_bits (
111- CapabilitiesSecureBits :: NO_SETUID_FIXUP ,
112- ) ?;
113-
114- rustix:: thread:: set_thread_uid ( Uid :: from_raw ( self . uid ( 0 ) ?) ) ?;
115- rustix:: thread:: set_thread_gid ( Gid :: from_raw ( self . gid ( 0 ) ?) ) ?;
116-
148+ self . enter ( ) ?;
117149 Ok ( f ( ) )
118150 } )
119151 . join ( )
0 commit comments