1616#include <unistd.h>
1717#include <sys/socket.h>
1818#include <sys/stat.h>
19+ #include <linux/ioctl.h>
1920
2021#include "pidfd.h"
2122#include "../clone3/clone3_selftests.h"
2223#include "../kselftest_harness.h"
2324
/*
 * Fallback definitions for the pidfd namespace ioctls.
 *
 * Each macro is only defined here when the headers included above did not
 * already provide it, so the test still builds against older headers.
 * Every request is built with _IO() from PIDFS_IOCTL_MAGIC and a request
 * number in the range 1..10; the test passes these as the request argument
 * of ioctl() on a pidfd to obtain a namespace file descriptor.
 */
#ifndef PIDFS_IOCTL_MAGIC
#define PIDFS_IOCTL_MAGIC 0xFF
#endif

/* Request 1: cgroup namespace. */
#ifndef PIDFD_GET_CGROUP_NAMESPACE
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
#endif

/* Request 2: ipc namespace. */
#ifndef PIDFD_GET_IPC_NAMESPACE
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
#endif

/* Request 3: mount namespace. */
#ifndef PIDFD_GET_MNT_NAMESPACE
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
#endif

/* Request 4: network namespace. */
#ifndef PIDFD_GET_NET_NAMESPACE
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
#endif

/* Request 5: pid namespace. */
#ifndef PIDFD_GET_PID_NAMESPACE
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
#endif

/* Request 6: pid_for_children namespace. */
#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
#endif

/* Request 7: time namespace. */
#ifndef PIDFD_GET_TIME_NAMESPACE
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
#endif

/* Request 8: time_for_children namespace. */
#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#endif

/* Request 9: user namespace. */
#ifndef PIDFD_GET_USER_NAMESPACE
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#endif

/* Request 10: uts namespace. */
#ifndef PIDFD_GET_UTS_NAMESPACE
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
#endif

68+
2469enum {
2570 PIDFD_NS_USER ,
2671 PIDFD_NS_MNT ,
@@ -31,40 +76,46 @@ enum {
3176 PIDFD_NS_CGROUP ,
3277 PIDFD_NS_PIDCLD ,
3378 PIDFD_NS_TIME ,
79+ PIDFD_NS_TIMECLD ,
3480 PIDFD_NS_MAX
3581};
3682
3783const struct ns_info {
3884 const char * name ;
3985 int flag ;
86+ unsigned int pidfd_ioctl ;
4087} ns_info [] = {
41- [PIDFD_NS_USER ] = { "user" , CLONE_NEWUSER , },
42- [PIDFD_NS_MNT ] = { "mnt" , CLONE_NEWNS , },
43- [PIDFD_NS_PID ] = { "pid" , CLONE_NEWPID , },
44- [PIDFD_NS_UTS ] = { "uts" , CLONE_NEWUTS , },
45- [PIDFD_NS_IPC ] = { "ipc" , CLONE_NEWIPC , },
46- [PIDFD_NS_NET ] = { "net" , CLONE_NEWNET , },
47- [PIDFD_NS_CGROUP ] = { "cgroup" , CLONE_NEWCGROUP , },
48- [PIDFD_NS_PIDCLD ] = { "pid_for_children" , 0 , },
49- [PIDFD_NS_TIME ] = { "time" , CLONE_NEWTIME , },
88+ [PIDFD_NS_USER ] = { "user" , CLONE_NEWUSER , PIDFD_GET_USER_NAMESPACE , },
89+ [PIDFD_NS_MNT ] = { "mnt" , CLONE_NEWNS , PIDFD_GET_MNT_NAMESPACE , },
90+ [PIDFD_NS_PID ] = { "pid" , CLONE_NEWPID , PIDFD_GET_PID_NAMESPACE , },
91+ [PIDFD_NS_UTS ] = { "uts" , CLONE_NEWUTS , PIDFD_GET_UTS_NAMESPACE , },
92+ [PIDFD_NS_IPC ] = { "ipc" , CLONE_NEWIPC , PIDFD_GET_IPC_NAMESPACE , },
93+ [PIDFD_NS_NET ] = { "net" , CLONE_NEWNET , PIDFD_GET_NET_NAMESPACE , },
94+ [PIDFD_NS_CGROUP ] = { "cgroup" , CLONE_NEWCGROUP , PIDFD_GET_CGROUP_NAMESPACE , },
95+ [PIDFD_NS_TIME ] = { "time" , CLONE_NEWTIME , PIDFD_GET_TIME_NAMESPACE , },
96+ [PIDFD_NS_PIDCLD ] = { "pid_for_children" , 0 , PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE , },
97+ [PIDFD_NS_TIMECLD ] = { "time_for_children" , 0 , PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE , },
5098};
5199
52100FIXTURE (current_nsset )
53101{
54102 pid_t pid ;
55103 int pidfd ;
56104 int nsfds [PIDFD_NS_MAX ];
105+ int child_pidfd_derived_nsfds [PIDFD_NS_MAX ];
57106
58107 pid_t child_pid_exited ;
59108 int child_pidfd_exited ;
60109
61110 pid_t child_pid1 ;
62111 int child_pidfd1 ;
63112 int child_nsfds1 [PIDFD_NS_MAX ];
113+ int child_pidfd_derived_nsfds1 [PIDFD_NS_MAX ];
64114
65115 pid_t child_pid2 ;
66116 int child_pidfd2 ;
67117 int child_nsfds2 [PIDFD_NS_MAX ];
118+ int child_pidfd_derived_nsfds2 [PIDFD_NS_MAX ];
68119};
69120
70121static int sys_waitid (int which , pid_t pid , int options )
@@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset)
128179 char c ;
129180
130181 for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
131- self -> nsfds [i ] = - EBADF ;
132- self -> child_nsfds1 [i ] = - EBADF ;
133- self -> child_nsfds2 [i ] = - EBADF ;
182+ self -> nsfds [i ] = - EBADF ;
183+ self -> child_nsfds1 [i ] = - EBADF ;
184+ self -> child_nsfds2 [i ] = - EBADF ;
185+ self -> child_pidfd_derived_nsfds [i ] = - EBADF ;
186+ self -> child_pidfd_derived_nsfds1 [i ] = - EBADF ;
187+ self -> child_pidfd_derived_nsfds2 [i ] = - EBADF ;
134188 }
135189
136190 proc_fd = open ("/proc/self/ns" , O_DIRECTORY | O_CLOEXEC );
@@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset)
139193 }
140194
141195 self -> pid = getpid ();
196+ self -> pidfd = sys_pidfd_open (self -> pid , 0 );
197+ EXPECT_GT (self -> pidfd , 0 ) {
198+ TH_LOG ("%m - Failed to open pidfd for process %d" , self -> pid );
199+ }
200+
142201 for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
143202 const struct ns_info * info = & ns_info [i ];
144203 self -> nsfds [i ] = openat (proc_fd , info -> name , O_RDONLY | O_CLOEXEC );
@@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset)
148207 info -> name , self -> pid );
149208 }
150209 }
151- }
152210
153- self -> pidfd = sys_pidfd_open (self -> pid , 0 );
154- EXPECT_GT (self -> pidfd , 0 ) {
155- TH_LOG ("%m - Failed to open pidfd for process %d" , self -> pid );
211+ self -> child_pidfd_derived_nsfds [i ] = ioctl (self -> pidfd , info -> pidfd_ioctl , 0 );
212+ if (self -> child_pidfd_derived_nsfds [i ] < 0 ) {
213+ EXPECT_EQ (errno , EOPNOTSUPP ) {
214+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
215+ info -> name , self -> pid );
216+ }
217+ }
156218 }
157219
158220 /* Create task that exits right away. */
159- self -> child_pid_exited = create_child (& self -> child_pidfd_exited ,
160- CLONE_NEWUSER | CLONE_NEWNET );
221+ self -> child_pid_exited = create_child (& self -> child_pidfd_exited , 0 );
161222 EXPECT_GE (self -> child_pid_exited , 0 );
162223
163- if (self -> child_pid_exited == 0 )
224+ if (self -> child_pid_exited == 0 ) {
225+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && unshare (CLONE_NEWUSER ) < 0 )
226+ _exit (EXIT_FAILURE );
227+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 )
228+ _exit (EXIT_FAILURE );
164229 _exit (EXIT_SUCCESS );
230+ }
165231
166232 ASSERT_EQ (sys_waitid (P_PID , self -> child_pid_exited , WEXITED | WNOWAIT ), 0 );
167233
@@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset)
174240 EXPECT_EQ (ret , 0 );
175241
176242 /* Create tasks that will be stopped. */
177- self -> child_pid1 = create_child (& self -> child_pidfd1 ,
178- CLONE_NEWUSER | CLONE_NEWNS |
179- CLONE_NEWCGROUP | CLONE_NEWIPC |
180- CLONE_NEWUTS | CLONE_NEWPID |
181- CLONE_NEWNET );
243+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && self -> nsfds [PIDFD_NS_PID ] >= 0 )
244+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWUSER | CLONE_NEWPID );
245+ else if (self -> nsfds [PIDFD_NS_PID ] >= 0 )
246+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWPID );
247+ else if (self -> nsfds [PIDFD_NS_USER ] >= 0 )
248+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWUSER );
249+ else
250+ self -> child_pid1 = create_child (& self -> child_pidfd1 , 0 );
182251 EXPECT_GE (self -> child_pid1 , 0 );
183252
184253 if (self -> child_pid1 == 0 ) {
185254 close (ipc_sockets [0 ]);
186255
187- if (!switch_timens ())
256+ if (self -> nsfds [PIDFD_NS_MNT ] >= 0 && unshare (CLONE_NEWNS ) < 0 ) {
257+ TH_LOG ("%m - Failed to unshare mount namespace for process %d" , self -> pid );
188258 _exit (EXIT_FAILURE );
259+ }
260+ if (self -> nsfds [PIDFD_NS_CGROUP ] >= 0 && unshare (CLONE_NEWCGROUP ) < 0 ) {
261+ TH_LOG ("%m - Failed to unshare cgroup namespace for process %d" , self -> pid );
262+ _exit (EXIT_FAILURE );
263+ }
264+ if (self -> nsfds [PIDFD_NS_IPC ] >= 0 && unshare (CLONE_NEWIPC ) < 0 ) {
265+ TH_LOG ("%m - Failed to unshare ipc namespace for process %d" , self -> pid );
266+ _exit (EXIT_FAILURE );
267+ }
268+ if (self -> nsfds [PIDFD_NS_UTS ] >= 0 && unshare (CLONE_NEWUTS ) < 0 ) {
269+ TH_LOG ("%m - Failed to unshare uts namespace for process %d" , self -> pid );
270+ _exit (EXIT_FAILURE );
271+ }
272+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 ) {
273+ TH_LOG ("%m - Failed to unshare net namespace for process %d" , self -> pid );
274+ _exit (EXIT_FAILURE );
275+ }
276+ if (self -> nsfds [PIDFD_NS_TIME ] >= 0 && !switch_timens ()) {
277+ TH_LOG ("%m - Failed to unshare time namespace for process %d" , self -> pid );
278+ _exit (EXIT_FAILURE );
279+ }
189280
190281 if (write_nointr (ipc_sockets [1 ], "1" , 1 ) < 0 )
191282 _exit (EXIT_FAILURE );
@@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset)
203294 ret = socketpair (AF_LOCAL , SOCK_STREAM | SOCK_CLOEXEC , 0 , ipc_sockets );
204295 EXPECT_EQ (ret , 0 );
205296
206- self -> child_pid2 = create_child (& self -> child_pidfd2 ,
207- CLONE_NEWUSER | CLONE_NEWNS |
208- CLONE_NEWCGROUP | CLONE_NEWIPC |
209- CLONE_NEWUTS | CLONE_NEWPID |
210- CLONE_NEWNET );
297+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && self -> nsfds [PIDFD_NS_PID ] >= 0 )
298+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWUSER | CLONE_NEWPID );
299+ else if (self -> nsfds [PIDFD_NS_PID ] >= 0 )
300+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWPID );
301+ else if (self -> nsfds [PIDFD_NS_USER ] >= 0 )
302+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWUSER );
303+ else
304+ self -> child_pid2 = create_child (& self -> child_pidfd2 , 0 );
211305 EXPECT_GE (self -> child_pid2 , 0 );
212306
213307 if (self -> child_pid2 == 0 ) {
214308 close (ipc_sockets [0 ]);
215309
216- if (!switch_timens ())
310+ if (self -> nsfds [PIDFD_NS_MNT ] >= 0 && unshare (CLONE_NEWNS ) < 0 ) {
311+ TH_LOG ("%m - Failed to unshare mount namespace for process %d" , self -> pid );
312+ _exit (EXIT_FAILURE );
313+ }
314+ if (self -> nsfds [PIDFD_NS_CGROUP ] >= 0 && unshare (CLONE_NEWCGROUP ) < 0 ) {
315+ TH_LOG ("%m - Failed to unshare cgroup namespace for process %d" , self -> pid );
217316 _exit (EXIT_FAILURE );
317+ }
318+ if (self -> nsfds [PIDFD_NS_IPC ] >= 0 && unshare (CLONE_NEWIPC ) < 0 ) {
319+ TH_LOG ("%m - Failed to unshare ipc namespace for process %d" , self -> pid );
320+ _exit (EXIT_FAILURE );
321+ }
322+ if (self -> nsfds [PIDFD_NS_UTS ] >= 0 && unshare (CLONE_NEWUTS ) < 0 ) {
323+ TH_LOG ("%m - Failed to unshare uts namespace for process %d" , self -> pid );
324+ _exit (EXIT_FAILURE );
325+ }
326+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 ) {
327+ TH_LOG ("%m - Failed to unshare net namespace for process %d" , self -> pid );
328+ _exit (EXIT_FAILURE );
329+ }
330+ if (self -> nsfds [PIDFD_NS_TIME ] >= 0 && !switch_timens ()) {
331+ TH_LOG ("%m - Failed to unshare time namespace for process %d" , self -> pid );
332+ _exit (EXIT_FAILURE );
333+ }
218334
219335 if (write_nointr (ipc_sockets [1 ], "1" , 1 ) < 0 )
220336 _exit (EXIT_FAILURE );
@@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset)
267383 info -> name , self -> child_pid1 );
268384 }
269385 }
386+
387+ self -> child_pidfd_derived_nsfds1 [i ] = ioctl (self -> child_pidfd1 , info -> pidfd_ioctl , 0 );
388+ if (self -> child_pidfd_derived_nsfds1 [i ] < 0 ) {
389+ EXPECT_EQ (errno , EOPNOTSUPP ) {
390+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
391+ info -> name , self -> child_pid1 );
392+ }
393+ }
394+
395+ self -> child_pidfd_derived_nsfds2 [i ] = ioctl (self -> child_pidfd2 , info -> pidfd_ioctl , 0 );
396+ if (self -> child_pidfd_derived_nsfds2 [i ] < 0 ) {
397+ EXPECT_EQ (errno , EOPNOTSUPP ) {
398+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
399+ info -> name , self -> child_pid2 );
400+ }
401+ }
270402 }
271403
272404 close (proc_fd );
@@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset)
288420 close (self -> child_nsfds1 [i ]);
289421 if (self -> child_nsfds2 [i ] >= 0 )
290422 close (self -> child_nsfds2 [i ]);
423+ if (self -> child_pidfd_derived_nsfds [i ] >= 0 )
424+ close (self -> child_pidfd_derived_nsfds [i ]);
425+ if (self -> child_pidfd_derived_nsfds1 [i ] >= 0 )
426+ close (self -> child_pidfd_derived_nsfds1 [i ]);
427+ if (self -> child_pidfd_derived_nsfds2 [i ] >= 0 )
428+ close (self -> child_pidfd_derived_nsfds2 [i ]);
291429 }
292430
293431 if (self -> child_pidfd1 >= 0 )
@@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns)
446584 }
447585}
448586
587+ TEST_F (current_nsset , pidfd_derived_nsfd_incremental_setns )
588+ {
589+ int i ;
590+ pid_t pid ;
591+
592+ pid = getpid ();
593+ for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
594+ const struct ns_info * info = & ns_info [i ];
595+ int nsfd ;
596+
597+ if (self -> child_pidfd_derived_nsfds1 [i ] < 0 )
598+ continue ;
599+
600+ if (info -> flag ) {
601+ ASSERT_EQ (setns (self -> child_pidfd_derived_nsfds1 [i ], info -> flag ), 0 ) {
602+ TH_LOG ("%m - Failed to setns to %s namespace of %d via nsfd %d" ,
603+ info -> name , self -> child_pid1 ,
604+ self -> child_pidfd_derived_nsfds1 [i ]);
605+ }
606+ }
607+
608+ /* Verify that we have changed to the correct namespaces. */
609+ if (info -> flag == CLONE_NEWPID )
610+ nsfd = self -> child_pidfd_derived_nsfds [i ];
611+ else
612+ nsfd = self -> child_pidfd_derived_nsfds1 [i ];
613+ ASSERT_EQ (in_same_namespace (nsfd , pid , info -> name ), 1 ) {
614+ TH_LOG ("setns failed to place us correctly into %s namespace of %d via nsfd %d" ,
615+ info -> name , self -> child_pid1 ,
616+ self -> child_pidfd_derived_nsfds1 [i ]);
617+ }
618+ TH_LOG ("Managed to correctly setns to %s namespace of %d via nsfd %d" ,
619+ info -> name , self -> child_pid1 , self -> child_pidfd_derived_nsfds1 [i ]);
620+ }
621+ }
622+
449623TEST_F (current_nsset , pidfd_one_shot_setns )
450624{
451625 unsigned flags = 0 ;
@@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play)
542716 info -> name , self -> child_pid2 ,
543717 self -> child_nsfds2 [i ]);
544718 }
719+
720+ /*
721+ * Can't setns to a user namespace outside of our hierarchy since we
722+ * don't have caps in there and didn't create it. That means that under
723+ * no circumstances should we be able to setns to any of the other
724+ * ones since they aren't owned by our user namespace.
725+ */
726+ for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
727+ const struct ns_info * info = & ns_info [i ];
728+
729+ if (self -> child_pidfd_derived_nsfds2 [i ] < 0 || !info -> flag )
730+ continue ;
731+
732+ ASSERT_NE (setns (self -> child_pidfd_derived_nsfds2 [i ], info -> flag ), 0 ) {
733+ TH_LOG ("Managed to setns to %s namespace of %d via nsfd %d" ,
734+ info -> name , self -> child_pid2 ,
735+ self -> child_pidfd_derived_nsfds2 [i ]);
736+ }
737+ TH_LOG ("%m - Correctly failed to setns to %s namespace of %d via nsfd %d" ,
738+ info -> name , self -> child_pid2 ,
739+ self -> child_pidfd_derived_nsfds2 [i ]);
740+ }
545741}
546742
547743TEST (setns_einval )
0 commit comments