16
16
#include <unistd.h>
17
17
#include <sys/socket.h>
18
18
#include <sys/stat.h>
19
+ #include <linux/ioctl.h>
19
20
20
21
#include "pidfd.h"
21
22
#include "../clone3/clone3_selftests.h"
22
23
#include "../kselftest_harness.h"
23
24
25
/*
 * Fallback definitions for the pidfd namespace ioctls, used only when the
 * headers included above do not already provide them.
 *
 * Every request is encoded with _IO() under PIDFS_IOCTL_MAGIC.  This test
 * issues them with ioctl() on a pidfd and treats the return value as a file
 * descriptor for the corresponding namespace of the process the pidfd
 * refers to.
 */
#ifndef PIDFS_IOCTL_MAGIC
#define PIDFS_IOCTL_MAGIC 0xFF
#endif

#ifndef PIDFD_GET_CGROUP_NAMESPACE
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
#endif

#ifndef PIDFD_GET_IPC_NAMESPACE
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
#endif

#ifndef PIDFD_GET_MNT_NAMESPACE
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
#endif

#ifndef PIDFD_GET_NET_NAMESPACE
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
#endif

#ifndef PIDFD_GET_PID_NAMESPACE
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
#endif

#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
#endif

#ifndef PIDFD_GET_TIME_NAMESPACE
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
#endif

#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#endif

#ifndef PIDFD_GET_USER_NAMESPACE
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#endif

#ifndef PIDFD_GET_UTS_NAMESPACE
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
#endif
68
+
24
69
enum {
25
70
PIDFD_NS_USER ,
26
71
PIDFD_NS_MNT ,
@@ -31,40 +76,46 @@ enum {
31
76
PIDFD_NS_CGROUP ,
32
77
PIDFD_NS_PIDCLD ,
33
78
PIDFD_NS_TIME ,
79
+ PIDFD_NS_TIMECLD ,
34
80
PIDFD_NS_MAX
35
81
};
36
82
37
83
const struct ns_info {
38
84
const char * name ;
39
85
int flag ;
86
+ unsigned int pidfd_ioctl ;
40
87
} ns_info [] = {
41
- [PIDFD_NS_USER ] = { "user" , CLONE_NEWUSER , },
42
- [PIDFD_NS_MNT ] = { "mnt" , CLONE_NEWNS , },
43
- [PIDFD_NS_PID ] = { "pid" , CLONE_NEWPID , },
44
- [PIDFD_NS_UTS ] = { "uts" , CLONE_NEWUTS , },
45
- [PIDFD_NS_IPC ] = { "ipc" , CLONE_NEWIPC , },
46
- [PIDFD_NS_NET ] = { "net" , CLONE_NEWNET , },
47
- [PIDFD_NS_CGROUP ] = { "cgroup" , CLONE_NEWCGROUP , },
48
- [PIDFD_NS_PIDCLD ] = { "pid_for_children" , 0 , },
49
- [PIDFD_NS_TIME ] = { "time" , CLONE_NEWTIME , },
88
+ [PIDFD_NS_USER ] = { "user" , CLONE_NEWUSER , PIDFD_GET_USER_NAMESPACE , },
89
+ [PIDFD_NS_MNT ] = { "mnt" , CLONE_NEWNS , PIDFD_GET_MNT_NAMESPACE , },
90
+ [PIDFD_NS_PID ] = { "pid" , CLONE_NEWPID , PIDFD_GET_PID_NAMESPACE , },
91
+ [PIDFD_NS_UTS ] = { "uts" , CLONE_NEWUTS , PIDFD_GET_UTS_NAMESPACE , },
92
+ [PIDFD_NS_IPC ] = { "ipc" , CLONE_NEWIPC , PIDFD_GET_IPC_NAMESPACE , },
93
+ [PIDFD_NS_NET ] = { "net" , CLONE_NEWNET , PIDFD_GET_NET_NAMESPACE , },
94
+ [PIDFD_NS_CGROUP ] = { "cgroup" , CLONE_NEWCGROUP , PIDFD_GET_CGROUP_NAMESPACE , },
95
+ [PIDFD_NS_TIME ] = { "time" , CLONE_NEWTIME , PIDFD_GET_TIME_NAMESPACE , },
96
+ [PIDFD_NS_PIDCLD ] = { "pid_for_children" , 0 , PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE , },
97
+ [PIDFD_NS_TIMECLD ] = { "time_for_children" , 0 , PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE , },
50
98
};
51
99
52
100
FIXTURE (current_nsset )
53
101
{
54
102
pid_t pid ;
55
103
int pidfd ;
56
104
int nsfds [PIDFD_NS_MAX ];
105
+ int child_pidfd_derived_nsfds [PIDFD_NS_MAX ];
57
106
58
107
pid_t child_pid_exited ;
59
108
int child_pidfd_exited ;
60
109
61
110
pid_t child_pid1 ;
62
111
int child_pidfd1 ;
63
112
int child_nsfds1 [PIDFD_NS_MAX ];
113
+ int child_pidfd_derived_nsfds1 [PIDFD_NS_MAX ];
64
114
65
115
pid_t child_pid2 ;
66
116
int child_pidfd2 ;
67
117
int child_nsfds2 [PIDFD_NS_MAX ];
118
+ int child_pidfd_derived_nsfds2 [PIDFD_NS_MAX ];
68
119
};
69
120
70
121
static int sys_waitid (int which , pid_t pid , int options )
@@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset)
128
179
char c ;
129
180
130
181
for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
131
- self -> nsfds [i ] = - EBADF ;
132
- self -> child_nsfds1 [i ] = - EBADF ;
133
- self -> child_nsfds2 [i ] = - EBADF ;
182
+ self -> nsfds [i ] = - EBADF ;
183
+ self -> child_nsfds1 [i ] = - EBADF ;
184
+ self -> child_nsfds2 [i ] = - EBADF ;
185
+ self -> child_pidfd_derived_nsfds [i ] = - EBADF ;
186
+ self -> child_pidfd_derived_nsfds1 [i ] = - EBADF ;
187
+ self -> child_pidfd_derived_nsfds2 [i ] = - EBADF ;
134
188
}
135
189
136
190
proc_fd = open ("/proc/self/ns" , O_DIRECTORY | O_CLOEXEC );
@@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset)
139
193
}
140
194
141
195
self -> pid = getpid ();
196
+ self -> pidfd = sys_pidfd_open (self -> pid , 0 );
197
+ EXPECT_GT (self -> pidfd , 0 ) {
198
+ TH_LOG ("%m - Failed to open pidfd for process %d" , self -> pid );
199
+ }
200
+
142
201
for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
143
202
const struct ns_info * info = & ns_info [i ];
144
203
self -> nsfds [i ] = openat (proc_fd , info -> name , O_RDONLY | O_CLOEXEC );
@@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset)
148
207
info -> name , self -> pid );
149
208
}
150
209
}
151
- }
152
210
153
- self -> pidfd = sys_pidfd_open (self -> pid , 0 );
154
- EXPECT_GT (self -> pidfd , 0 ) {
155
- TH_LOG ("%m - Failed to open pidfd for process %d" , self -> pid );
211
+ self -> child_pidfd_derived_nsfds [i ] = ioctl (self -> pidfd , info -> pidfd_ioctl , 0 );
212
+ if (self -> child_pidfd_derived_nsfds [i ] < 0 ) {
213
+ EXPECT_EQ (errno , EOPNOTSUPP ) {
214
+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
215
+ info -> name , self -> pid );
216
+ }
217
+ }
156
218
}
157
219
158
220
/* Create task that exits right away. */
159
- self -> child_pid_exited = create_child (& self -> child_pidfd_exited ,
160
- CLONE_NEWUSER | CLONE_NEWNET );
221
+ self -> child_pid_exited = create_child (& self -> child_pidfd_exited , 0 );
161
222
EXPECT_GE (self -> child_pid_exited , 0 );
162
223
163
- if (self -> child_pid_exited == 0 )
224
+ if (self -> child_pid_exited == 0 ) {
225
+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && unshare (CLONE_NEWUSER ) < 0 )
226
+ _exit (EXIT_FAILURE );
227
+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 )
228
+ _exit (EXIT_FAILURE );
164
229
_exit (EXIT_SUCCESS );
230
+ }
165
231
166
232
ASSERT_EQ (sys_waitid (P_PID , self -> child_pid_exited , WEXITED | WNOWAIT ), 0 );
167
233
@@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset)
174
240
EXPECT_EQ (ret , 0 );
175
241
176
242
/* Create tasks that will be stopped. */
177
- self -> child_pid1 = create_child (& self -> child_pidfd1 ,
178
- CLONE_NEWUSER | CLONE_NEWNS |
179
- CLONE_NEWCGROUP | CLONE_NEWIPC |
180
- CLONE_NEWUTS | CLONE_NEWPID |
181
- CLONE_NEWNET );
243
+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && self -> nsfds [PIDFD_NS_PID ] >= 0 )
244
+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWUSER | CLONE_NEWPID );
245
+ else if (self -> nsfds [PIDFD_NS_PID ] >= 0 )
246
+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWPID );
247
+ else if (self -> nsfds [PIDFD_NS_USER ] >= 0 )
248
+ self -> child_pid1 = create_child (& self -> child_pidfd1 , CLONE_NEWUSER );
249
+ else
250
+ self -> child_pid1 = create_child (& self -> child_pidfd1 , 0 );
182
251
EXPECT_GE (self -> child_pid1 , 0 );
183
252
184
253
if (self -> child_pid1 == 0 ) {
185
254
close (ipc_sockets [0 ]);
186
255
187
- if (!switch_timens ())
256
+ if (self -> nsfds [PIDFD_NS_MNT ] >= 0 && unshare (CLONE_NEWNS ) < 0 ) {
257
+ TH_LOG ("%m - Failed to unshare mount namespace for process %d" , self -> pid );
188
258
_exit (EXIT_FAILURE );
259
+ }
260
+ if (self -> nsfds [PIDFD_NS_CGROUP ] >= 0 && unshare (CLONE_NEWCGROUP ) < 0 ) {
261
+ TH_LOG ("%m - Failed to unshare cgroup namespace for process %d" , self -> pid );
262
+ _exit (EXIT_FAILURE );
263
+ }
264
+ if (self -> nsfds [PIDFD_NS_IPC ] >= 0 && unshare (CLONE_NEWIPC ) < 0 ) {
265
+ TH_LOG ("%m - Failed to unshare ipc namespace for process %d" , self -> pid );
266
+ _exit (EXIT_FAILURE );
267
+ }
268
+ if (self -> nsfds [PIDFD_NS_UTS ] >= 0 && unshare (CLONE_NEWUTS ) < 0 ) {
269
+ TH_LOG ("%m - Failed to unshare uts namespace for process %d" , self -> pid );
270
+ _exit (EXIT_FAILURE );
271
+ }
272
+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 ) {
273
+ TH_LOG ("%m - Failed to unshare net namespace for process %d" , self -> pid );
274
+ _exit (EXIT_FAILURE );
275
+ }
276
+ if (self -> nsfds [PIDFD_NS_TIME ] >= 0 && !switch_timens ()) {
277
+ TH_LOG ("%m - Failed to unshare time namespace for process %d" , self -> pid );
278
+ _exit (EXIT_FAILURE );
279
+ }
189
280
190
281
if (write_nointr (ipc_sockets [1 ], "1" , 1 ) < 0 )
191
282
_exit (EXIT_FAILURE );
@@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset)
203
294
ret = socketpair (AF_LOCAL , SOCK_STREAM | SOCK_CLOEXEC , 0 , ipc_sockets );
204
295
EXPECT_EQ (ret , 0 );
205
296
206
- self -> child_pid2 = create_child (& self -> child_pidfd2 ,
207
- CLONE_NEWUSER | CLONE_NEWNS |
208
- CLONE_NEWCGROUP | CLONE_NEWIPC |
209
- CLONE_NEWUTS | CLONE_NEWPID |
210
- CLONE_NEWNET );
297
+ if (self -> nsfds [PIDFD_NS_USER ] >= 0 && self -> nsfds [PIDFD_NS_PID ] >= 0 )
298
+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWUSER | CLONE_NEWPID );
299
+ else if (self -> nsfds [PIDFD_NS_PID ] >= 0 )
300
+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWPID );
301
+ else if (self -> nsfds [PIDFD_NS_USER ] >= 0 )
302
+ self -> child_pid2 = create_child (& self -> child_pidfd2 , CLONE_NEWUSER );
303
+ else
304
+ self -> child_pid2 = create_child (& self -> child_pidfd2 , 0 );
211
305
EXPECT_GE (self -> child_pid2 , 0 );
212
306
213
307
if (self -> child_pid2 == 0 ) {
214
308
close (ipc_sockets [0 ]);
215
309
216
- if (!switch_timens ())
310
+ if (self -> nsfds [PIDFD_NS_MNT ] >= 0 && unshare (CLONE_NEWNS ) < 0 ) {
311
+ TH_LOG ("%m - Failed to unshare mount namespace for process %d" , self -> pid );
312
+ _exit (EXIT_FAILURE );
313
+ }
314
+ if (self -> nsfds [PIDFD_NS_CGROUP ] >= 0 && unshare (CLONE_NEWCGROUP ) < 0 ) {
315
+ TH_LOG ("%m - Failed to unshare cgroup namespace for process %d" , self -> pid );
217
316
_exit (EXIT_FAILURE );
317
+ }
318
+ if (self -> nsfds [PIDFD_NS_IPC ] >= 0 && unshare (CLONE_NEWIPC ) < 0 ) {
319
+ TH_LOG ("%m - Failed to unshare ipc namespace for process %d" , self -> pid );
320
+ _exit (EXIT_FAILURE );
321
+ }
322
+ if (self -> nsfds [PIDFD_NS_UTS ] >= 0 && unshare (CLONE_NEWUTS ) < 0 ) {
323
+ TH_LOG ("%m - Failed to unshare uts namespace for process %d" , self -> pid );
324
+ _exit (EXIT_FAILURE );
325
+ }
326
+ if (self -> nsfds [PIDFD_NS_NET ] >= 0 && unshare (CLONE_NEWNET ) < 0 ) {
327
+ TH_LOG ("%m - Failed to unshare net namespace for process %d" , self -> pid );
328
+ _exit (EXIT_FAILURE );
329
+ }
330
+ if (self -> nsfds [PIDFD_NS_TIME ] >= 0 && !switch_timens ()) {
331
+ TH_LOG ("%m - Failed to unshare time namespace for process %d" , self -> pid );
332
+ _exit (EXIT_FAILURE );
333
+ }
218
334
219
335
if (write_nointr (ipc_sockets [1 ], "1" , 1 ) < 0 )
220
336
_exit (EXIT_FAILURE );
@@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset)
267
383
info -> name , self -> child_pid1 );
268
384
}
269
385
}
386
+
387
+ self -> child_pidfd_derived_nsfds1 [i ] = ioctl (self -> child_pidfd1 , info -> pidfd_ioctl , 0 );
388
+ if (self -> child_pidfd_derived_nsfds1 [i ] < 0 ) {
389
+ EXPECT_EQ (errno , EOPNOTSUPP ) {
390
+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
391
+ info -> name , self -> child_pid1 );
392
+ }
393
+ }
394
+
395
+ self -> child_pidfd_derived_nsfds2 [i ] = ioctl (self -> child_pidfd2 , info -> pidfd_ioctl , 0 );
396
+ if (self -> child_pidfd_derived_nsfds2 [i ] < 0 ) {
397
+ EXPECT_EQ (errno , EOPNOTSUPP ) {
398
+ TH_LOG ("%m - Failed to derive %s namespace from pidfd of process %d" ,
399
+ info -> name , self -> child_pid2 );
400
+ }
401
+ }
270
402
}
271
403
272
404
close (proc_fd );
@@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset)
288
420
close (self -> child_nsfds1 [i ]);
289
421
if (self -> child_nsfds2 [i ] >= 0 )
290
422
close (self -> child_nsfds2 [i ]);
423
+ if (self -> child_pidfd_derived_nsfds [i ] >= 0 )
424
+ close (self -> child_pidfd_derived_nsfds [i ]);
425
+ if (self -> child_pidfd_derived_nsfds1 [i ] >= 0 )
426
+ close (self -> child_pidfd_derived_nsfds1 [i ]);
427
+ if (self -> child_pidfd_derived_nsfds2 [i ] >= 0 )
428
+ close (self -> child_pidfd_derived_nsfds2 [i ]);
291
429
}
292
430
293
431
if (self -> child_pidfd1 >= 0 )
@@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns)
446
584
}
447
585
}
448
586
587
+ TEST_F (current_nsset , pidfd_derived_nsfd_incremental_setns )
588
+ {
589
+ int i ;
590
+ pid_t pid ;
591
+
592
+ pid = getpid ();
593
+ for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
594
+ const struct ns_info * info = & ns_info [i ];
595
+ int nsfd ;
596
+
597
+ if (self -> child_pidfd_derived_nsfds1 [i ] < 0 )
598
+ continue ;
599
+
600
+ if (info -> flag ) {
601
+ ASSERT_EQ (setns (self -> child_pidfd_derived_nsfds1 [i ], info -> flag ), 0 ) {
602
+ TH_LOG ("%m - Failed to setns to %s namespace of %d via nsfd %d" ,
603
+ info -> name , self -> child_pid1 ,
604
+ self -> child_pidfd_derived_nsfds1 [i ]);
605
+ }
606
+ }
607
+
608
+ /* Verify that we have changed to the correct namespaces. */
609
+ if (info -> flag == CLONE_NEWPID )
610
+ nsfd = self -> child_pidfd_derived_nsfds [i ];
611
+ else
612
+ nsfd = self -> child_pidfd_derived_nsfds1 [i ];
613
+ ASSERT_EQ (in_same_namespace (nsfd , pid , info -> name ), 1 ) {
614
+ TH_LOG ("setns failed to place us correctly into %s namespace of %d via nsfd %d" ,
615
+ info -> name , self -> child_pid1 ,
616
+ self -> child_pidfd_derived_nsfds1 [i ]);
617
+ }
618
+ TH_LOG ("Managed to correctly setns to %s namespace of %d via nsfd %d" ,
619
+ info -> name , self -> child_pid1 , self -> child_pidfd_derived_nsfds1 [i ]);
620
+ }
621
+ }
622
+
449
623
TEST_F (current_nsset , pidfd_one_shot_setns )
450
624
{
451
625
unsigned flags = 0 ;
@@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play)
542
716
info -> name , self -> child_pid2 ,
543
717
self -> child_nsfds2 [i ]);
544
718
}
719
+
720
+ /*
721
+ * Can't setns to a user namespace outside of our hierarchy since we
722
+ * don't have caps in there and didn't create it. That means that under
723
+ * no circumstances should we be able to setns to any of the other
724
+ * ones since they aren't owned by our user namespace.
725
+ */
726
+ for (i = 0 ; i < PIDFD_NS_MAX ; i ++ ) {
727
+ const struct ns_info * info = & ns_info [i ];
728
+
729
+ if (self -> child_pidfd_derived_nsfds2 [i ] < 0 || !info -> flag )
730
+ continue ;
731
+
732
+ ASSERT_NE (setns (self -> child_pidfd_derived_nsfds2 [i ], info -> flag ), 0 ) {
733
+ TH_LOG ("Managed to setns to %s namespace of %d via nsfd %d" ,
734
+ info -> name , self -> child_pid2 ,
735
+ self -> child_pidfd_derived_nsfds2 [i ]);
736
+ }
737
+ TH_LOG ("%m - Correctly failed to setns to %s namespace of %d via nsfd %d" ,
738
+ info -> name , self -> child_pid2 ,
739
+ self -> child_pidfd_derived_nsfds2 [i ]);
740
+ }
545
741
}
546
742
547
743
TEST (setns_einval )
0 commit comments