Skip to content

Commit e7c93cb

Browse files
committed
Merge tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux
Pull thread updates from Christian Brauner: "We have been discussing using pidfds to attach to namespaces for quite a while and the patches have in one form or another already existed for about a year. But I wanted to wait to see how the general API would be received and adopted. This contains the changes to make it possible to use pidfds to attach to the namespaces of a process, i.e. they can be passed as the first argument to the setns() syscall. When only a single namespace type is specified the semantics are equivalent to passing an nsfd. That means setns(nsfd, CLONE_NEWNET) equals setns(pidfd, CLONE_NEWNET). However, when a pidfd is passed, multiple namespace flags can be specified in the second setns() argument and setns() will attach the caller to all the specified namespaces all at once or to none of them. Specifying 0 is not valid together with a pidfd. Here are just two obvious examples: setns(pidfd, CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET); setns(pidfd, CLONE_NEWUSER); Allowing callers to also attach to subsets of namespaces supports various use-cases where callers setns to a subset of namespaces to retain privilege, perform an action and then re-attach another subset of namespaces. Apart from significantly reducing the number of syscalls needed to attach to all currently supported namespaces (eight "open+setns" sequences vs just a single "setns()"), this also allows atomic setns to a set of namespaces, i.e. either attaching to all namespaces succeeds or we fail without having changed anything. This is centered around a new internal struct nsset which holds all information necessary for a task to switch to a new set of namespaces atomically. FWIW, with this change a pidfd becomes the only token needed to interact with a container. I'm expecting this to be picked up by util-linux for nsenter rather soon.
Associated with this change is a shiny new test-suite dedicated to setns() (for pidfds and nsfds alike)"

* tag 'threads-v5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests/pidfd: add pidfd setns tests
  nsproxy: attach to namespaces via pidfds
  nsproxy: add struct nsset
2 parents d479c5a + 2b40c5d commit e7c93cb

File tree

18 files changed

+833
-47
lines changed

18 files changed

+833
-47
lines changed

fs/namespace.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,6 +1786,11 @@ static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
17861786
return container_of(ns, struct mnt_namespace, ns);
17871787
}
17881788

1789+
struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
1790+
{
1791+
return &mnt->ns;
1792+
}
1793+
17891794
static bool mnt_ns_loop(struct dentry *dentry)
17901795
{
17911796
/* Could bind mounting the mount namespace inode cause a
@@ -4013,16 +4018,18 @@ static void mntns_put(struct ns_common *ns)
40134018
put_mnt_ns(to_mnt_ns(ns));
40144019
}
40154020

4016-
static int mntns_install(struct nsproxy *nsproxy, struct ns_common *ns)
4021+
static int mntns_install(struct nsset *nsset, struct ns_common *ns)
40174022
{
4018-
struct fs_struct *fs = current->fs;
4023+
struct nsproxy *nsproxy = nsset->nsproxy;
4024+
struct fs_struct *fs = nsset->fs;
40194025
struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
4026+
struct user_namespace *user_ns = nsset->cred->user_ns;
40204027
struct path root;
40214028
int err;
40224029

40234030
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
4024-
!ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
4025-
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
4031+
!ns_capable(user_ns, CAP_SYS_CHROOT) ||
4032+
!ns_capable(user_ns, CAP_SYS_ADMIN))
40264033
return -EPERM;
40274034

40284035
if (is_anon_ns(mnt_ns))

fs/nsfs.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,11 @@ int ns_get_name(char *buf, size_t size, struct task_struct *task,
229229
return res;
230230
}
231231

232+
bool proc_ns_file(const struct file *file)
233+
{
234+
return file->f_op == &ns_file_operations;
235+
}
236+
232237
struct file *proc_ns_fget(int fd)
233238
{
234239
struct file *file;

include/linux/mnt_namespace.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
struct mnt_namespace;
77
struct fs_struct;
88
struct user_namespace;
9+
struct ns_common;
910

1011
extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
1112
struct user_namespace *, struct fs_struct *);
1213
extern void put_mnt_ns(struct mnt_namespace *ns);
14+
extern struct ns_common *from_mnt_ns(struct mnt_namespace *);
1315

1416
extern const struct file_operations proc_mounts_operations;
1517
extern const struct file_operations proc_mountinfo_operations;

include/linux/nsproxy.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,30 @@ struct nsproxy {
4141
};
4242
extern struct nsproxy init_nsproxy;
4343

44+
/*
45+
* A structure to encompass all bits needed to install
46+
* a partial or complete new set of namespaces.
47+
*
48+
* If a new user namespace is requested cred will
49+
* point to a modifiable set of credentials. If a pointer
50+
* to a modifiable set is needed nsset_cred() must be
51+
* used and tested.
52+
*/
53+
struct nsset {
54+
unsigned flags;
55+
struct nsproxy *nsproxy;
56+
struct fs_struct *fs;
57+
const struct cred *cred;
58+
};
59+
60+
static inline struct cred *nsset_cred(struct nsset *set)
61+
{
62+
if (set->flags & CLONE_NEWUSER)
63+
return (struct cred *)set->cred;
64+
65+
return NULL;
66+
}
67+
4468
/*
4569
* the namespaces access rules are:
4670
*

include/linux/proc_fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,6 @@ static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
179179
return inode->i_sb->s_fs_info;
180180
}
181181

182+
bool proc_ns_file(const struct file *file);
183+
182184
#endif /* _LINUX_PROC_FS_H */

include/linux/proc_ns.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#include <linux/ns_common.h>
99

1010
struct pid_namespace;
11-
struct nsproxy;
11+
struct nsset;
1212
struct path;
1313
struct task_struct;
1414
struct inode;
@@ -19,7 +19,7 @@ struct proc_ns_operations {
1919
int type;
2020
struct ns_common *(*get)(struct task_struct *task);
2121
void (*put)(struct ns_common *ns);
22-
int (*install)(struct nsproxy *nsproxy, struct ns_common *ns);
22+
int (*install)(struct nsset *nsset, struct ns_common *ns);
2323
struct user_namespace *(*owner)(struct ns_common *ns);
2424
struct ns_common *(*get_parent)(struct ns_common *ns);
2525
} __randomize_layout;

ipc/namespace.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,15 +177,14 @@ static void ipcns_put(struct ns_common *ns)
177177
return put_ipc_ns(to_ipc_ns(ns));
178178
}
179179

180-
static int ipcns_install(struct nsproxy *nsproxy, struct ns_common *new)
180+
static int ipcns_install(struct nsset *nsset, struct ns_common *new)
181181
{
182+
struct nsproxy *nsproxy = nsset->nsproxy;
182183
struct ipc_namespace *ns = to_ipc_ns(new);
183184
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
184-
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
185+
!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
185186
return -EPERM;
186187

187-
/* Ditch state from the old ipc namespace */
188-
exit_sem(current);
189188
put_ipc_ns(nsproxy->ipc_ns);
190189
nsproxy->ipc_ns = get_ipc_ns(ns);
191190
return 0;

kernel/cgroup/namespace.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,12 @@ static inline struct cgroup_namespace *to_cg_ns(struct ns_common *ns)
9595
return container_of(ns, struct cgroup_namespace, ns);
9696
}
9797

98-
static int cgroupns_install(struct nsproxy *nsproxy, struct ns_common *ns)
98+
static int cgroupns_install(struct nsset *nsset, struct ns_common *ns)
9999
{
100+
struct nsproxy *nsproxy = nsset->nsproxy;
100101
struct cgroup_namespace *cgroup_ns = to_cg_ns(ns);
101102

102-
if (!ns_capable(current_user_ns(), CAP_SYS_ADMIN) ||
103+
if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN) ||
103104
!ns_capable(cgroup_ns->user_ns, CAP_SYS_ADMIN))
104105
return -EPERM;
105106

0 commit comments

Comments
 (0)