Skip to content

Commit 98f3a9a

Browse files
committed
Merge tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfs updates from Christian Brauner: "This contains work to make it possible to derive namespace file descriptors from pidfd file descriptors. Right now it is already possible to use a pidfd with setns() to atomically change multiple namespaces at the same time. In other words, it is possible to switch to the namespace context of a process using a pidfd. There is no need to first open namespace file descriptors via procfs. The work included here extends these abilities by allowing namespace file descriptors to be opened using a pidfd. This means it is now possible to interact with namespaces without ever touching procfs. To this end, a new set of ioctls on pidfds is introduced, covering all supported namespace types."
* tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
- pidfs: allow retrieval of namespace file descriptors
- nsfs: add open_namespace()
- nsproxy: add helper to go from arbitrary namespace to ns_common
- nsproxy: add a cleanup helper for nsproxy
- file: add take_fd() cleanup helper
2 parents 1b074ab + 5b08bd4 commit 98f3a9a

File tree

7 files changed

+178
-29
lines changed

7 files changed

+178
-29
lines changed

fs/internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ struct fs_context;
1717
struct pipe_inode_info;
1818
struct iov_iter;
1919
struct mnt_idmap;
20+
struct ns_common;
2021

2122
/*
2223
* block/bdev.c
@@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m);
239240
* fs/nsfs.c
240241
*/
241242
extern const struct dentry_operations ns_dentry_operations;
243+
int open_namespace(struct ns_common *ns);
242244

243245
/*
244246
* fs/stat.c:

fs/nsfs.c

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task,
8484
return ns_get_path_cb(path, ns_get_path_task, &args);
8585
}
8686

87-
int open_related_ns(struct ns_common *ns,
88-
struct ns_common *(*get_ns)(struct ns_common *ns))
87+
/**
88+
* open_namespace - open a namespace
89+
* @ns: the namespace to open
90+
*
91+
* This will consume a reference to @ns independent of success or failure.
92+
*
93+
* Return: A file descriptor on success or a negative error code on failure.
94+
*/
95+
int open_namespace(struct ns_common *ns)
8996
{
90-
struct path path = {};
91-
struct ns_common *relative;
97+
struct path path __free(path_put) = {};
9298
struct file *f;
9399
int err;
94-
int fd;
95100

96-
fd = get_unused_fd_flags(O_CLOEXEC);
101+
/* call first to consume reference */
102+
err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
103+
if (err < 0)
104+
return err;
105+
106+
CLASS(get_unused_fd, fd)(O_CLOEXEC);
97107
if (fd < 0)
98108
return fd;
99109

110+
f = dentry_open(&path, O_RDONLY, current_cred());
111+
if (IS_ERR(f))
112+
return PTR_ERR(f);
113+
114+
fd_install(fd, f);
115+
return take_fd(fd);
116+
}
117+
118+
int open_related_ns(struct ns_common *ns,
119+
struct ns_common *(*get_ns)(struct ns_common *ns))
120+
{
121+
struct ns_common *relative;
122+
100123
relative = get_ns(ns);
101-
if (IS_ERR(relative)) {
102-
put_unused_fd(fd);
124+
if (IS_ERR(relative))
103125
return PTR_ERR(relative);
104-
}
105126

106-
err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path);
107-
if (err < 0) {
108-
put_unused_fd(fd);
109-
return err;
110-
}
111-
112-
f = dentry_open(&path, O_RDONLY, current_cred());
113-
path_put(&path);
114-
if (IS_ERR(f)) {
115-
put_unused_fd(fd);
116-
fd = PTR_ERR(f);
117-
} else
118-
fd_install(fd, f);
119-
120-
return fd;
127+
return open_namespace(relative);
121128
}
122129
EXPORT_SYMBOL_GPL(open_related_ns);
123130

fs/pidfs.c

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,16 @@
1111
#include <linux/proc_fs.h>
1212
#include <linux/proc_ns.h>
1313
#include <linux/pseudo_fs.h>
14+
#include <linux/ptrace.h>
1415
#include <linux/seq_file.h>
1516
#include <uapi/linux/pidfd.h>
17+
#include <linux/ipc_namespace.h>
18+
#include <linux/time_namespace.h>
19+
#include <linux/utsname.h>
20+
#include <net/net_namespace.h>
1621

1722
#include "internal.h"
23+
#include "mount.h"
1824

1925
#ifdef CONFIG_PROC_FS
2026
/**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
108114
return poll_flags;
109115
}
110116

117+
static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
118+
{
119+
struct task_struct *task __free(put_task) = NULL;
120+
struct nsproxy *nsp __free(put_nsproxy) = NULL;
121+
struct pid *pid = pidfd_pid(file);
122+
struct ns_common *ns_common;
123+
124+
if (arg)
125+
return -EINVAL;
126+
127+
task = get_pid_task(pid, PIDTYPE_PID);
128+
if (!task)
129+
return -ESRCH;
130+
131+
scoped_guard(task_lock, task) {
132+
nsp = task->nsproxy;
133+
if (nsp)
134+
get_nsproxy(nsp);
135+
}
136+
if (!nsp)
137+
return -ESRCH; /* just pretend it didn't exist */
138+
139+
/*
140+
* We're trying to open a file descriptor to the namespace so perform a
141+
* filesystem cred ptrace check. Also, we mirror nsfs behavior.
142+
*/
143+
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
144+
return -EACCES;
145+
146+
switch (cmd) {
147+
/* Namespaces that hang off nsproxy. */
148+
case PIDFD_GET_CGROUP_NAMESPACE:
149+
get_cgroup_ns(nsp->cgroup_ns);
150+
ns_common = to_ns_common(nsp->cgroup_ns);
151+
break;
152+
case PIDFD_GET_IPC_NAMESPACE:
153+
get_ipc_ns(nsp->ipc_ns);
154+
ns_common = to_ns_common(nsp->ipc_ns);
155+
break;
156+
case PIDFD_GET_MNT_NAMESPACE:
157+
get_mnt_ns(nsp->mnt_ns);
158+
ns_common = to_ns_common(nsp->mnt_ns);
159+
break;
160+
case PIDFD_GET_NET_NAMESPACE:
161+
ns_common = to_ns_common(nsp->net_ns);
162+
get_net_ns(ns_common);
163+
break;
164+
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
165+
get_pid_ns(nsp->pid_ns_for_children);
166+
ns_common = to_ns_common(nsp->pid_ns_for_children);
167+
break;
168+
case PIDFD_GET_TIME_NAMESPACE:
169+
get_time_ns(nsp->time_ns);
170+
ns_common = to_ns_common(nsp->time_ns);
171+
break;
172+
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
173+
get_time_ns(nsp->time_ns_for_children);
174+
ns_common = to_ns_common(nsp->time_ns_for_children);
175+
break;
176+
case PIDFD_GET_UTS_NAMESPACE:
177+
get_uts_ns(nsp->uts_ns);
178+
ns_common = to_ns_common(nsp->uts_ns);
179+
break;
180+
/* Namespaces that don't hang off nsproxy. */
181+
case PIDFD_GET_USER_NAMESPACE:
182+
rcu_read_lock();
183+
ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
184+
rcu_read_unlock();
185+
break;
186+
case PIDFD_GET_PID_NAMESPACE:
187+
rcu_read_lock();
188+
ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
189+
rcu_read_unlock();
190+
break;
191+
default:
192+
return -ENOIOCTLCMD;
193+
}
194+
195+
/* open_namespace() unconditionally consumes the reference */
196+
return open_namespace(ns_common);
197+
}
198+
111199
static const struct file_operations pidfs_file_operations = {
112200
.poll = pidfd_poll,
113201
#ifdef CONFIG_PROC_FS
114202
.show_fdinfo = pidfd_show_fdinfo,
115203
#endif
204+
.unlocked_ioctl = pidfd_ioctl,
205+
.compat_ioctl = compat_ptr_ioctl,
116206
};
117207

118208
struct pid *pidfd_pid(const struct file *file)

include/linux/cleanup.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,17 +63,20 @@
6363

6464
#define __free(_name) __cleanup(__free_##_name)
6565

66-
#define __get_and_null_ptr(p) \
67-
({ __auto_type __ptr = &(p); \
68-
__auto_type __val = *__ptr; \
69-
*__ptr = NULL; __val; })
66+
#define __get_and_null(p, nullvalue) \
67+
({ \
68+
__auto_type __ptr = &(p); \
69+
__auto_type __val = *__ptr; \
70+
*__ptr = nullvalue; \
71+
__val; \
72+
})
7073

7174
static inline __must_check
7275
const volatile void * __must_check_fn(const volatile void *val)
7376
{ return val; }
7477

7578
#define no_free_ptr(p) \
76-
((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
79+
((typeof(p)) __must_check_fn(__get_and_null(p, NULL)))
7780

7881
#define return_ptr(p) return no_free_ptr(p)
7982

include/linux/file.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd);
9797
DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T),
9898
get_unused_fd_flags(flags), unsigned flags)
9999

100+
/*
101+
* take_fd() will take care to set @fd to -EBADF ensuring that
102+
* CLASS(get_unused_fd) won't call put_unused_fd(). This makes it
103+
* easier to rely on CLASS(get_unused_fd):
104+
*
105+
* struct file *f;
106+
*
107+
* CLASS(get_unused_fd, fd)(O_CLOEXEC);
108+
* if (fd < 0)
109+
* return fd;
110+
*
111+
* f = dentry_open(&path, O_RDONLY, current_cred());
112+
* if (IS_ERR(f))
113+
* return PTR_ERR(f);
114+
*
115+
* fd_install(fd, f);
116+
* return take_fd(fd);
117+
*/
118+
#define take_fd(fd) __get_and_null(fd, -EBADF)
119+
100120
extern void fd_install(unsigned int fd, struct file *file);
101121

102122
int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags);

include/linux/nsproxy.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,17 @@ struct nsproxy {
4242
};
4343
extern struct nsproxy init_nsproxy;
4444

45+
#define to_ns_common(__ns) \
46+
_Generic((__ns), \
47+
struct cgroup_namespace *: &(__ns->ns), \
48+
struct ipc_namespace *: &(__ns->ns), \
49+
struct net *: &(__ns->ns), \
50+
struct pid_namespace *: &(__ns->ns), \
51+
struct mnt_namespace *: &(__ns->ns), \
52+
struct time_namespace *: &(__ns->ns), \
53+
struct user_namespace *: &(__ns->ns), \
54+
struct uts_namespace *: &(__ns->ns))
55+
4556
/*
4657
* A structure to encompass all bits needed to install
4758
* a partial or complete new set of namespaces.
@@ -112,4 +123,6 @@ static inline void get_nsproxy(struct nsproxy *ns)
112123
refcount_inc(&ns->count);
113124
}
114125

126+
DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T))
127+
115128
#endif

include/uapi/linux/pidfd.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <linux/types.h>
77
#include <linux/fcntl.h>
8+
#include <linux/ioctl.h>
89

910
/* Flags for pidfd_open(). */
1011
#define PIDFD_NONBLOCK O_NONBLOCK
@@ -15,4 +16,17 @@
1516
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
1617
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
1718

19+
#define PIDFS_IOCTL_MAGIC 0xFF
20+
21+
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
22+
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
23+
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
24+
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
25+
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
26+
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
27+
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
28+
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
29+
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
30+
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
31+
1832
#endif /* _UAPI_LINUX_PIDFD_H */

0 commit comments

Comments
 (0)