Skip to content

Commit 60997c3

Browse files
author
Christian Brauner
committed
close_range: add CLOSE_RANGE_UNSHARE
One of the use-cases of close_range() is to drop file descriptors just before execve(). This would usually be expressed as the sequence:

    unshare(CLONE_FILES);
    close_range(3, ~0U);

As pointed out by Linus, it might be desirable to have this be a part of close_range() itself under a new flag, CLOSE_RANGE_UNSHARE.

This expands {dup,unshare}_fd() to take a max_fds argument that indicates the maximum number of file descriptors to copy from the old struct files. When the user requests that all file descriptors be closed via close_range(min, max), we can cap the copy via unshare_fd(min) and hence don't need to do any of the heavy fput() work for everything above min.

The patch makes it so that if CLOSE_RANGE_UNSHARE is requested and we do in fact currently share our file descriptor table, we create a new private copy. We then close all fds in the requested range and, once we're done, install the new fd table.

Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Christian Brauner <[email protected]>
1 parent 2c5db60 commit 60997c3

File tree

5 files changed

+79
-19
lines changed

5 files changed

+79
-19
lines changed

fs/file.c

Lines changed: 58 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include <linux/bitops.h>
2020
#include <linux/spinlock.h>
2121
#include <linux/rcupdate.h>
22+
#include <linux/close_range.h>
2223

2324
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
2425
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -265,12 +266,22 @@ static unsigned int count_open_files(struct fdtable *fdt)
265266
return i;
266267
}
267268

269+
static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
270+
{
271+
unsigned int count;
272+
273+
count = count_open_files(fdt);
274+
if (max_fds < NR_OPEN_DEFAULT)
275+
max_fds = NR_OPEN_DEFAULT;
276+
return min(count, max_fds);
277+
}
278+
268279
/*
269280
* Allocate a new files structure and copy contents from the
270281
* passed in files structure.
271282
* errorp will be valid only when the returned files_struct is NULL.
272283
*/
273-
struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
284+
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
274285
{
275286
struct files_struct *newf;
276287
struct file **old_fds, **new_fds;
@@ -297,7 +308,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
297308

298309
spin_lock(&oldf->file_lock);
299310
old_fdt = files_fdtable(oldf);
300-
open_files = count_open_files(old_fdt);
311+
open_files = sane_fdtable_size(old_fdt, max_fds);
301312

302313
/*
303314
* Check whether we need to allocate a larger fd array and fd set.
@@ -328,7 +339,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
328339
*/
329340
spin_lock(&oldf->file_lock);
330341
old_fdt = files_fdtable(oldf);
331-
open_files = count_open_files(old_fdt);
342+
open_files = sane_fdtable_size(old_fdt, max_fds);
332343
}
333344

334345
copy_fd_bitmaps(new_fdt, old_fdt, open_files);
@@ -665,32 +676,72 @@ EXPORT_SYMBOL(__close_fd); /* for ksys_close() */
665676
* This closes a range of file descriptors. All file descriptors
666677
* from @fd up to and including @max_fd are closed.
667678
*/
668-
int __close_range(struct files_struct *files, unsigned fd, unsigned max_fd)
679+
int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
669680
{
670681
unsigned int cur_max;
682+
struct task_struct *me = current;
683+
struct files_struct *cur_fds = me->files, *fds = NULL;
684+
685+
if (flags & ~CLOSE_RANGE_UNSHARE)
686+
return -EINVAL;
671687

672688
if (fd > max_fd)
673689
return -EINVAL;
674690

675691
rcu_read_lock();
676-
cur_max = files_fdtable(files)->max_fds;
692+
cur_max = files_fdtable(cur_fds)->max_fds;
677693
rcu_read_unlock();
678694

679695
/* cap to last valid index into fdtable */
680696
cur_max--;
681697

698+
if (flags & CLOSE_RANGE_UNSHARE) {
699+
int ret;
700+
unsigned int max_unshare_fds = NR_OPEN_MAX;
701+
702+
/*
703+
* If the requested range is greater than the current maximum,
704+
* we're closing everything so only copy all file descriptors
705+
* beneath the lowest file descriptor.
706+
*/
707+
if (max_fd >= cur_max)
708+
max_unshare_fds = fd;
709+
710+
ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
711+
if (ret)
712+
return ret;
713+
714+
/*
715+
* We used to share our file descriptor table, and have now
716+
* created a private one, make sure we're using it below.
717+
*/
718+
if (fds)
719+
swap(cur_fds, fds);
720+
}
721+
682722
max_fd = min(max_fd, cur_max);
683723
while (fd <= max_fd) {
684724
struct file *file;
685725

686-
file = pick_file(files, fd++);
726+
file = pick_file(cur_fds, fd++);
687727
if (!file)
688728
continue;
689729

690-
filp_close(file, files);
730+
filp_close(file, cur_fds);
691731
cond_resched();
692732
}
693733

734+
if (fds) {
735+
/*
736+
* We're done closing the files we were supposed to. Time to install
737+
* the new file descriptor table and drop the old one.
738+
*/
739+
task_lock(me);
740+
me->files = cur_fds;
741+
task_unlock(me);
742+
put_files_struct(fds);
743+
}
744+
694745
return 0;
695746
}
696747

fs/open.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1324,10 +1324,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
13241324
SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
13251325
unsigned int, flags)
13261326
{
1327-
if (flags)
1328-
return -EINVAL;
1329-
1330-
return __close_range(current->files, fd, max_fd);
1327+
return __close_range(fd, max_fd, flags);
13311328
}
13321329

13331330
/*

include/linux/fdtable.h

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
* as this is the granularity returned by copy_fdset().
2323
*/
2424
#define NR_OPEN_DEFAULT BITS_PER_LONG
25+
#define NR_OPEN_MAX ~0U
2526

2627
struct fdtable {
2728
unsigned int max_fds;
@@ -109,7 +110,7 @@ struct files_struct *get_files_struct(struct task_struct *);
109110
void put_files_struct(struct files_struct *fs);
110111
void reset_files_struct(struct files_struct *);
111112
int unshare_files(struct files_struct **);
112-
struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
113+
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
113114
void do_close_on_exec(struct files_struct *);
114115
int iterate_fd(struct files_struct *, unsigned,
115116
int (*)(const void *, struct file *, unsigned),
@@ -121,9 +122,10 @@ extern void __fd_install(struct files_struct *files,
121122
unsigned int fd, struct file *file);
122123
extern int __close_fd(struct files_struct *files,
123124
unsigned int fd);
124-
extern int __close_range(struct files_struct *files, unsigned int fd,
125-
unsigned int max_fd);
125+
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
126126
extern int __close_fd_get_file(unsigned int fd, struct file **res);
127+
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
128+
struct files_struct **new_fdp);
127129

128130
extern struct kmem_cache *files_cachep;
129131

include/uapi/linux/close_range.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2+
#ifndef _UAPI_LINUX_CLOSE_RANGE_H
3+
#define _UAPI_LINUX_CLOSE_RANGE_H
4+
5+
/* Unshare the file descriptor table before closing file descriptors. */
6+
#define CLOSE_RANGE_UNSHARE (1U << 1)
7+
8+
#endif /* _UAPI_LINUX_CLOSE_RANGE_H */
9+

kernel/fork.c

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1474,7 +1474,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
14741474
goto out;
14751475
}
14761476

1477-
newf = dup_fd(oldf, &error);
1477+
newf = dup_fd(oldf, NR_OPEN_MAX, &error);
14781478
if (!newf)
14791479
goto out;
14801480

@@ -2907,14 +2907,15 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
29072907
/*
29082908
* Unshare file descriptor table if it is being shared
29092909
*/
2910-
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
2910+
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
2911+
struct files_struct **new_fdp)
29112912
{
29122913
struct files_struct *fd = current->files;
29132914
int error = 0;
29142915

29152916
if ((unshare_flags & CLONE_FILES) &&
29162917
(fd && atomic_read(&fd->count) > 1)) {
2917-
*new_fdp = dup_fd(fd, &error);
2918+
*new_fdp = dup_fd(fd, max_fds, &error);
29182919
if (!*new_fdp)
29192920
return error;
29202921
}
@@ -2974,7 +2975,7 @@ int ksys_unshare(unsigned long unshare_flags)
29742975
err = unshare_fs(unshare_flags, &new_fs);
29752976
if (err)
29762977
goto bad_unshare_out;
2977-
err = unshare_fd(unshare_flags, &new_fd);
2978+
err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
29782979
if (err)
29792980
goto bad_unshare_cleanup_fs;
29802981
err = unshare_userns(unshare_flags, &new_cred);
@@ -3063,7 +3064,7 @@ int unshare_files(struct files_struct **displaced)
30633064
struct files_struct *copy = NULL;
30643065
int error;
30653066

3066-
error = unshare_fd(CLONE_FILES, &copy);
3067+
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
30673068
if (error || !copy) {
30683069
*displaced = NULL;
30693070
return error;

0 commit comments

Comments
 (0)