Skip to content

Commit 6cca119

Browse files
committed
Merge tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull close_range() fix from Al Viro: "Fix the logic in descriptor table trimming" * tag 'pull-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: close_range(): fix the logics in descriptor table trimming
2 parents 0c55932 + 678379e commit 6cca119

File tree

3 files changed

+52
-83
lines changed

3 files changed

+52
-83
lines changed

fs/file.c

Lines changed: 34 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
272272
return test_bit(fd, fdt->open_fds);
273273
}
274274

275-
static unsigned int count_open_files(struct fdtable *fdt)
276-
{
277-
unsigned int size = fdt->max_fds;
278-
unsigned int i;
279-
280-
/* Find the last open fd */
281-
for (i = size / BITS_PER_LONG; i > 0; ) {
282-
if (fdt->open_fds[--i])
283-
break;
284-
}
285-
i = (i + 1) * BITS_PER_LONG;
286-
return i;
287-
}
288-
289275
/*
290276
* Note that a sane fdtable size always has to be a multiple of
291277
* BITS_PER_LONG, since we have bitmaps that are sized by this.
292278
*
293-
* 'max_fds' will normally already be properly aligned, but it
294-
* turns out that in the close_range() -> __close_range() ->
295-
* unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
296-
* up having a 'max_fds' value that isn't already aligned.
297-
*
298-
* Rather than make close_range() have to worry about this,
299-
* just make that BITS_PER_LONG alignment be part of a sane
300-
* fdtable size. Becuase that's really what it is.
279+
* punch_hole is optional - when close_range() is asked to unshare
280+
* and close, we don't need to copy descriptors in that range, so
281+
* a smaller cloned descriptor table might suffice if the last
282+
* currently opened descriptor falls into that range.
301283
*/
302-
static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
284+
static unsigned int sane_fdtable_size(struct fdtable *fdt, struct fd_range *punch_hole)
303285
{
304-
unsigned int count;
305-
306-
count = count_open_files(fdt);
307-
if (max_fds < NR_OPEN_DEFAULT)
308-
max_fds = NR_OPEN_DEFAULT;
309-
return ALIGN(min(count, max_fds), BITS_PER_LONG);
286+
unsigned int last = find_last_bit(fdt->open_fds, fdt->max_fds);
287+
288+
if (last == fdt->max_fds)
289+
return NR_OPEN_DEFAULT;
290+
if (punch_hole && punch_hole->to >= last && punch_hole->from <= last) {
291+
last = find_last_bit(fdt->open_fds, punch_hole->from);
292+
if (last == punch_hole->from)
293+
return NR_OPEN_DEFAULT;
294+
}
295+
return ALIGN(last + 1, BITS_PER_LONG);
310296
}
311297

312298
/*
313-
* Allocate a new files structure and copy contents from the
314-
* passed in files structure.
315-
* errorp will be valid only when the returned files_struct is NULL.
299+
* Allocate a new descriptor table and copy contents from the passed in
300+
* instance. Returns a pointer to cloned table on success, ERR_PTR()
301+
* on failure. For 'punch_hole' see sane_fdtable_size().
316302
*/
317-
struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int *errorp)
303+
struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_hole)
318304
{
319305
struct files_struct *newf;
320306
struct file **old_fds, **new_fds;
321307
unsigned int open_files, i;
322308
struct fdtable *old_fdt, *new_fdt;
309+
int error;
323310

324-
*errorp = -ENOMEM;
325311
newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
326312
if (!newf)
327-
goto out;
313+
return ERR_PTR(-ENOMEM);
328314

329315
atomic_set(&newf->count, 1);
330316

@@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
341327

342328
spin_lock(&oldf->file_lock);
343329
old_fdt = files_fdtable(oldf);
344-
open_files = sane_fdtable_size(old_fdt, max_fds);
330+
open_files = sane_fdtable_size(old_fdt, punch_hole);
345331

346332
/*
347333
* Check whether we need to allocate a larger fd array and fd set.
@@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
354340

355341
new_fdt = alloc_fdtable(open_files - 1);
356342
if (!new_fdt) {
357-
*errorp = -ENOMEM;
343+
error = -ENOMEM;
358344
goto out_release;
359345
}
360346

361347
/* beyond sysctl_nr_open; nothing to do */
362348
if (unlikely(new_fdt->max_fds < open_files)) {
363349
__free_fdtable(new_fdt);
364-
*errorp = -EMFILE;
350+
error = -EMFILE;
365351
goto out_release;
366352
}
367353

@@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
372358
*/
373359
spin_lock(&oldf->file_lock);
374360
old_fdt = files_fdtable(oldf);
375-
open_files = sane_fdtable_size(old_fdt, max_fds);
361+
open_files = sane_fdtable_size(old_fdt, punch_hole);
376362
}
377363

378364
copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
@@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
406392

407393
out_release:
408394
kmem_cache_free(files_cachep, newf);
409-
out:
410-
return NULL;
395+
return ERR_PTR(error);
411396
}
412397

413398
static struct fdtable *close_files(struct files_struct * files)
@@ -748,37 +733,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
748733
if (fd > max_fd)
749734
return -EINVAL;
750735

751-
if (flags & CLOSE_RANGE_UNSHARE) {
752-
int ret;
753-
unsigned int max_unshare_fds = NR_OPEN_MAX;
736+
if ((flags & CLOSE_RANGE_UNSHARE) && atomic_read(&cur_fds->count) > 1) {
737+
struct fd_range range = {fd, max_fd}, *punch_hole = &range;
754738

755739
/*
756740
* If the caller requested all fds to be made cloexec we always
757741
* copy all of the file descriptors since they still want to
758742
* use them.
759743
*/
760-
if (!(flags & CLOSE_RANGE_CLOEXEC)) {
761-
/*
762-
* If the requested range is greater than the current
763-
* maximum, we're closing everything so only copy all
764-
* file descriptors beneath the lowest file descriptor.
765-
*/
766-
rcu_read_lock();
767-
if (max_fd >= last_fd(files_fdtable(cur_fds)))
768-
max_unshare_fds = fd;
769-
rcu_read_unlock();
770-
}
771-
772-
ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
773-
if (ret)
774-
return ret;
744+
if (flags & CLOSE_RANGE_CLOEXEC)
745+
punch_hole = NULL;
775746

747+
fds = dup_fd(cur_fds, punch_hole);
748+
if (IS_ERR(fds))
749+
return PTR_ERR(fds);
776750
/*
777751
* We used to share our file descriptor table, and have now
778752
* created a private one, make sure we're using it below.
779753
*/
780-
if (fds)
781-
swap(cur_fds, fds);
754+
swap(cur_fds, fds);
782755
}
783756

784757
if (flags & CLOSE_RANGE_CLOEXEC)

include/linux/fdtable.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
* as this is the granularity returned by copy_fdset().
2323
*/
2424
#define NR_OPEN_DEFAULT BITS_PER_LONG
25-
#define NR_OPEN_MAX ~0U
2625

2726
struct fdtable {
2827
unsigned int max_fds;
@@ -106,7 +105,10 @@ struct task_struct;
106105

107106
void put_files_struct(struct files_struct *fs);
108107
int unshare_files(void);
109-
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
108+
struct fd_range {
109+
unsigned int from, to;
110+
};
111+
struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy;
110112
void do_close_on_exec(struct files_struct *);
111113
int iterate_fd(struct files_struct *, unsigned,
112114
int (*)(const void *, struct file *, unsigned),
@@ -115,8 +117,6 @@ int iterate_fd(struct files_struct *, unsigned,
115117
extern int close_fd(unsigned int fd);
116118
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
117119
extern struct file *file_close_fd(unsigned int fd);
118-
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
119-
struct files_struct **new_fdp);
120120

121121
extern struct kmem_cache *files_cachep;
122122

kernel/fork.c

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1756,33 +1756,30 @@ static int copy_files(unsigned long clone_flags, struct task_struct *tsk,
17561756
int no_files)
17571757
{
17581758
struct files_struct *oldf, *newf;
1759-
int error = 0;
17601759

17611760
/*
17621761
* A background process may not have any files ...
17631762
*/
17641763
oldf = current->files;
17651764
if (!oldf)
1766-
goto out;
1765+
return 0;
17671766

17681767
if (no_files) {
17691768
tsk->files = NULL;
1770-
goto out;
1769+
return 0;
17711770
}
17721771

17731772
if (clone_flags & CLONE_FILES) {
17741773
atomic_inc(&oldf->count);
1775-
goto out;
1774+
return 0;
17761775
}
17771776

1778-
newf = dup_fd(oldf, NR_OPEN_MAX, &error);
1779-
if (!newf)
1780-
goto out;
1777+
newf = dup_fd(oldf, NULL);
1778+
if (IS_ERR(newf))
1779+
return PTR_ERR(newf);
17811780

17821781
tsk->files = newf;
1783-
error = 0;
1784-
out:
1785-
return error;
1782+
return 0;
17861783
}
17871784

17881785
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
@@ -3238,17 +3235,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
32383235
/*
32393236
* Unshare file descriptor table if it is being shared
32403237
*/
3241-
int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
3242-
struct files_struct **new_fdp)
3238+
static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
32433239
{
32443240
struct files_struct *fd = current->files;
3245-
int error = 0;
32463241

32473242
if ((unshare_flags & CLONE_FILES) &&
32483243
(fd && atomic_read(&fd->count) > 1)) {
3249-
*new_fdp = dup_fd(fd, max_fds, &error);
3250-
if (!*new_fdp)
3251-
return error;
3244+
fd = dup_fd(fd, NULL);
3245+
if (IS_ERR(fd))
3246+
return PTR_ERR(fd);
3247+
*new_fdp = fd;
32523248
}
32533249

32543250
return 0;
@@ -3306,7 +3302,7 @@ int ksys_unshare(unsigned long unshare_flags)
33063302
err = unshare_fs(unshare_flags, &new_fs);
33073303
if (err)
33083304
goto bad_unshare_out;
3309-
err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
3305+
err = unshare_fd(unshare_flags, &new_fd);
33103306
if (err)
33113307
goto bad_unshare_cleanup_fs;
33123308
err = unshare_userns(unshare_flags, &new_cred);
@@ -3398,7 +3394,7 @@ int unshare_files(void)
33983394
struct files_struct *old, *copy = NULL;
33993395
int error;
34003396

3401-
error = unshare_fd(CLONE_FILES, NR_OPEN_MAX, &copy);
3397+
error = unshare_fd(CLONE_FILES, &copy);
34023398
if (error || !copy)
34033399
return error;
34043400

0 commit comments

Comments
 (0)