@@ -272,59 +272,45 @@ static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
272
272
return test_bit (fd , fdt -> open_fds );
273
273
}
274
274
275
- static unsigned int count_open_files (struct fdtable * fdt )
276
- {
277
- unsigned int size = fdt -> max_fds ;
278
- unsigned int i ;
279
-
280
- /* Find the last open fd */
281
- for (i = size / BITS_PER_LONG ; i > 0 ; ) {
282
- if (fdt -> open_fds [-- i ])
283
- break ;
284
- }
285
- i = (i + 1 ) * BITS_PER_LONG ;
286
- return i ;
287
- }
288
-
289
275
/*
290
276
* Note that a sane fdtable size always has to be a multiple of
291
277
* BITS_PER_LONG, since we have bitmaps that are sized by this.
292
278
*
293
- * 'max_fds' will normally already be properly aligned, but it
294
- * turns out that in the close_range() -> __close_range() ->
295
- * unshare_fd() -> dup_fd() -> sane_fdtable_size() we can end
296
- * up having a 'max_fds' value that isn't already aligned.
297
- *
298
- * Rather than make close_range() have to worry about this,
299
- * just make that BITS_PER_LONG alignment be part of a sane
300
- * fdtable size. Because that's really what it is.
279
+ * punch_hole is optional - when close_range() is asked to unshare
280
+ * and close, we don't need to copy descriptors in that range, so
281
+ * a smaller cloned descriptor table might suffice if the last
282
+ * currently opened descriptor falls into that range.
301
283
*/
302
- static unsigned int sane_fdtable_size (struct fdtable * fdt , unsigned int max_fds )
284
+ static unsigned int sane_fdtable_size (struct fdtable * fdt , struct fd_range * punch_hole )
303
285
{
304
- unsigned int count ;
305
-
306
- count = count_open_files (fdt );
307
- if (max_fds < NR_OPEN_DEFAULT )
308
- max_fds = NR_OPEN_DEFAULT ;
309
- return ALIGN (min (count , max_fds ), BITS_PER_LONG );
286
+ unsigned int last = find_last_bit (fdt -> open_fds , fdt -> max_fds );
287
+
288
+ if (last == fdt -> max_fds )
289
+ return NR_OPEN_DEFAULT ;
290
+ if (punch_hole && punch_hole -> to >= last && punch_hole -> from <= last ) {
291
+ last = find_last_bit (fdt -> open_fds , punch_hole -> from );
292
+ if (last == punch_hole -> from )
293
+ return NR_OPEN_DEFAULT ;
294
+ }
295
+ return ALIGN (last + 1 , BITS_PER_LONG );
310
296
}
311
297
312
298
/*
313
- * Allocate a new files structure and copy contents from the
314
- * passed in files structure.
315
- * errorp will be valid only when the returned files_struct is NULL .
299
+ * Allocate a new descriptor table and copy contents from the passed in
300
+ * instance. Returns a pointer to cloned table on success, ERR_PTR()
301
+ * on failure. For 'punch_hole' see sane_fdtable_size() .
316
302
*/
317
- struct files_struct * dup_fd (struct files_struct * oldf , unsigned int max_fds , int * errorp )
303
+ struct files_struct * dup_fd (struct files_struct * oldf , struct fd_range * punch_hole )
318
304
{
319
305
struct files_struct * newf ;
320
306
struct file * * old_fds , * * new_fds ;
321
307
unsigned int open_files , i ;
322
308
struct fdtable * old_fdt , * new_fdt ;
309
+ int error ;
323
310
324
- * errorp = - ENOMEM ;
325
311
newf = kmem_cache_alloc (files_cachep , GFP_KERNEL );
326
312
if (!newf )
327
- goto out ;
313
+ return ERR_PTR ( - ENOMEM ) ;
328
314
329
315
atomic_set (& newf -> count , 1 );
330
316
@@ -341,7 +327,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
341
327
342
328
spin_lock (& oldf -> file_lock );
343
329
old_fdt = files_fdtable (oldf );
344
- open_files = sane_fdtable_size (old_fdt , max_fds );
330
+ open_files = sane_fdtable_size (old_fdt , punch_hole );
345
331
346
332
/*
347
333
* Check whether we need to allocate a larger fd array and fd set.
@@ -354,14 +340,14 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
354
340
355
341
new_fdt = alloc_fdtable (open_files - 1 );
356
342
if (!new_fdt ) {
357
- * errorp = - ENOMEM ;
343
+ error = - ENOMEM ;
358
344
goto out_release ;
359
345
}
360
346
361
347
/* beyond sysctl_nr_open; nothing to do */
362
348
if (unlikely (new_fdt -> max_fds < open_files )) {
363
349
__free_fdtable (new_fdt );
364
- * errorp = - EMFILE ;
350
+ error = - EMFILE ;
365
351
goto out_release ;
366
352
}
367
353
@@ -372,7 +358,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
372
358
*/
373
359
spin_lock (& oldf -> file_lock );
374
360
old_fdt = files_fdtable (oldf );
375
- open_files = sane_fdtable_size (old_fdt , max_fds );
361
+ open_files = sane_fdtable_size (old_fdt , punch_hole );
376
362
}
377
363
378
364
copy_fd_bitmaps (new_fdt , old_fdt , open_files / BITS_PER_LONG );
@@ -406,8 +392,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
406
392
407
393
out_release :
408
394
kmem_cache_free (files_cachep , newf );
409
- out :
410
- return NULL ;
395
+ return ERR_PTR (error );
411
396
}
412
397
413
398
static struct fdtable * close_files (struct files_struct * files )
@@ -748,37 +733,25 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
748
733
if (fd > max_fd )
749
734
return - EINVAL ;
750
735
751
- if (flags & CLOSE_RANGE_UNSHARE ) {
752
- int ret ;
753
- unsigned int max_unshare_fds = NR_OPEN_MAX ;
736
+ if ((flags & CLOSE_RANGE_UNSHARE ) && atomic_read (& cur_fds -> count ) > 1 ) {
737
+ struct fd_range range = {fd , max_fd }, * punch_hole = & range ;
754
738
755
739
/*
756
740
* If the caller requested all fds to be made cloexec we always
757
741
* copy all of the file descriptors since they still want to
758
742
* use them.
759
743
*/
760
- if (!(flags & CLOSE_RANGE_CLOEXEC )) {
761
- /*
762
- * If the requested range is greater than the current
763
- * maximum, we're closing everything so only copy all
764
- * file descriptors beneath the lowest file descriptor.
765
- */
766
- rcu_read_lock ();
767
- if (max_fd >= last_fd (files_fdtable (cur_fds )))
768
- max_unshare_fds = fd ;
769
- rcu_read_unlock ();
770
- }
771
-
772
- ret = unshare_fd (CLONE_FILES , max_unshare_fds , & fds );
773
- if (ret )
774
- return ret ;
744
+ if (flags & CLOSE_RANGE_CLOEXEC )
745
+ punch_hole = NULL ;
775
746
747
+ fds = dup_fd (cur_fds , punch_hole );
748
+ if (IS_ERR (fds ))
749
+ return PTR_ERR (fds );
776
750
/*
777
751
* We used to share our file descriptor table, and have now
778
752
* created a private one, make sure we're using it below.
779
753
*/
780
- if (fds )
781
- swap (cur_fds , fds );
754
+ swap (cur_fds , fds );
782
755
}
783
756
784
757
if (flags & CLOSE_RANGE_CLOEXEC )
0 commit comments