Skip to content

Commit e386dfc

Browse files
committed
fget: clarify and improve __fget_files() implementation
Commit 054aa8d ("fget: check that the fd still exists after getting a ref to it") fixed a race with getting a reference to a file just as it was being closed. It was a fairly minimal patch, and I didn't think re-checking the file pointer lookup would be a measurable overhead, since it was all right there and cached. But I was wrong, as pointed out by the kernel test robot. The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed quite noticeably. Admittedly it seems to be a very artificial test: doing "poll()" system calls on regular files in a very tight loop in multiple threads. That means that basically all the time is spent just looking up file descriptors without ever doing anything useful with them (not that doing 'poll()' on a regular file is useful to begin with). And as a result it shows the extra "re-check fd" cost as a sore thumb. Happily, the regression is fixable by just writing the code to loook up the fd to be better and clearer. There's still a cost to verify the file pointer, but now it's basically in the noise even for that benchmark that does nothing else - and the code is more understandable and has better comments too. [ Side note: this patch is also a classic case of one that looks very messy with the default greedy Myers diff - it's much more legible with either the patience of histogram diff algorithm ] Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/[email protected]/ Reported-by: kernel test robot <[email protected]> Tested-by: Carel Si <[email protected]> Cc: Jann Horn <[email protected]> Cc: Miklos Szeredi <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent 2585cf9 commit e386dfc

File tree

1 file changed

+56
-16
lines changed

1 file changed

+56
-16
lines changed

fs/file.c

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -841,28 +841,68 @@ void do_close_on_exec(struct files_struct *files)
841841
spin_unlock(&files->file_lock);
842842
}
843843

844-
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
845-
fmode_t mask, unsigned int refs)
844+
static inline struct file *__fget_files_rcu(struct files_struct *files,
845+
unsigned int fd, fmode_t mask, unsigned int refs)
846846
{
847-
struct file *file;
847+
for (;;) {
848+
struct file *file;
849+
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
850+
struct file __rcu **fdentry;
848851

849-
rcu_read_lock();
850-
loop:
851-
file = files_lookup_fd_rcu(files, fd);
852-
if (file) {
853-
/* File object ref couldn't be taken.
854-
* dup2() atomicity guarantee is the reason
855-
* we loop to catch the new file (or NULL pointer)
852+
if (unlikely(fd >= fdt->max_fds))
853+
return NULL;
854+
855+
fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds);
856+
file = rcu_dereference_raw(*fdentry);
857+
if (unlikely(!file))
858+
return NULL;
859+
860+
if (unlikely(file->f_mode & mask))
861+
return NULL;
862+
863+
/*
864+
* Ok, we have a file pointer. However, because we do
865+
* this all locklessly under RCU, we may be racing with
866+
* that file being closed.
867+
*
868+
* Such a race can take two forms:
869+
*
870+
* (a) the file ref already went down to zero,
871+
* and get_file_rcu_many() fails. Just try
872+
* again:
856873
*/
857-
if (file->f_mode & mask)
858-
file = NULL;
859-
else if (!get_file_rcu_many(file, refs))
860-
goto loop;
861-
else if (files_lookup_fd_raw(files, fd) != file) {
874+
if (unlikely(!get_file_rcu_many(file, refs)))
875+
continue;
876+
877+
/*
878+
* (b) the file table entry has changed under us.
879+
* Note that we don't need to re-check the 'fdt->fd'
880+
* pointer having changed, because it always goes
881+
* hand-in-hand with 'fdt'.
882+
*
883+
* If so, we need to put our refs and try again.
884+
*/
885+
if (unlikely(rcu_dereference_raw(files->fdt) != fdt) ||
886+
unlikely(rcu_dereference_raw(*fdentry) != file)) {
862887
fput_many(file, refs);
863-
goto loop;
888+
continue;
864889
}
890+
891+
/*
892+
* Ok, we have a ref to the file, and checked that it
893+
* still exists.
894+
*/
895+
return file;
865896
}
897+
}
898+
899+
static struct file *__fget_files(struct files_struct *files, unsigned int fd,
900+
fmode_t mask, unsigned int refs)
901+
{
902+
struct file *file;
903+
904+
rcu_read_lock();
905+
file = __fget_files_rcu(files, fd, mask, refs);
866906
rcu_read_unlock();
867907

868908
return file;

0 commit comments

Comments
 (0)