Skip to content

Commit 912b82d

Browse files
committed
Merge tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file handling updates from Christian Brauner:
 "This contains performance improvements for struct file's new refcount
  mechanism and various other performance work:

   - The stock kernel transitioning the file to no refs held penalizes
     the caller with an extra atomic to block any increments. For cases
     where the file is highly likely to be going away this is easily
     avoidable.

     Add file_ref_put_close() to better handle the common case where
     closing a file descriptor also operates on the last reference and
     build fput_close_sync() and fput_close() on top of it. This brings
     about 1% performance improvement by eliding one atomic in the
     common case.

   - Predict no error in close() since the vast majority of the time
     the system call returns 0.

   - Reduce the work done in fdget_pos() by predicting that the file
     was found and by explicitly comparing the reference count to one
     and ignoring the dead zone"

* tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: reduce work in fdget_pos()
  fs: use fput_close() in path_openat()
  fs: use fput_close() in filp_close()
  fs: use fput_close_sync() in close()
  file: add fput and file_ref_put routines optimized for use when closing a fd
  fs: predict no error in close()
2 parents d41066d + 5370b43 commit 912b82d

File tree

6 files changed

+141
-49
lines changed

6 files changed

+141
-49
lines changed

fs/file.c

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,28 @@
2626

2727
#include "internal.h"
2828

29+
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
30+
{
31+
/*
32+
* If the reference count was already in the dead zone, then this
33+
* put() operation is imbalanced. Warn, put the reference count back to
34+
* DEAD and tell the caller to not deconstruct the object.
35+
*/
36+
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
37+
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
38+
return false;
39+
}
40+
41+
/*
42+
* This is a put() operation on a saturated refcount. Restore the
43+
* mean saturation value and tell the caller to not deconstruct the
44+
* object.
45+
*/
46+
if (cnt > FILE_REF_MAXREF)
47+
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
48+
return false;
49+
}
50+
2951
/**
3052
* __file_ref_put - Slowpath of file_ref_put()
3153
* @ref: Pointer to the reference count
@@ -67,24 +89,7 @@ bool __file_ref_put(file_ref_t *ref, unsigned long cnt)
6789
return true;
6890
}
6991

70-
/*
71-
* If the reference count was already in the dead zone, then this
72-
* put() operation is imbalanced. Warn, put the reference count back to
73-
* DEAD and tell the caller to not deconstruct the object.
74-
*/
75-
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
76-
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
77-
return false;
78-
}
79-
80-
/*
81-
* This is a put() operation on a saturated refcount. Restore the
82-
* mean saturation value and tell the caller to not deconstruct the
83-
* object.
84-
*/
85-
if (cnt > FILE_REF_MAXREF)
86-
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
87-
return false;
92+
return __file_ref_put_badval(ref, cnt);
8893
}
8994
EXPORT_SYMBOL_GPL(__file_ref_put);
9095

@@ -1179,8 +1184,13 @@ struct fd fdget_raw(unsigned int fd)
11791184
*/
11801185
static inline bool file_needs_f_pos_lock(struct file *file)
11811186
{
1182-
return (file->f_mode & FMODE_ATOMIC_POS) &&
1183-
(file_count(file) > 1 || file->f_op->iterate_shared);
1187+
if (!(file->f_mode & FMODE_ATOMIC_POS))
1188+
return false;
1189+
if (__file_ref_read_raw(&file->f_ref) != FILE_REF_ONEREF)
1190+
return true;
1191+
if (file->f_op->iterate_shared)
1192+
return true;
1193+
return false;
11841194
}
11851195

11861196
bool file_seek_cur_needs_f_lock(struct file *file)
@@ -1198,7 +1208,7 @@ struct fd fdget_pos(unsigned int fd)
11981208
struct fd f = fdget(fd);
11991209
struct file *file = fd_file(f);
12001210

1201-
if (file && file_needs_f_pos_lock(file)) {
1211+
if (likely(file) && file_needs_f_pos_lock(file)) {
12021212
f.word |= FDPUT_POS_UNLOCK;
12031213
mutex_lock(&file->f_pos_lock);
12041214
}

fs/file_table.c

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -512,31 +512,37 @@ void flush_delayed_fput(void)
512512
}
513513
EXPORT_SYMBOL_GPL(flush_delayed_fput);
514514

515-
void fput(struct file *file)
515+
static void __fput_deferred(struct file *file)
516516
{
517-
if (file_ref_put(&file->f_ref)) {
518-
struct task_struct *task = current;
517+
struct task_struct *task = current;
518+
519+
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
520+
file_free(file);
521+
return;
522+
}
519523

520-
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
521-
file_free(file);
524+
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
525+
init_task_work(&file->f_task_work, ____fput);
526+
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
522527
return;
523-
}
524-
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
525-
init_task_work(&file->f_task_work, ____fput);
526-
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
527-
return;
528-
/*
529-
* After this task has run exit_task_work(),
530-
* task_work_add() will fail. Fall through to delayed
531-
* fput to avoid leaking *file.
532-
*/
533-
}
534-
535-
if (llist_add(&file->f_llist, &delayed_fput_list))
536-
schedule_delayed_work(&delayed_fput_work, 1);
528+
/*
529+
* After this task has run exit_task_work(),
530+
* task_work_add() will fail. Fall through to delayed
531+
* fput to avoid leaking *file.
532+
*/
537533
}
534+
535+
if (llist_add(&file->f_llist, &delayed_fput_list))
536+
schedule_delayed_work(&delayed_fput_work, 1);
538537
}
539538

539+
void fput(struct file *file)
540+
{
541+
if (unlikely(file_ref_put(&file->f_ref)))
542+
__fput_deferred(file);
543+
}
544+
EXPORT_SYMBOL(fput);
545+
540546
/*
541547
* synchronous analog of fput(); for kernel threads that might be needed
542548
* in some umount() (and thus can't use flush_delayed_fput() without
@@ -550,10 +556,32 @@ void __fput_sync(struct file *file)
550556
if (file_ref_put(&file->f_ref))
551557
__fput(file);
552558
}
553-
554-
EXPORT_SYMBOL(fput);
555559
EXPORT_SYMBOL(__fput_sync);
556560

561+
/*
562+
* Equivalent to __fput_sync(), but optimized for being called with the last
563+
* reference.
564+
*
565+
* See file_ref_put_close() for details.
566+
*/
567+
void fput_close_sync(struct file *file)
568+
{
569+
if (likely(file_ref_put_close(&file->f_ref)))
570+
__fput(file);
571+
}
572+
573+
/*
574+
* Equivalent to fput(), but optimized for being called with the last
575+
* reference.
576+
*
577+
* See file_ref_put_close() for details.
578+
*/
579+
void fput_close(struct file *file)
580+
{
581+
if (file_ref_put_close(&file->f_ref))
582+
__fput_deferred(file);
583+
}
584+
557585
void __init files_init(void)
558586
{
559587
struct kmem_cache_args args = {

fs/internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ static inline void put_file_access(struct file *file)
118118
}
119119
}
120120

121+
void fput_close_sync(struct file *);
122+
void fput_close(struct file *);
123+
121124
/*
122125
* super.c
123126
*/

fs/namei.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4010,7 +4010,7 @@ static struct file *path_openat(struct nameidata *nd,
40104010
WARN_ON(1);
40114011
error = -EINVAL;
40124012
}
4013-
fput(file);
4013+
fput_close(file);
40144014
if (error == -EOPENSTALE) {
40154015
if (flags & LOOKUP_RCU)
40164016
error = -ECHILD;

fs/open.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1552,7 +1552,7 @@ int filp_close(struct file *filp, fl_owner_t id)
15521552
int retval;
15531553

15541554
retval = filp_flush(filp, id);
1555-
fput(filp);
1555+
fput_close(filp);
15561556

15571557
return retval;
15581558
}
@@ -1578,13 +1578,16 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
15781578
* We're returning to user space. Don't bother
15791579
* with any delayed fput() cases.
15801580
*/
1581-
__fput_sync(file);
1581+
fput_close_sync(file);
1582+
1583+
if (likely(retval == 0))
1584+
return 0;
15821585

15831586
/* can't restart close syscall because file table entry was cleared */
1584-
if (unlikely(retval == -ERESTARTSYS ||
1585-
retval == -ERESTARTNOINTR ||
1586-
retval == -ERESTARTNOHAND ||
1587-
retval == -ERESTART_RESTARTBLOCK))
1587+
if (retval == -ERESTARTSYS ||
1588+
retval == -ERESTARTNOINTR ||
1589+
retval == -ERESTARTNOHAND ||
1590+
retval == -ERESTART_RESTARTBLOCK)
15881591
retval = -EINTR;
15891592

15901593
return retval;

include/linux/file_ref.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
6161
atomic_long_set(&ref->refcnt, cnt - 1);
6262
}
6363

64+
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt);
6465
bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
6566

6667
/**
@@ -160,6 +161,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
160161
return __file_ref_put(ref, cnt);
161162
}
162163

164+
/**
165+
* file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF
166+
* @ref: Pointer to the reference count
167+
*
168+
* Semantically it is equivalent to calling file_ref_put(), but it trades lower
169+
* performance in face of other CPUs also modifying the refcount for higher
170+
* performance when this happens to be the last reference.
171+
*
172+
* For the last reference file_ref_put() issues 2 atomics. One to drop the
173+
* reference and another to transition it to FILE_REF_DEAD. This routine does
174+
* the work in one step, but in order to do it has to pre-read the variable which
175+
* decreases scalability.
176+
*
177+
* Use with close() et al, stick to file_ref_put() by default.
178+
*/
179+
static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref)
180+
{
181+
long old, new;
182+
183+
old = atomic_long_read(&ref->refcnt);
184+
do {
185+
if (unlikely(old < 0))
186+
return __file_ref_put_badval(ref, old);
187+
188+
if (old == FILE_REF_ONEREF)
189+
new = FILE_REF_DEAD;
190+
else
191+
new = old - 1;
192+
} while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new));
193+
194+
return new == FILE_REF_DEAD;
195+
}
196+
163197
/**
164198
* file_ref_read - Read the number of file references
165199
* @ref: Pointer to the reference count
@@ -174,4 +208,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref)
174208
return c >= FILE_REF_RELEASED ? 0 : c + 1;
175209
}
176210

211+
/*
212+
* __file_ref_read_raw - Return the value stored in ref->refcnt
213+
* @ref: Pointer to the reference count
214+
*
215+
* Return: The raw value found in the counter
216+
*
217+
* A hack for file_needs_f_pos_lock(), you probably want to use
218+
* file_ref_read() instead.
219+
*/
220+
static inline unsigned long __file_ref_read_raw(file_ref_t *ref)
221+
{
222+
return atomic_long_read(&ref->refcnt);
223+
}
224+
177225
#endif

0 commit comments

Comments
 (0)