Skip to content

Commit dba2e3b

Browse files
committed
Merge patch series "avoid the extra atomic on a ref when closing a fd"
Mateusz Guzik <[email protected]> says: The stock kernel transitioning the file to no refs held penalizes the caller with an extra atomic to block any increments. For cases where the file is highly likely to be going away this is easily avoidable. In the open+close case the win is very modest because of the following problems: - kmem and memcg having terrible performance - putname using an atomic (I have a wip to whack that) - open performing an extra ref/unref on the dentry (there are patches to do it, including by Al. I mailed about them in [1]) - creds using atomics (I have a wip to whack that) - apparmor using atomics (ditto, same mechanism) On top of that I have a WIP patch to dodge some of the work at lookup itself. All in all there is several % avoidably lost here. stats colected during a kernel build with: bpftrace -e 'kprobe:filp_close,kprobe:fput,kprobe:fput_close* { @[probe] = hist(((struct file *)arg0)->f_ref.refcnt.counter > 0); }' @[kprobe:filp_close]: [0] 32195 |@@@@@@@@@@ | [1] 164567 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| @[kprobe:fput]: [0] 339240 |@@@@@@ | [1] 2888064 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| @[kprobe:fput_close]: [0] 5116767 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [1] 164544 |@ | @[kprobe:fput_close_sync]: [0] 5340660 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| [1] 358943 |@@@ | 0 indicates the last reference, 1 that there is more. filp_close is largely skewed because of close_on_exec. vast majority of last fputs are from remove_vma. I think that code wants to be patched to batch them (as in something like fput_many should be added -- something for later). [1] https://lore.kernel.org/linux-fsdevel/[email protected]/T/#u * patches from https://lore.kernel.org/r/[email protected]: fs: use fput_close() in path_openat() fs: use fput_close() in filp_close() fs: use fput_close_sync() in close() file: add fput and file_ref_put routines optimized for use when closing a fd Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Christian Brauner <[email protected]>
2 parents 23e4903 + 606623d commit dba2e3b

File tree

6 files changed

+112
-42
lines changed

6 files changed

+112
-42
lines changed

fs/file.c

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,28 @@
2626

2727
#include "internal.h"
2828

29+
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt)
30+
{
31+
/*
32+
* If the reference count was already in the dead zone, then this
33+
* put() operation is imbalanced. Warn, put the reference count back to
34+
* DEAD and tell the caller to not deconstruct the object.
35+
*/
36+
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
37+
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
38+
return false;
39+
}
40+
41+
/*
42+
* This is a put() operation on a saturated refcount. Restore the
43+
* mean saturation value and tell the caller to not deconstruct the
44+
* object.
45+
*/
46+
if (cnt > FILE_REF_MAXREF)
47+
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
48+
return false;
49+
}
50+
2951
/**
3052
* __file_ref_put - Slowpath of file_ref_put()
3153
* @ref: Pointer to the reference count
@@ -67,24 +89,7 @@ bool __file_ref_put(file_ref_t *ref, unsigned long cnt)
6789
return true;
6890
}
6991

70-
/*
71-
* If the reference count was already in the dead zone, then this
72-
* put() operation is imbalanced. Warn, put the reference count back to
73-
* DEAD and tell the caller to not deconstruct the object.
74-
*/
75-
if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
76-
atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
77-
return false;
78-
}
79-
80-
/*
81-
* This is a put() operation on a saturated refcount. Restore the
82-
* mean saturation value and tell the caller to not deconstruct the
83-
* object.
84-
*/
85-
if (cnt > FILE_REF_MAXREF)
86-
atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
87-
return false;
92+
return __file_ref_put_badval(ref, cnt);
8893
}
8994
EXPORT_SYMBOL_GPL(__file_ref_put);
9095

fs/file_table.c

Lines changed: 49 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -495,31 +495,37 @@ void flush_delayed_fput(void)
495495
}
496496
EXPORT_SYMBOL_GPL(flush_delayed_fput);
497497

498-
void fput(struct file *file)
498+
static void __fput_deferred(struct file *file)
499499
{
500-
if (file_ref_put(&file->f_ref)) {
501-
struct task_struct *task = current;
500+
struct task_struct *task = current;
501+
502+
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
503+
file_free(file);
504+
return;
505+
}
502506

503-
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
504-
file_free(file);
507+
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
508+
init_task_work(&file->f_task_work, ____fput);
509+
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
505510
return;
506-
}
507-
if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
508-
init_task_work(&file->f_task_work, ____fput);
509-
if (!task_work_add(task, &file->f_task_work, TWA_RESUME))
510-
return;
511-
/*
512-
* After this task has run exit_task_work(),
513-
* task_work_add() will fail. Fall through to delayed
514-
* fput to avoid leaking *file.
515-
*/
516-
}
517-
518-
if (llist_add(&file->f_llist, &delayed_fput_list))
519-
schedule_delayed_work(&delayed_fput_work, 1);
511+
/*
512+
* After this task has run exit_task_work(),
513+
* task_work_add() will fail. Fall through to delayed
514+
* fput to avoid leaking *file.
515+
*/
520516
}
517+
518+
if (llist_add(&file->f_llist, &delayed_fput_list))
519+
schedule_delayed_work(&delayed_fput_work, 1);
521520
}
522521

522+
void fput(struct file *file)
523+
{
524+
if (unlikely(file_ref_put(&file->f_ref)))
525+
__fput_deferred(file);
526+
}
527+
EXPORT_SYMBOL(fput);
528+
523529
/*
524530
* synchronous analog of fput(); for kernel threads that might be needed
525531
* in some umount() (and thus can't use flush_delayed_fput() without
@@ -533,10 +539,32 @@ void __fput_sync(struct file *file)
533539
if (file_ref_put(&file->f_ref))
534540
__fput(file);
535541
}
536-
537-
EXPORT_SYMBOL(fput);
538542
EXPORT_SYMBOL(__fput_sync);
539543

544+
/*
545+
* Equivalent to __fput_sync(), but optimized for being called with the last
546+
* reference.
547+
*
548+
* See file_ref_put_close() for details.
549+
*/
550+
void fput_close_sync(struct file *file)
551+
{
552+
if (likely(file_ref_put_close(&file->f_ref)))
553+
__fput(file);
554+
}
555+
556+
/*
557+
* Equivalent to fput(), but optimized for being called with the last
558+
* reference.
559+
*
560+
* See file_ref_put_close() for details.
561+
*/
562+
void fput_close(struct file *file)
563+
{
564+
if (file_ref_put_close(&file->f_ref))
565+
__fput_deferred(file);
566+
}
567+
540568
void __init files_init(void)
541569
{
542570
struct kmem_cache_args args = {

fs/internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ static inline void put_file_access(struct file *file)
118118
}
119119
}
120120

121+
void fput_close_sync(struct file *);
122+
void fput_close(struct file *);
123+
121124
/*
122125
* super.c
123126
*/

fs/namei.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3995,7 +3995,7 @@ static struct file *path_openat(struct nameidata *nd,
39953995
WARN_ON(1);
39963996
error = -EINVAL;
39973997
}
3998-
fput(file);
3998+
fput_close(file);
39993999
if (error == -EOPENSTALE) {
40004000
if (flags & LOOKUP_RCU)
40014001
error = -ECHILD;

fs/open.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,7 +1550,7 @@ int filp_close(struct file *filp, fl_owner_t id)
15501550
int retval;
15511551

15521552
retval = filp_flush(filp, id);
1553-
fput(filp);
1553+
fput_close(filp);
15541554

15551555
return retval;
15561556
}
@@ -1576,7 +1576,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
15761576
* We're returning to user space. Don't bother
15771577
* with any delayed fput() cases.
15781578
*/
1579-
__fput_sync(file);
1579+
fput_close_sync(file);
15801580

15811581
if (likely(retval == 0))
15821582
return 0;

include/linux/file_ref.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
6161
atomic_long_set(&ref->refcnt, cnt - 1);
6262
}
6363

64+
bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt);
6465
bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
6566

6667
/**
@@ -160,6 +161,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
160161
return __file_ref_put(ref, cnt);
161162
}
162163

164+
/**
165+
* file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF
166+
* @ref: Pointer to the reference count
167+
*
168+
* Semantically it is equivalent to calling file_ref_put(), but it trades lower
169+
* performance in face of other CPUs also modifying the refcount for higher
170+
* performance when this happens to be the last reference.
171+
*
172+
* For the last reference file_ref_put() issues 2 atomics. One to drop the
173+
* reference and another to transition it to FILE_REF_DEAD. This routine does
174+
* the work in one step, but in order to do it has to pre-read the variable which
175+
* decreases scalability.
176+
*
177+
* Use with close() et al, stick to file_ref_put() by default.
178+
*/
179+
static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref)
180+
{
181+
long old, new;
182+
183+
old = atomic_long_read(&ref->refcnt);
184+
do {
185+
if (unlikely(old < 0))
186+
return __file_ref_put_badval(ref, old);
187+
188+
if (old == FILE_REF_ONEREF)
189+
new = FILE_REF_DEAD;
190+
else
191+
new = old - 1;
192+
} while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new));
193+
194+
return new == FILE_REF_DEAD;
195+
}
196+
163197
/**
164198
* file_ref_read - Read the number of file references
165199
* @ref: Pointer to the reference count

0 commit comments

Comments
 (0)