Skip to content

Commit 2aae1d6

Browse files
committed
Merge tag 'vfs-6.11.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs inode / dentry updates from Christian Brauner: "This contains smaller performance improvements to inodes and dentries: inode: - Add RCU-based inode lookup variants. They avoid one inode hash lock acquire in the common case, thereby significantly reducing contention. We already support RCU-based operations but didn't take advantage of them during inode insertion. Callers of iget_locked() get the improvement without any code changes. Callers that need a custom callback can switch to iget5_locked_rcu(), as btrfs did. With 20 threads each walking a dedicated 1000 dirs * 1000 files directory tree to stat(2) on a 32 core + 24GB ram vm: before: 3.54s user 892.30s system 1966% cpu 45.549 total after: 3.28s user 738.66s system 1955% cpu 37.932 total (-16.7%) Long-term we should pick up the effort to introduce more fine-grained locking and possibly improve on the currently used hash implementation. - Start zeroing i_state in inode_init_always() instead of doing it in individual filesystems. This allows us to remove an unneeded lock acquire in new_inode() and not burden individual filesystems with this. dcache: - Move d_lockref out of the area used by RCU lookup to avoid cacheline ping-pong because the embedded name is sharing a cacheline with d_lockref. - Fix dentry size on 32-bit with CONFIG_SMP=y so it does actually end up with 128 bytes in total" * tag 'vfs-6.11.inode' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs: fix dentry size vfs: move d_lockref out of the area used by RCU lookup bcachefs: remove now spurious i_state initialization xfs: remove now spurious i_state initialization in xfs_inode_alloc vfs: partially sanitize i_state zeroing on inode creation xfs: preserve i_state around inode_init_always in xfs_reinit_inode btrfs: use iget5_locked_rcu vfs: add rcu-based find_inode variants for iget ops
2 parents b8fc1bd + dc99c0f commit 2aae1d6

File tree

6 files changed

+99
-33
lines changed

6 files changed

+99
-33
lines changed

fs/bcachefs/fs.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c)
244244
inode->ei_flags = 0;
245245
mutex_init(&inode->ei_quota_lock);
246246
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
247-
inode->v.i_state = 0;
248247

249248
if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) {
250249
kmem_cache_free(bch2_inode_cache, inode);

fs/btrfs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5587,7 +5587,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
55875587
args.ino = ino;
55885588
args.root = root;
55895589

5590-
inode = iget5_locked(s, hashval, btrfs_find_actor,
5590+
inode = iget5_locked_rcu(s, hashval, btrfs_find_actor,
55915591
btrfs_init_locked_inode,
55925592
(void *)&args);
55935593
return inode;

fs/inode.c

Lines changed: 82 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
162162
inode->i_sb = sb;
163163
inode->i_blkbits = sb->s_blocksize_bits;
164164
inode->i_flags = 0;
165+
inode->i_state = 0;
165166
atomic64_set(&inode->i_sequence, 0);
166167
atomic_set(&inode->i_count, 1);
167168
inode->i_op = &empty_iops;
@@ -231,6 +232,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
231232

232233
if (unlikely(security_inode_alloc(inode)))
233234
return -ENOMEM;
235+
234236
this_cpu_inc(nr_inodes);
235237

236238
return 0;
@@ -886,36 +888,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
886888
return freed;
887889
}
888890

889-
static void __wait_on_freeing_inode(struct inode *inode);
891+
static void __wait_on_freeing_inode(struct inode *inode, bool locked);
890892
/*
891893
* Called with the inode lock held.
892894
*/
893895
static struct inode *find_inode(struct super_block *sb,
894896
struct hlist_head *head,
895897
int (*test)(struct inode *, void *),
896-
void *data)
898+
void *data, bool locked)
897899
{
898900
struct inode *inode = NULL;
899901

902+
if (locked)
903+
lockdep_assert_held(&inode_hash_lock);
904+
else
905+
lockdep_assert_not_held(&inode_hash_lock);
906+
907+
rcu_read_lock();
900908
repeat:
901-
hlist_for_each_entry(inode, head, i_hash) {
909+
hlist_for_each_entry_rcu(inode, head, i_hash) {
902910
if (inode->i_sb != sb)
903911
continue;
904912
if (!test(inode, data))
905913
continue;
906914
spin_lock(&inode->i_lock);
907915
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
908-
__wait_on_freeing_inode(inode);
916+
__wait_on_freeing_inode(inode, locked);
909917
goto repeat;
910918
}
911919
if (unlikely(inode->i_state & I_CREATING)) {
912920
spin_unlock(&inode->i_lock);
921+
rcu_read_unlock();
913922
return ERR_PTR(-ESTALE);
914923
}
915924
__iget(inode);
916925
spin_unlock(&inode->i_lock);
926+
rcu_read_unlock();
917927
return inode;
918928
}
929+
rcu_read_unlock();
919930
return NULL;
920931
}
921932

@@ -924,29 +935,39 @@ static struct inode *find_inode(struct super_block *sb,
924935
* iget_locked for details.
925936
*/
926937
static struct inode *find_inode_fast(struct super_block *sb,
927-
struct hlist_head *head, unsigned long ino)
938+
struct hlist_head *head, unsigned long ino,
939+
bool locked)
928940
{
929941
struct inode *inode = NULL;
930942

943+
if (locked)
944+
lockdep_assert_held(&inode_hash_lock);
945+
else
946+
lockdep_assert_not_held(&inode_hash_lock);
947+
948+
rcu_read_lock();
931949
repeat:
932-
hlist_for_each_entry(inode, head, i_hash) {
950+
hlist_for_each_entry_rcu(inode, head, i_hash) {
933951
if (inode->i_ino != ino)
934952
continue;
935953
if (inode->i_sb != sb)
936954
continue;
937955
spin_lock(&inode->i_lock);
938956
if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
939-
__wait_on_freeing_inode(inode);
957+
__wait_on_freeing_inode(inode, locked);
940958
goto repeat;
941959
}
942960
if (unlikely(inode->i_state & I_CREATING)) {
943961
spin_unlock(&inode->i_lock);
962+
rcu_read_unlock();
944963
return ERR_PTR(-ESTALE);
945964
}
946965
__iget(inode);
947966
spin_unlock(&inode->i_lock);
967+
rcu_read_unlock();
948968
return inode;
949969
}
970+
rcu_read_unlock();
950971
return NULL;
951972
}
952973

@@ -1004,14 +1025,7 @@ EXPORT_SYMBOL(get_next_ino);
10041025
*/
10051026
struct inode *new_inode_pseudo(struct super_block *sb)
10061027
{
1007-
struct inode *inode = alloc_inode(sb);
1008-
1009-
if (inode) {
1010-
spin_lock(&inode->i_lock);
1011-
inode->i_state = 0;
1012-
spin_unlock(&inode->i_lock);
1013-
}
1014-
return inode;
1028+
return alloc_inode(sb);
10151029
}
10161030

10171031
/**
@@ -1161,7 +1175,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
11611175

11621176
again:
11631177
spin_lock(&inode_hash_lock);
1164-
old = find_inode(inode->i_sb, head, test, data);
1178+
old = find_inode(inode->i_sb, head, test, data, true);
11651179
if (unlikely(old)) {
11661180
/*
11671181
* Uhhuh, somebody else created the same inode under us.
@@ -1235,7 +1249,6 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
12351249
struct inode *new = alloc_inode(sb);
12361250

12371251
if (new) {
1238-
new->i_state = 0;
12391252
inode = inode_insert5(new, hashval, test, set, data);
12401253
if (unlikely(inode != new))
12411254
destroy_inode(new);
@@ -1245,6 +1258,47 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
12451258
}
12461259
EXPORT_SYMBOL(iget5_locked);
12471260

1261+
/**
1262+
* iget5_locked_rcu - obtain an inode from a mounted file system
1263+
* @sb: super block of file system
1264+
* @hashval: hash value (usually inode number) to get
1265+
* @test: callback used for comparisons between inodes
1266+
* @set: callback used to initialize a new struct inode
1267+
* @data: opaque data pointer to pass to @test and @set
1268+
*
1269+
* This is equivalent to iget5_locked, except the @test callback must
1270+
* tolerate the inode not being stable, including being mid-teardown.
1271+
*/
1272+
struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
1273+
int (*test)(struct inode *, void *),
1274+
int (*set)(struct inode *, void *), void *data)
1275+
{
1276+
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
1277+
struct inode *inode, *new;
1278+
1279+
again:
1280+
inode = find_inode(sb, head, test, data, false);
1281+
if (inode) {
1282+
if (IS_ERR(inode))
1283+
return NULL;
1284+
wait_on_inode(inode);
1285+
if (unlikely(inode_unhashed(inode))) {
1286+
iput(inode);
1287+
goto again;
1288+
}
1289+
return inode;
1290+
}
1291+
1292+
new = alloc_inode(sb);
1293+
if (new) {
1294+
inode = inode_insert5(new, hashval, test, set, data);
1295+
if (unlikely(inode != new))
1296+
destroy_inode(new);
1297+
}
1298+
return inode;
1299+
}
1300+
EXPORT_SYMBOL_GPL(iget5_locked_rcu);
1301+
12481302
/**
12491303
* iget_locked - obtain an inode from a mounted file system
12501304
* @sb: super block of file system
@@ -1263,9 +1317,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
12631317
struct hlist_head *head = inode_hashtable + hash(sb, ino);
12641318
struct inode *inode;
12651319
again:
1266-
spin_lock(&inode_hash_lock);
1267-
inode = find_inode_fast(sb, head, ino);
1268-
spin_unlock(&inode_hash_lock);
1320+
inode = find_inode_fast(sb, head, ino, false);
12691321
if (inode) {
12701322
if (IS_ERR(inode))
12711323
return NULL;
@@ -1283,7 +1335,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
12831335

12841336
spin_lock(&inode_hash_lock);
12851337
/* We released the lock, so.. */
1286-
old = find_inode_fast(sb, head, ino);
1338+
old = find_inode_fast(sb, head, ino, true);
12871339
if (!old) {
12881340
inode->i_ino = ino;
12891341
spin_lock(&inode->i_lock);
@@ -1419,7 +1471,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
14191471
struct inode *inode;
14201472

14211473
spin_lock(&inode_hash_lock);
1422-
inode = find_inode(sb, head, test, data);
1474+
inode = find_inode(sb, head, test, data, true);
14231475
spin_unlock(&inode_hash_lock);
14241476

14251477
return IS_ERR(inode) ? NULL : inode;
@@ -1474,7 +1526,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
14741526
struct inode *inode;
14751527
again:
14761528
spin_lock(&inode_hash_lock);
1477-
inode = find_inode_fast(sb, head, ino);
1529+
inode = find_inode_fast(sb, head, ino, true);
14781530
spin_unlock(&inode_hash_lock);
14791531

14801532
if (inode) {
@@ -2235,17 +2287,21 @@ EXPORT_SYMBOL(inode_needs_sync);
22352287
* wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
22362288
* will DTRT.
22372289
*/
2238-
static void __wait_on_freeing_inode(struct inode *inode)
2290+
static void __wait_on_freeing_inode(struct inode *inode, bool locked)
22392291
{
22402292
wait_queue_head_t *wq;
22412293
DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
22422294
wq = bit_waitqueue(&inode->i_state, __I_NEW);
22432295
prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
22442296
spin_unlock(&inode->i_lock);
2245-
spin_unlock(&inode_hash_lock);
2297+
rcu_read_unlock();
2298+
if (locked)
2299+
spin_unlock(&inode_hash_lock);
22462300
schedule();
22472301
finish_wait(wq, &wait.wq_entry);
2248-
spin_lock(&inode_hash_lock);
2302+
if (locked)
2303+
spin_lock(&inode_hash_lock);
2304+
rcu_read_lock();
22492305
}
22502306

22512307
static __initdata unsigned long ihash_entries;

fs/xfs/xfs_icache.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,8 @@ xfs_inode_alloc(
8686
return NULL;
8787
}
8888

89-
/* VFS doesn't initialise i_mode or i_state! */
89+
/* VFS doesn't initialise i_mode! */
9090
VFS_I(ip)->i_mode = 0;
91-
VFS_I(ip)->i_state = 0;
9291
mapping_set_large_folios(VFS_I(ip)->i_mapping);
9392

9493
XFS_STATS_INC(mp, vn_active);
@@ -314,6 +313,7 @@ xfs_reinit_inode(
314313
dev_t dev = inode->i_rdev;
315314
kuid_t uid = inode->i_uid;
316315
kgid_t gid = inode->i_gid;
316+
unsigned long state = inode->i_state;
317317

318318
error = inode_init_always(mp->m_super, inode);
319319

@@ -324,6 +324,7 @@ xfs_reinit_inode(
324324
inode->i_rdev = dev;
325325
inode->i_uid = uid;
326326
inode->i_gid = gid;
327+
inode->i_state = state;
327328
mapping_set_large_folios(inode->i_mapping);
328329
return error;
329330
}

include/linux/dcache.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ extern const struct qstr dotdot_name;
7171
# define DNAME_INLINE_LEN 40 /* 192 bytes */
7272
#else
7373
# ifdef CONFIG_SMP
74-
# define DNAME_INLINE_LEN 40 /* 128 bytes */
74+
# define DNAME_INLINE_LEN 36 /* 128 bytes */
7575
# else
7676
# define DNAME_INLINE_LEN 44 /* 128 bytes */
7777
# endif
@@ -89,13 +89,18 @@ struct dentry {
8989
struct inode *d_inode; /* Where the name belongs to - NULL is
9090
* negative */
9191
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
92+
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
9293

9394
/* Ref lookup also touches following */
94-
struct lockref d_lockref; /* per-dentry lock and refcount */
9595
const struct dentry_operations *d_op;
9696
struct super_block *d_sb; /* The root of the dentry tree */
9797
unsigned long d_time; /* used by d_revalidate */
9898
void *d_fsdata; /* fs-specific data */
99+
/* --- cacheline 2 boundary (128 bytes) --- */
100+
struct lockref d_lockref; /* per-dentry lock and refcount
101+
* keep separate from RCU lookup area if
102+
* possible!
103+
*/
99104

100105
union {
101106
struct list_head d_lru; /* LRU list */

include/linux/fs.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3047,7 +3047,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
30473047
int (*test)(struct inode *, void *),
30483048
int (*set)(struct inode *, void *),
30493049
void *data);
3050-
extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
3050+
struct inode *iget5_locked(struct super_block *, unsigned long,
3051+
int (*test)(struct inode *, void *),
3052+
int (*set)(struct inode *, void *), void *);
3053+
struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
3054+
int (*test)(struct inode *, void *),
3055+
int (*set)(struct inode *, void *), void *);
30513056
extern struct inode * iget_locked(struct super_block *, unsigned long);
30523057
extern struct inode *find_inode_nowait(struct super_block *,
30533058
unsigned long,

0 commit comments

Comments
 (0)