Skip to content

Commit 9dc9bee

Browse files
bcodding-rhgregkh
authored andcommitted
NFS: switch back to to ->iterate()
[ Upstream commit b044f64 ] NFS has some optimizations for readdir to choose between using READDIR or READDIRPLUS based on workload, and which NFS operation to use is determined by subsequent interactions with lookup, d_revalidate, and getattr. Concurrent use of nfs_readdir() via ->iterate_shared() can cause those optimizations to repeatedly invalidate the pagecache used to store directory entries during readdir(), which causes some very bad performance for directories with many entries (more than about 10000). There's a couple ways to fix this in NFS, but no fix would be as simple as going back to ->iterate() to serialize nfs_readdir(), and neither fix I tested performed as well as going back to ->iterate(). The first required taking the directory's i_lock for each entry, with the result of terrible contention. The second way adds another flag to the nfs_inode, and so keeps the optimizations working for large directories. The difference from using ->iterate() here is that much more memory is consumed for a given workload without any performance gain. The workings of nfs_readdir() are such that concurrent users are serialized within read_cache_page() waiting to retrieve pages of entries from the server. By serializing this work in iterate_dir() instead, contention for cache pages is reduced. Waiting processes can have an uncontended pass at the entirety of the directory's pagecache once previous processes have completed filling it. v2 - Keep the bits needed for parallel lookup Signed-off-by: Benjamin Coddington <[email protected]> Signed-off-by: Trond Myklebust <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent aeba8c4 commit 9dc9bee

File tree

1 file changed

+12
-25
lines changed

1 file changed

+12
-25
lines changed

fs/nfs/dir.c

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
5757
const struct file_operations nfs_dir_operations = {
5858
.llseek = nfs_llseek_dir,
5959
.read = generic_read_dir,
60-
.iterate_shared = nfs_readdir,
60+
.iterate = nfs_readdir,
6161
.open = nfs_opendir,
6262
.release = nfs_closedir,
6363
.fsync = nfs_fsync_dir,
@@ -145,7 +145,6 @@ struct nfs_cache_array_entry {
145145
};
146146

147147
struct nfs_cache_array {
148-
atomic_t refcount;
149148
int size;
150149
int eof_index;
151150
u64 last_cookie;
@@ -201,20 +200,11 @@ void nfs_readdir_clear_array(struct page *page)
201200
int i;
202201

203202
array = kmap_atomic(page);
204-
if (atomic_dec_and_test(&array->refcount))
205-
for (i = 0; i < array->size; i++)
206-
kfree(array->array[i].string.name);
203+
for (i = 0; i < array->size; i++)
204+
kfree(array->array[i].string.name);
207205
kunmap_atomic(array);
208206
}
209207

210-
static bool grab_page(struct page *page)
211-
{
212-
struct nfs_cache_array *array = kmap_atomic(page);
213-
bool res = atomic_inc_not_zero(&array->refcount);
214-
kunmap_atomic(array);
215-
return res;
216-
}
217-
218208
/*
219209
* the caller is responsible for freeing qstr.name
220210
* when called by nfs_readdir_add_to_array, the strings will be freed in
@@ -674,7 +664,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
674664
goto out_label_free;
675665
}
676666
memset(array, 0, sizeof(struct nfs_cache_array));
677-
atomic_set(&array->refcount, 1);
678667
array->eof_index = -1;
679668

680669
status = nfs_readdir_alloc_pages(pages, array_size);
@@ -737,24 +726,17 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
737726
static
738727
void cache_page_release(nfs_readdir_descriptor_t *desc)
739728
{
740-
nfs_readdir_clear_array(desc->page);
729+
if (!desc->page->mapping)
730+
nfs_readdir_clear_array(desc->page);
741731
put_page(desc->page);
742732
desc->page = NULL;
743733
}
744734

745735
static
746736
struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
747737
{
748-
struct page *page;
749-
750-
for (;;) {
751-
page = read_cache_page(desc->file->f_mapping,
738+
return read_cache_page(desc->file->f_mapping,
752739
desc->page_index, (filler_t *)nfs_readdir_filler, desc);
753-
if (IS_ERR(page) || grab_page(page))
754-
break;
755-
put_page(page);
756-
}
757-
return page;
758740
}
759741

760742
/*
@@ -960,25 +942,30 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
960942

961943
static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
962944
{
945+
struct inode *inode = file_inode(filp);
963946
struct nfs_open_dir_context *dir_ctx = filp->private_data;
964947

965948
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
966949
filp, offset, whence);
967950

951+
inode_lock(inode);
968952
switch (whence) {
969953
case 1:
970954
offset += filp->f_pos;
971955
case 0:
972956
if (offset >= 0)
973957
break;
974958
default:
975-
return -EINVAL;
959+
offset = -EINVAL;
960+
goto out;
976961
}
977962
if (offset != filp->f_pos) {
978963
filp->f_pos = offset;
979964
dir_ctx->dir_cookie = 0;
980965
dir_ctx->duped = 0;
981966
}
967+
out:
968+
inode_unlock(inode);
982969
return offset;
983970
}
984971

0 commit comments

Comments
 (0)