Skip to content

Commit c450846

Browse files
committed
afs: Set up the iov_iter before calling afs_extract_data()
afs_extract_data() sets up a temporary iov_iter and passes it to AF_RXRPC each time it is called to describe the remaining buffer to be filled.

Instead:

 (1) Put an iterator in the afs_call struct.

 (2) Set the iterator for each marshalling stage to load data into the appropriate places. A number of convenience functions are provided to this end (eg. afs_extract_to_buf()). This iterator is then passed to afs_extract_data().

 (3) Use the new ITER_XARRAY iterator when reading data to load directly into the inode's pages without needing to create a list of them.

This will allow O_DIRECT calls to be supported in future patches.

Signed-off-by: David Howells <[email protected]>
Tested-By: Marc Dionne <[email protected]>
cc: [email protected]
cc: [email protected]
cc: [email protected]
Link: https://lore.kernel.org/r/152898380012.11616.12094591785228251717.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/153685394431.14766.3178466345696987059.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/153999787395.866.11218209749223643998.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/154033911195.12041.3882700371848894587.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/158861250059.340223.1248231474865140653.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/159465827399.1377938.11181327349704960046.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/160588533776.3465195.3612752083351956948.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161118151238.1232039.17015723405750601161.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/161161047240.2537118.14721975104810564022.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/161340410333.1303470.16260122230371140878.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/161539554187.286939.15305559004905459852.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/161653810525.2770958.4630666029125411789.stgit@warthog.procyon.org.uk/ # v5
Link: https://lore.kernel.org/r/161789093719.6155.7877160739235087723.stgit@warthog.procyon.org.uk/ # v6
1 parent 0509275 commit c450846

File tree

6 files changed

+314
-249
lines changed

6 files changed

+314
-249
lines changed

fs/afs/dir.c

Lines changed: 149 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
102102
struct afs_fid fids[50];
103103
};
104104

105+
/*
106+
* Drop the refs that we're holding on the pages we were reading into. We've
107+
* got refs on the first nr_pages pages.
108+
*/
109+
static void afs_dir_read_cleanup(struct afs_read *req)
110+
{
111+
struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
112+
struct page *page;
113+
pgoff_t last = req->nr_pages - 1;
114+
115+
XA_STATE(xas, &mapping->i_pages, 0);
116+
117+
if (unlikely(!req->nr_pages))
118+
return;
119+
120+
rcu_read_lock();
121+
xas_for_each(&xas, page, last) {
122+
if (xas_retry(&xas, page))
123+
continue;
124+
BUG_ON(xa_is_value(page));
125+
BUG_ON(PageCompound(page));
126+
ASSERTCMP(page->mapping, ==, mapping);
127+
128+
put_page(page);
129+
}
130+
131+
rcu_read_unlock();
132+
}
133+
105134
/*
106135
* check that a directory page is valid
107136
*/
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
127156
qty /= sizeof(union afs_xdr_dir_block);
128157

129158
/* check them */
130-
dbuf = kmap(page);
159+
dbuf = kmap_atomic(page);
131160
for (tmp = 0; tmp < qty; tmp++) {
132161
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
133162
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
146175
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
147176
}
148177

149-
kunmap(page);
178+
kunmap_atomic(dbuf);
150179

151180
checked:
152181
afs_stat_v(dvnode, n_read_dir);
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
157186
}
158187

159188
/*
160-
* Check the contents of a directory that we've just read.
189+
* Dump the contents of a directory.
161190
*/
162-
static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
191+
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
163192
{
164193
struct afs_xdr_dir_page *dbuf;
165-
unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
194+
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
195+
struct page *page;
196+
unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
197+
pgoff_t last = req->nr_pages - 1;
166198

167-
for (i = 0; i < req->nr_pages; i++)
168-
if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
169-
goto bad;
170-
return true;
199+
XA_STATE(xas, &mapping->i_pages, 0);
171200

172-
bad:
173-
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
201+
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
174202
dvnode->fid.vid, dvnode->fid.vnode,
175-
req->file_size, req->len, req->actual_len, req->remain);
176-
pr_warn("DIR %llx %x %x %x\n",
177-
req->pos, req->index, req->nr_pages, req->offset);
203+
req->file_size, req->len, req->actual_len);
204+
pr_warn("DIR %llx %x %zx %zx\n",
205+
req->pos, req->nr_pages,
206+
req->iter->iov_offset, iov_iter_count(req->iter));
178207

179-
for (i = 0; i < req->nr_pages; i++) {
180-
dbuf = kmap(req->pages[i]);
181-
for (j = 0; j < qty; j++) {
182-
union afs_xdr_dir_block *block = &dbuf->blocks[j];
208+
xas_for_each(&xas, page, last) {
209+
if (xas_retry(&xas, page))
210+
continue;
211+
212+
BUG_ON(PageCompound(page));
213+
BUG_ON(page->mapping != mapping);
214+
215+
dbuf = kmap_atomic(page);
216+
for (i = 0; i < qty; i++) {
217+
union afs_xdr_dir_block *block = &dbuf->blocks[i];
183218

184-
pr_warn("[%02x] %32phN\n", i * qty + j, block);
219+
pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
185220
}
186-
kunmap(req->pages[i]);
221+
kunmap_atomic(dbuf);
187222
}
188-
return false;
223+
}
224+
225+
/*
226+
* Check all the pages in a directory. All the pages are held pinned.
227+
*/
228+
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
229+
{
230+
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
231+
struct page *page;
232+
pgoff_t last = req->nr_pages - 1;
233+
int ret = 0;
234+
235+
XA_STATE(xas, &mapping->i_pages, 0);
236+
237+
if (unlikely(!req->nr_pages))
238+
return 0;
239+
240+
rcu_read_lock();
241+
xas_for_each(&xas, page, last) {
242+
if (xas_retry(&xas, page))
243+
continue;
244+
245+
BUG_ON(PageCompound(page));
246+
BUG_ON(page->mapping != mapping);
247+
248+
if (!afs_dir_check_page(dvnode, page, req->file_size)) {
249+
afs_dir_dump(dvnode, req);
250+
ret = -EIO;
251+
break;
252+
}
253+
}
254+
255+
rcu_read_unlock();
256+
return ret;
189257
}
190258

191259
/*
@@ -214,81 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
214282
{
215283
struct afs_read *req;
216284
loff_t i_size;
217-
int nr_pages, nr_inline, i, n;
218-
int ret = -ENOMEM;
285+
int nr_pages, i, n;
286+
int ret;
287+
288+
_enter("");
219289

220-
retry:
290+
req = kzalloc(sizeof(*req), GFP_KERNEL);
291+
if (!req)
292+
return ERR_PTR(-ENOMEM);
293+
294+
refcount_set(&req->usage, 1);
295+
req->vnode = dvnode;
296+
req->key = key_get(key);
297+
req->cleanup = afs_dir_read_cleanup;
298+
299+
expand:
221300
i_size = i_size_read(&dvnode->vfs_inode);
222-
if (i_size < 2048)
223-
return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
301+
if (i_size < 2048) {
302+
ret = afs_bad(dvnode, afs_file_error_dir_small);
303+
goto error;
304+
}
224305
if (i_size > 2048 * 1024) {
225306
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
226-
return ERR_PTR(-EFBIG);
307+
ret = -EFBIG;
308+
goto error;
227309
}
228310

229311
_enter("%llu", i_size);
230312

231-
/* Get a request record to hold the page list. We want to hold it
232-
* inline if we can, but we don't want to make an order 1 allocation.
233-
*/
234313
nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
235-
nr_inline = nr_pages;
236-
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
237-
nr_inline = 0;
238314

239-
req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
240-
if (!req)
241-
return ERR_PTR(-ENOMEM);
242-
243-
refcount_set(&req->usage, 1);
244-
req->key = key_get(key);
245-
req->nr_pages = nr_pages;
246315
req->actual_len = i_size; /* May change */
247316
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
248317
req->data_version = dvnode->status.data_version; /* May change */
249-
if (nr_inline > 0) {
250-
req->pages = req->array;
251-
} else {
252-
req->pages = kcalloc(nr_pages, sizeof(struct page *),
253-
GFP_KERNEL);
254-
if (!req->pages)
255-
goto error;
256-
}
318+
iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
319+
0, i_size);
320+
req->iter = &req->def_iter;
257321

258-
/* Get a list of all the pages that hold or will hold the directory
259-
* content. We need to fill in any gaps that we might find where the
260-
* memory reclaimer has been at work. If there are any gaps, we will
322+
/* Fill in any gaps that we might find where the memory reclaimer has
323+
* been at work and pin all the pages. If there are any gaps, we will
261324
* need to reread the entire directory contents.
262325
*/
263-
i = 0;
264-
do {
326+
i = req->nr_pages;
327+
while (i < nr_pages) {
328+
struct page *pages[8], *page;
329+
265330
n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
266-
req->nr_pages - i,
267-
req->pages + i);
268-
_debug("find %u at %u/%u", n, i, req->nr_pages);
331+
min_t(unsigned int, nr_pages - i,
332+
ARRAY_SIZE(pages)),
333+
pages);
334+
_debug("find %u at %u/%u", n, i, nr_pages);
335+
269336
if (n == 0) {
270337
gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
271338

272339
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
273340
afs_stat_v(dvnode, n_inval);
274341

275342
ret = -ENOMEM;
276-
req->pages[i] = __page_cache_alloc(gfp);
277-
if (!req->pages[i])
343+
page = __page_cache_alloc(gfp);
344+
if (!page)
278345
goto error;
279-
ret = add_to_page_cache_lru(req->pages[i],
346+
ret = add_to_page_cache_lru(page,
280347
dvnode->vfs_inode.i_mapping,
281348
i, gfp);
282349
if (ret < 0)
283350
goto error;
284351

285-
attach_page_private(req->pages[i], (void *)1);
286-
unlock_page(req->pages[i]);
352+
attach_page_private(page, (void *)1);
353+
unlock_page(page);
354+
req->nr_pages++;
287355
i++;
288356
} else {
357+
req->nr_pages += n;
289358
i += n;
290359
}
291-
} while (i < req->nr_pages);
360+
}
292361

293362
/* If we're going to reload, we need to lock all the pages to prevent
294363
* races.
@@ -312,12 +381,17 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
312381

313382
task_io_account_read(PAGE_SIZE * req->nr_pages);
314383

315-
if (req->len < req->file_size)
316-
goto content_has_grown;
384+
if (req->len < req->file_size) {
385+
/* The content has grown, so we need to expand the
386+
* buffer.
387+
*/
388+
up_write(&dvnode->validate_lock);
389+
goto expand;
390+
}
317391

318392
/* Validate the data we just read. */
319-
ret = -EIO;
320-
if (!afs_dir_check_pages(dvnode, req))
393+
ret = afs_dir_check(dvnode, req);
394+
if (ret < 0)
321395
goto error_unlock;
322396

323397
// TODO: Trim excess pages
@@ -335,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
335409
afs_put_read(req);
336410
_leave(" = %d", ret);
337411
return ERR_PTR(ret);
338-
339-
content_has_grown:
340-
up_write(&dvnode->validate_lock);
341-
afs_put_read(req);
342-
goto retry;
343412
}
344413

345414
/*
@@ -449,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
449518
struct afs_read *req;
450519
struct page *page;
451520
unsigned blkoff, limit;
521+
void __rcu **slot;
452522
int ret;
453523

454524
_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -473,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
473543
blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
474544

475545
/* Fetch the appropriate page from the directory and re-add it
476-
* to the LRU.
546+
* to the LRU. We have all the pages pinned with an extra ref.
477547
*/
478-
page = req->pages[blkoff / PAGE_SIZE];
548+
rcu_read_lock();
549+
page = NULL;
550+
slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
551+
blkoff / PAGE_SIZE);
552+
if (slot)
553+
page = radix_tree_deref_slot(slot);
554+
rcu_read_unlock();
479555
if (!page) {
480556
ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
481557
break;

0 commit comments

Comments
 (0)