Skip to content

Commit fafe1e3

Browse files
committed
Merge tag 'afs-netfs-lib-20210426' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull AFS updates from David Howells:
"Use the new netfs lib.

Begin the process of overhauling the use of the fscache API by AFS and the introduction of support for features such as Transparent Huge Pages (THPs).

- Add some support for THPs, including using core VM helper functions to find details of pages.

- Use the ITER_XARRAY I/O iterator to mediate access to the pagecache as this handles THPs and doesn't require allocation of large bvec arrays.

- Delegate address_space read/pre-write I/O methods for AFS to the netfs helper library. A method is provided to the library that allows it to issue a read against the server. This includes a change in use for PG_fscache (it now indicates a DIO write in progress from the marked page), so a number of waits need to be deployed for it.

- Split the core AFS writeback function to make it easier to modify in future patches to handle writing to the cache. [This might feasibly make more sense moved out into my fscache-iter branch].

I've tested these with "xfstests -g quick" against an AFS volume (xfstests needs patching to make it work). With this, AFS without a cache passes all expected xfstests; with a cache, there's an extra failure, but that's also there before these patches. Fixing that probably requires a greater overhaul (as can be found on my fscache-iter branch, but that's for a later time).
Thanks should go to Marc Dionne and Jeff Altman of AuriStor for exercising the patches in their test farm also"

Link: https://lore.kernel.org/lkml/[email protected]/

* tag 'afs-netfs-lib-20210426' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Use the netfs_write_begin() helper
  afs: Use new netfs lib read helper API
  afs: Use the fs operation ops to handle FetchData completion
  afs: Prepare for use of THPs
  afs: Extract writeback extension into its own function
  afs: Wait on PG_fscache before modifying/releasing a page
  afs: Use ITER_XARRAY for writing
  afs: Set up the iov_iter before calling afs_extract_data()
  afs: Log remote unmarshalling errors
  afs: Don't truncate iter during data fetch
  afs: Move key to afs_read struct
  afs: Print the operation debug_id when logging an unexpected data version
  afs: Pass page into dirty region helpers to provide THP size
  afs: Disable use of the fscache I/O routines
2 parents 820c4ba + 3003bbd commit fafe1e3

File tree

13 files changed

+805
-1056
lines changed

13 files changed

+805
-1056
lines changed

fs/afs/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ config AFS_FS
44
depends on INET
55
select AF_RXRPC
66
select DNS_RESOLVER
7+
select NETFS_SUPPORT
78
help
89
If you say Y here, you will get an experimental Andrew File System
910
driver. It currently only supports unsecured read-only AFS access.

fs/afs/dir.c

Lines changed: 151 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
102102
struct afs_fid fids[50];
103103
};
104104

105+
/*
106+
* Drop the refs that we're holding on the pages we were reading into. We've
107+
* got refs on the first nr_pages pages.
108+
*/
109+
static void afs_dir_read_cleanup(struct afs_read *req)
110+
{
111+
struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
112+
struct page *page;
113+
pgoff_t last = req->nr_pages - 1;
114+
115+
XA_STATE(xas, &mapping->i_pages, 0);
116+
117+
if (unlikely(!req->nr_pages))
118+
return;
119+
120+
rcu_read_lock();
121+
xas_for_each(&xas, page, last) {
122+
if (xas_retry(&xas, page))
123+
continue;
124+
BUG_ON(xa_is_value(page));
125+
BUG_ON(PageCompound(page));
126+
ASSERTCMP(page->mapping, ==, mapping);
127+
128+
put_page(page);
129+
}
130+
131+
rcu_read_unlock();
132+
}
133+
105134
/*
106135
* check that a directory page is valid
107136
*/
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
127156
qty /= sizeof(union afs_xdr_dir_block);
128157

129158
/* check them */
130-
dbuf = kmap(page);
159+
dbuf = kmap_atomic(page);
131160
for (tmp = 0; tmp < qty; tmp++) {
132161
if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
133162
printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
146175
((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
147176
}
148177

149-
kunmap(page);
178+
kunmap_atomic(dbuf);
150179

151180
checked:
152181
afs_stat_v(dvnode, n_read_dir);
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
157186
}
158187

159188
/*
160-
* Check the contents of a directory that we've just read.
189+
* Dump the contents of a directory.
161190
*/
162-
static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
191+
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
163192
{
164193
struct afs_xdr_dir_page *dbuf;
165-
unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
194+
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
195+
struct page *page;
196+
unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
197+
pgoff_t last = req->nr_pages - 1;
166198

167-
for (i = 0; i < req->nr_pages; i++)
168-
if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
169-
goto bad;
170-
return true;
199+
XA_STATE(xas, &mapping->i_pages, 0);
171200

172-
bad:
173-
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
201+
pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
174202
dvnode->fid.vid, dvnode->fid.vnode,
175-
req->file_size, req->len, req->actual_len, req->remain);
176-
pr_warn("DIR %llx %x %x %x\n",
177-
req->pos, req->index, req->nr_pages, req->offset);
203+
req->file_size, req->len, req->actual_len);
204+
pr_warn("DIR %llx %x %zx %zx\n",
205+
req->pos, req->nr_pages,
206+
req->iter->iov_offset, iov_iter_count(req->iter));
178207

179-
for (i = 0; i < req->nr_pages; i++) {
180-
dbuf = kmap(req->pages[i]);
181-
for (j = 0; j < qty; j++) {
182-
union afs_xdr_dir_block *block = &dbuf->blocks[j];
208+
xas_for_each(&xas, page, last) {
209+
if (xas_retry(&xas, page))
210+
continue;
211+
212+
BUG_ON(PageCompound(page));
213+
BUG_ON(page->mapping != mapping);
214+
215+
dbuf = kmap_atomic(page);
216+
for (i = 0; i < qty; i++) {
217+
union afs_xdr_dir_block *block = &dbuf->blocks[i];
183218

184-
pr_warn("[%02x] %32phN\n", i * qty + j, block);
219+
pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
185220
}
186-
kunmap(req->pages[i]);
221+
kunmap_atomic(dbuf);
187222
}
188-
return false;
223+
}
224+
225+
/*
226+
* Check all the pages in a directory. All the pages are held pinned.
227+
*/
228+
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
229+
{
230+
struct address_space *mapping = dvnode->vfs_inode.i_mapping;
231+
struct page *page;
232+
pgoff_t last = req->nr_pages - 1;
233+
int ret = 0;
234+
235+
XA_STATE(xas, &mapping->i_pages, 0);
236+
237+
if (unlikely(!req->nr_pages))
238+
return 0;
239+
240+
rcu_read_lock();
241+
xas_for_each(&xas, page, last) {
242+
if (xas_retry(&xas, page))
243+
continue;
244+
245+
BUG_ON(PageCompound(page));
246+
BUG_ON(page->mapping != mapping);
247+
248+
if (!afs_dir_check_page(dvnode, page, req->file_size)) {
249+
afs_dir_dump(dvnode, req);
250+
ret = -EIO;
251+
break;
252+
}
253+
}
254+
255+
rcu_read_unlock();
256+
return ret;
189257
}
190258

191259
/*
@@ -214,80 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
214282
{
215283
struct afs_read *req;
216284
loff_t i_size;
217-
int nr_pages, nr_inline, i, n;
218-
int ret = -ENOMEM;
285+
int nr_pages, i, n;
286+
int ret;
287+
288+
_enter("");
219289

220-
retry:
290+
req = kzalloc(sizeof(*req), GFP_KERNEL);
291+
if (!req)
292+
return ERR_PTR(-ENOMEM);
293+
294+
refcount_set(&req->usage, 1);
295+
req->vnode = dvnode;
296+
req->key = key_get(key);
297+
req->cleanup = afs_dir_read_cleanup;
298+
299+
expand:
221300
i_size = i_size_read(&dvnode->vfs_inode);
222-
if (i_size < 2048)
223-
return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
301+
if (i_size < 2048) {
302+
ret = afs_bad(dvnode, afs_file_error_dir_small);
303+
goto error;
304+
}
224305
if (i_size > 2048 * 1024) {
225306
trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
226-
return ERR_PTR(-EFBIG);
307+
ret = -EFBIG;
308+
goto error;
227309
}
228310

229311
_enter("%llu", i_size);
230312

231-
/* Get a request record to hold the page list. We want to hold it
232-
* inline if we can, but we don't want to make an order 1 allocation.
233-
*/
234313
nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
235-
nr_inline = nr_pages;
236-
if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
237-
nr_inline = 0;
238314

239-
req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
240-
if (!req)
241-
return ERR_PTR(-ENOMEM);
242-
243-
refcount_set(&req->usage, 1);
244-
req->nr_pages = nr_pages;
245315
req->actual_len = i_size; /* May change */
246316
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
247317
req->data_version = dvnode->status.data_version; /* May change */
248-
if (nr_inline > 0) {
249-
req->pages = req->array;
250-
} else {
251-
req->pages = kcalloc(nr_pages, sizeof(struct page *),
252-
GFP_KERNEL);
253-
if (!req->pages)
254-
goto error;
255-
}
318+
iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
319+
0, i_size);
320+
req->iter = &req->def_iter;
256321

257-
/* Get a list of all the pages that hold or will hold the directory
258-
* content. We need to fill in any gaps that we might find where the
259-
* memory reclaimer has been at work. If there are any gaps, we will
322+
/* Fill in any gaps that we might find where the memory reclaimer has
323+
* been at work and pin all the pages. If there are any gaps, we will
260324
* need to reread the entire directory contents.
261325
*/
262-
i = 0;
263-
do {
326+
i = req->nr_pages;
327+
while (i < nr_pages) {
328+
struct page *pages[8], *page;
329+
264330
n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
265-
req->nr_pages - i,
266-
req->pages + i);
267-
_debug("find %u at %u/%u", n, i, req->nr_pages);
331+
min_t(unsigned int, nr_pages - i,
332+
ARRAY_SIZE(pages)),
333+
pages);
334+
_debug("find %u at %u/%u", n, i, nr_pages);
335+
268336
if (n == 0) {
269337
gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
270338

271339
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
272340
afs_stat_v(dvnode, n_inval);
273341

274342
ret = -ENOMEM;
275-
req->pages[i] = __page_cache_alloc(gfp);
276-
if (!req->pages[i])
343+
page = __page_cache_alloc(gfp);
344+
if (!page)
277345
goto error;
278-
ret = add_to_page_cache_lru(req->pages[i],
346+
ret = add_to_page_cache_lru(page,
279347
dvnode->vfs_inode.i_mapping,
280348
i, gfp);
281349
if (ret < 0)
282350
goto error;
283351

284-
attach_page_private(req->pages[i], (void *)1);
285-
unlock_page(req->pages[i]);
352+
attach_page_private(page, (void *)1);
353+
unlock_page(page);
354+
req->nr_pages++;
286355
i++;
287356
} else {
357+
req->nr_pages += n;
288358
i += n;
289359
}
290-
} while (i < req->nr_pages);
360+
}
291361

292362
/* If we're going to reload, we need to lock all the pages to prevent
293363
* races.
@@ -305,18 +375,23 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
305375

306376
if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
307377
trace_afs_reload_dir(dvnode);
308-
ret = afs_fetch_data(dvnode, key, req);
378+
ret = afs_fetch_data(dvnode, req);
309379
if (ret < 0)
310380
goto error_unlock;
311381

312382
task_io_account_read(PAGE_SIZE * req->nr_pages);
313383

314-
if (req->len < req->file_size)
315-
goto content_has_grown;
384+
if (req->len < req->file_size) {
385+
/* The content has grown, so we need to expand the
386+
* buffer.
387+
*/
388+
up_write(&dvnode->validate_lock);
389+
goto expand;
390+
}
316391

317392
/* Validate the data we just read. */
318-
ret = -EIO;
319-
if (!afs_dir_check_pages(dvnode, req))
393+
ret = afs_dir_check(dvnode, req);
394+
if (ret < 0)
320395
goto error_unlock;
321396

322397
// TODO: Trim excess pages
@@ -334,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
334409
afs_put_read(req);
335410
_leave(" = %d", ret);
336411
return ERR_PTR(ret);
337-
338-
content_has_grown:
339-
up_write(&dvnode->validate_lock);
340-
afs_put_read(req);
341-
goto retry;
342412
}
343413

344414
/*
@@ -448,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
448518
struct afs_read *req;
449519
struct page *page;
450520
unsigned blkoff, limit;
521+
void __rcu **slot;
451522
int ret;
452523

453524
_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -472,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
472543
blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
473544

474545
/* Fetch the appropriate page from the directory and re-add it
475-
* to the LRU.
546+
* to the LRU. We have all the pages pinned with an extra ref.
476547
*/
477-
page = req->pages[blkoff / PAGE_SIZE];
548+
rcu_read_lock();
549+
page = NULL;
550+
slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
551+
blkoff / PAGE_SIZE);
552+
if (slot)
553+
page = radix_tree_deref_slot(slot);
554+
rcu_read_unlock();
478555
if (!page) {
479556
ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
480557
break;
@@ -2006,6 +2083,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
20062083
afs_stat_v(dvnode, n_inval);
20072084

20082085
/* we clean up only if the entire page is being invalidated */
2009-
if (offset == 0 && length == PAGE_SIZE)
2086+
if (offset == 0 && length == thp_size(page))
20102087
detach_page_private(page);
20112088
}

0 commit comments

Comments
 (0)