@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
102
102
struct afs_fid fids [50 ];
103
103
};
104
104
105
/*
 * Drop the refs that we're holding on the pages we were reading into.  We've
 * got refs on the first nr_pages pages.  Called via req->cleanup when the
 * read request is released.
 */
static void afs_dir_read_cleanup(struct afs_read *req)
{
	struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
	struct page *page;
	pgoff_t last = req->nr_pages - 1;

	XA_STATE(xas, &mapping->i_pages, 0);

	/* No pages were pinned - nothing to release. */
	if (unlikely(!req->nr_pages))
		return;

	rcu_read_lock();
	xas_for_each(&xas, page, last) {
		if (xas_retry(&xas, page))
			continue;
		/* The dir pagecache should hold only plain, present pages. */
		BUG_ON(xa_is_value(page));
		BUG_ON(PageCompound(page));
		ASSERTCMP(page->mapping, ==, mapping);

		/* Drop the ref taken when the page was read in/pinned. */
		put_page(page);
	}

	rcu_read_unlock();
}
133
+
105
134
/*
106
135
* check that a directory page is valid
107
136
*/
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
127
156
qty /= sizeof (union afs_xdr_dir_block );
128
157
129
158
/* check them */
130
- dbuf = kmap (page );
159
+ dbuf = kmap_atomic (page );
131
160
for (tmp = 0 ; tmp < qty ; tmp ++ ) {
132
161
if (dbuf -> blocks [tmp ].hdr .magic != AFS_DIR_MAGIC ) {
133
162
printk ("kAFS: %s(%lx): bad magic %d/%d is %04hx\n" ,
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
146
175
((u8 * )& dbuf -> blocks [tmp ])[AFS_DIR_BLOCK_SIZE - 1 ] = 0 ;
147
176
}
148
177
149
- kunmap ( page );
178
+ kunmap_atomic ( dbuf );
150
179
151
180
checked :
152
181
afs_stat_v (dvnode , n_read_dir );
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
157
186
}
158
187
159
188
/*
 * Dump the contents of a directory for diagnostic purposes.  Emitted when
 * validation of the downloaded directory image fails.
 */
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
{
	struct afs_xdr_dir_page *dbuf;
	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
	struct page *page;
	unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
	pgoff_t last = req->nr_pages - 1;

	XA_STATE(xas, &mapping->i_pages, 0);

	/* Summarise the request state: sizes and iterator position. */
	pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
		dvnode->fid.vid, dvnode->fid.vnode,
		req->file_size, req->len, req->actual_len);
	pr_warn("DIR %llx %x %zx %zx\n",
		req->pos, req->nr_pages,
		req->iter->iov_offset, iov_iter_count(req->iter));

	/* Walk the pinned pages and hexdump the head of each dir block. */
	xas_for_each(&xas, page, last) {
		if (xas_retry(&xas, page))
			continue;

		BUG_ON(PageCompound(page));
		BUG_ON(page->mapping != mapping);

		dbuf = kmap_atomic(page);
		for (i = 0; i < qty; i++) {
			union afs_xdr_dir_block *block = &dbuf->blocks[i];

			/* Index printed is the absolute block number. */
			pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
		}
		kunmap_atomic(dbuf);
	}
}
224
+
225
/*
 * Check all the pages in a directory.  All the pages are held pinned (refs
 * were taken when they were read in), so they shouldn't vanish whilst we
 * walk the xarray.
 *
 * Returns 0 if every page validates, -EIO on the first bad page (after
 * dumping the directory contents for diagnosis).
 */
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
{
	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
	struct page *page;
	pgoff_t last = req->nr_pages - 1;
	int ret = 0;

	XA_STATE(xas, &mapping->i_pages, 0);

	/* An empty read has nothing to validate. */
	if (unlikely(!req->nr_pages))
		return 0;

	rcu_read_lock();
	xas_for_each(&xas, page, last) {
		if (xas_retry(&xas, page))
			continue;

		BUG_ON(PageCompound(page));
		BUG_ON(page->mapping != mapping);

		if (!afs_dir_check_page(dvnode, page, req->file_size)) {
			/* Bad block: log the whole dir and give up. */
			afs_dir_dump(dvnode, req);
			ret = -EIO;
			break;
		}
	}

	rcu_read_unlock();
	return ret;
}
190
258
191
259
/*
@@ -214,80 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
214
282
{
215
283
struct afs_read * req ;
216
284
loff_t i_size ;
217
- int nr_pages , nr_inline , i , n ;
218
- int ret = - ENOMEM ;
285
+ int nr_pages , i , n ;
286
+ int ret ;
287
+
288
+ _enter ("" );
219
289
220
- retry :
290
+ req = kzalloc (sizeof (* req ), GFP_KERNEL );
291
+ if (!req )
292
+ return ERR_PTR (- ENOMEM );
293
+
294
+ refcount_set (& req -> usage , 1 );
295
+ req -> vnode = dvnode ;
296
+ req -> key = key_get (key );
297
+ req -> cleanup = afs_dir_read_cleanup ;
298
+
299
+ expand :
221
300
i_size = i_size_read (& dvnode -> vfs_inode );
222
- if (i_size < 2048 )
223
- return ERR_PTR (afs_bad (dvnode , afs_file_error_dir_small ));
301
+ if (i_size < 2048 ) {
302
+ ret = afs_bad (dvnode , afs_file_error_dir_small );
303
+ goto error ;
304
+ }
224
305
if (i_size > 2048 * 1024 ) {
225
306
trace_afs_file_error (dvnode , - EFBIG , afs_file_error_dir_big );
226
- return ERR_PTR (- EFBIG );
307
+ ret = - EFBIG ;
308
+ goto error ;
227
309
}
228
310
229
311
_enter ("%llu" , i_size );
230
312
231
- /* Get a request record to hold the page list. We want to hold it
232
- * inline if we can, but we don't want to make an order 1 allocation.
233
- */
234
313
nr_pages = (i_size + PAGE_SIZE - 1 ) / PAGE_SIZE ;
235
- nr_inline = nr_pages ;
236
- if (nr_inline > (PAGE_SIZE - sizeof (* req )) / sizeof (struct page * ))
237
- nr_inline = 0 ;
238
314
239
- req = kzalloc (struct_size (req , array , nr_inline ), GFP_KERNEL );
240
- if (!req )
241
- return ERR_PTR (- ENOMEM );
242
-
243
- refcount_set (& req -> usage , 1 );
244
- req -> nr_pages = nr_pages ;
245
315
req -> actual_len = i_size ; /* May change */
246
316
req -> len = nr_pages * PAGE_SIZE ; /* We can ask for more than there is */
247
317
req -> data_version = dvnode -> status .data_version ; /* May change */
248
- if (nr_inline > 0 ) {
249
- req -> pages = req -> array ;
250
- } else {
251
- req -> pages = kcalloc (nr_pages , sizeof (struct page * ),
252
- GFP_KERNEL );
253
- if (!req -> pages )
254
- goto error ;
255
- }
318
+ iov_iter_xarray (& req -> def_iter , READ , & dvnode -> vfs_inode .i_mapping -> i_pages ,
319
+ 0 , i_size );
320
+ req -> iter = & req -> def_iter ;
256
321
257
- /* Get a list of all the pages that hold or will hold the directory
258
- * content. We need to fill in any gaps that we might find where the
259
- * memory reclaimer has been at work. If there are any gaps, we will
322
+ /* Fill in any gaps that we might find where the memory reclaimer has
323
+ * been at work and pin all the pages. If there are any gaps, we will
260
324
* need to reread the entire directory contents.
261
325
*/
262
- i = 0 ;
263
- do {
326
+ i = req -> nr_pages ;
327
+ while (i < nr_pages ) {
328
+ struct page * pages [8 ], * page ;
329
+
264
330
n = find_get_pages_contig (dvnode -> vfs_inode .i_mapping , i ,
265
- req -> nr_pages - i ,
266
- req -> pages + i );
267
- _debug ("find %u at %u/%u" , n , i , req -> nr_pages );
331
+ min_t (unsigned int , nr_pages - i ,
332
+ ARRAY_SIZE (pages )),
333
+ pages );
334
+ _debug ("find %u at %u/%u" , n , i , nr_pages );
335
+
268
336
if (n == 0 ) {
269
337
gfp_t gfp = dvnode -> vfs_inode .i_mapping -> gfp_mask ;
270
338
271
339
if (test_and_clear_bit (AFS_VNODE_DIR_VALID , & dvnode -> flags ))
272
340
afs_stat_v (dvnode , n_inval );
273
341
274
342
ret = - ENOMEM ;
275
- req -> pages [ i ] = __page_cache_alloc (gfp );
276
- if (!req -> pages [ i ] )
343
+ page = __page_cache_alloc (gfp );
344
+ if (!page )
277
345
goto error ;
278
- ret = add_to_page_cache_lru (req -> pages [ i ] ,
346
+ ret = add_to_page_cache_lru (page ,
279
347
dvnode -> vfs_inode .i_mapping ,
280
348
i , gfp );
281
349
if (ret < 0 )
282
350
goto error ;
283
351
284
- attach_page_private (req -> pages [i ], (void * )1 );
285
- unlock_page (req -> pages [i ]);
352
+ attach_page_private (page , (void * )1 );
353
+ unlock_page (page );
354
+ req -> nr_pages ++ ;
286
355
i ++ ;
287
356
} else {
357
+ req -> nr_pages += n ;
288
358
i += n ;
289
359
}
290
- } while ( i < req -> nr_pages );
360
+ }
291
361
292
362
/* If we're going to reload, we need to lock all the pages to prevent
293
363
* races.
@@ -305,18 +375,23 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
305
375
306
376
if (!test_bit (AFS_VNODE_DIR_VALID , & dvnode -> flags )) {
307
377
trace_afs_reload_dir (dvnode );
308
- ret = afs_fetch_data (dvnode , key , req );
378
+ ret = afs_fetch_data (dvnode , req );
309
379
if (ret < 0 )
310
380
goto error_unlock ;
311
381
312
382
task_io_account_read (PAGE_SIZE * req -> nr_pages );
313
383
314
- if (req -> len < req -> file_size )
315
- goto content_has_grown ;
384
+ if (req -> len < req -> file_size ) {
385
+ /* The content has grown, so we need to expand the
386
+ * buffer.
387
+ */
388
+ up_write (& dvnode -> validate_lock );
389
+ goto expand ;
390
+ }
316
391
317
392
/* Validate the data we just read. */
318
- ret = - EIO ;
319
- if (! afs_dir_check_pages ( dvnode , req ) )
393
+ ret = afs_dir_check ( dvnode , req ) ;
394
+ if (ret < 0 )
320
395
goto error_unlock ;
321
396
322
397
// TODO: Trim excess pages
@@ -334,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
334
409
afs_put_read (req );
335
410
_leave (" = %d" , ret );
336
411
return ERR_PTR (ret );
337
-
338
- content_has_grown :
339
- up_write (& dvnode -> validate_lock );
340
- afs_put_read (req );
341
- goto retry ;
342
412
}
343
413
344
414
/*
@@ -448,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
448
518
struct afs_read * req ;
449
519
struct page * page ;
450
520
unsigned blkoff , limit ;
521
+ void __rcu * * slot ;
451
522
int ret ;
452
523
453
524
_enter ("{%lu},%u,," , dir -> i_ino , (unsigned )ctx -> pos );
@@ -472,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
472
543
blkoff = ctx -> pos & ~(sizeof (union afs_xdr_dir_block ) - 1 );
473
544
474
545
/* Fetch the appropriate page from the directory and re-add it
475
- * to the LRU.
546
+ * to the LRU. We have all the pages pinned with an extra ref.
476
547
*/
477
- page = req -> pages [blkoff / PAGE_SIZE ];
548
+ rcu_read_lock ();
549
+ page = NULL ;
550
+ slot = radix_tree_lookup_slot (& dvnode -> vfs_inode .i_mapping -> i_pages ,
551
+ blkoff / PAGE_SIZE );
552
+ if (slot )
553
+ page = radix_tree_deref_slot (slot );
554
+ rcu_read_unlock ();
478
555
if (!page ) {
479
556
ret = afs_bad (dvnode , afs_file_error_dir_missing_page );
480
557
break ;
@@ -2006,6 +2083,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
2006
2083
afs_stat_v (dvnode , n_inval );
2007
2084
2008
2085
/* we clean up only if the entire page is being invalidated */
2009
- if (offset == 0 && length == PAGE_SIZE )
2086
+ if (offset == 0 && length == thp_size ( page ) )
2010
2087
detach_page_private (page );
2011
2088
}
0 commit comments