@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
 	struct afs_fid		fids[50];
 };
 
+/*
+ * Drop the refs that we're holding on the pages we were reading into.  We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+	struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
+	struct page *page;
+	pgoff_t last = req->nr_pages - 1;
+
+	XA_STATE(xas, &mapping->i_pages, 0);
+
+	if (unlikely(!req->nr_pages))
+		return;
+
+	rcu_read_lock();
+	xas_for_each(&xas, page, last) {
+		if (xas_retry(&xas, page))
+			continue;
+		BUG_ON(xa_is_value(page));
+		BUG_ON(PageCompound(page));
+		ASSERTCMP(page->mapping, ==, mapping);
+
+		put_page(page);
+	}
+
+	rcu_read_unlock();
+}
+
 /*
  * check that a directory page is valid
  */
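The new cleanup helper walks the page cache's XArray directly instead of a private page array. Here is the same RCU-protected walk as a standalone sketch, not part of the patch (the function name and body are illustrative; xas_for_each() and xas_retry() are the real XArray iteration primitives):

/* Illustrative sketch only: visit pages [0, last] of a mapping under
 * the RCU read lock.  xas_retry() restarts the lookup when the XArray
 * is being modified concurrently; refs on the pages must already be
 * held by the caller.
 */
static void example_walk_pages(struct address_space *mapping, pgoff_t last)
{
	XA_STATE(xas, &mapping->i_pages, 0);
	struct page *page;

	rcu_read_lock();
	xas_for_each(&xas, page, last) {
		if (xas_retry(&xas, page))
			continue;
		/* ... operate on page ... */
	}
	rcu_read_unlock();
}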
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 	qty /= sizeof(union afs_xdr_dir_block);
 
 	/* check them */
-	dbuf = kmap(page);
+	dbuf = kmap_atomic(page);
 	for (tmp = 0; tmp < qty; tmp++) {
 		if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
 			printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 		((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
 	}
 
-	kunmap(page);
+	kunmap_atomic(dbuf);
 
 checked:
 	afs_stat_v(dvnode, n_read_dir);
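Both kmap() calls in afs_dir_check_page() become kmap_atomic(), which suits this short, non-sleeping scan. Note the asymmetry in the unmap: kunmap() takes the page, while kunmap_atomic() takes the mapped address, which is why the second hunk passes dbuf rather than page. An illustrative pairing (the function itself is hypothetical):

static u8 example_first_byte(struct page *page)
{
	u8 *p = kmap_atomic(page);	/* may not sleep until unmapped */
	u8 first = p[0];

	kunmap_atomic(p);		/* takes the address, not the page */
	return first;
}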
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 }
 
 /*
- * Check the contents of a directory that we've just read.
+ * Dump the contents of a directory.
  */
-static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 {
 	struct afs_xdr_dir_page *dbuf;
-	unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+	struct page *page;
+	unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+	pgoff_t last = req->nr_pages - 1;
 
-	for (i = 0; i < req->nr_pages; i++)
-		if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
-			goto bad;
-	return true;
+	XA_STATE(xas, &mapping->i_pages, 0);
 
-bad:
-	pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+	pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
 		dvnode->fid.vid, dvnode->fid.vnode,
-		req->file_size, req->len, req->actual_len, req->remain);
-	pr_warn("DIR %llx %x %x %x\n",
-		req->pos, req->index, req->nr_pages, req->offset);
+		req->file_size, req->len, req->actual_len);
+	pr_warn("DIR %llx %x %zx %zx\n",
+		req->pos, req->nr_pages,
+		req->iter->iov_offset, iov_iter_count(req->iter));
 
-	for (i = 0; i < req->nr_pages; i++) {
-		dbuf = kmap(req->pages[i]);
-		for (j = 0; j < qty; j++) {
-			union afs_xdr_dir_block *block = &dbuf->blocks[j];
+	xas_for_each(&xas, page, last) {
+		if (xas_retry(&xas, page))
+			continue;
+
+		BUG_ON(PageCompound(page));
+		BUG_ON(page->mapping != mapping);
+
+		dbuf = kmap_atomic(page);
+		for (i = 0; i < qty; i++) {
+			union afs_xdr_dir_block *block = &dbuf->blocks[i];
 
-			pr_warn("[%02x] %32phN\n", i * qty + j, block);
+			pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
 		}
-		kunmap(req->pages[i]);
+		kunmap_atomic(dbuf);
 	}
-	return false;
+}
+
+/*
+ * Check all the pages in a directory.  All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+{
+	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+	struct page *page;
+	pgoff_t last = req->nr_pages - 1;
+	int ret = 0;
+
+	XA_STATE(xas, &mapping->i_pages, 0);
+
+	if (unlikely(!req->nr_pages))
+		return 0;
+
+	rcu_read_lock();
+	xas_for_each(&xas, page, last) {
+		if (xas_retry(&xas, page))
+			continue;
+
+		BUG_ON(PageCompound(page));
+		BUG_ON(page->mapping != mapping);
+
+		if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+			afs_dir_dump(dvnode, req);
+			ret = -EIO;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return ret;
 }
 
 /*
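afs_dir_check_pages() is split in two: afs_dir_check() returns 0 or -EIO so the caller can propagate an errno, and the diagnostic dump moves into afs_dir_dump(), which now derives each block number from page->index (hence the switch to %02lx, since page->index is an unsigned long) rather than from an array position. The %*phN specifier used in the dump is the printk extension that prints a small buffer as contiguous hex bytes; a tiny hypothetical example:

static void example_hexdump(void)
{
	u8 magic[4] = { 0x12, 0x34, 0x56, 0x78 };

	pr_warn("magic %4phN\n", magic);	/* prints "magic 12345678" */
}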
@@ -214,81 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
 	struct afs_read *req;
 	loff_t i_size;
-	int nr_pages, nr_inline, i, n;
-	int ret = -ENOMEM;
+	int nr_pages, i, n;
+	int ret;
+
+	_enter("");
 
-retry:
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&req->usage, 1);
+	req->vnode = dvnode;
+	req->key = key_get(key);
+	req->cleanup = afs_dir_read_cleanup;
+
+expand:
 	i_size = i_size_read(&dvnode->vfs_inode);
-	if (i_size < 2048)
-		return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+	if (i_size < 2048) {
+		ret = afs_bad(dvnode, afs_file_error_dir_small);
+		goto error;
+	}
 	if (i_size > 2048 * 1024) {
 		trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
-		return ERR_PTR(-EFBIG);
+		ret = -EFBIG;
+		goto error;
 	}
 
 	_enter("%llu", i_size);
 
-	/* Get a request record to hold the page list.  We want to hold it
-	 * inline if we can, but we don't want to make an order 1 allocation.
-	 */
 	nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
-	nr_inline = nr_pages;
-	if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
-		nr_inline = 0;
 
-	req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	refcount_set(&req->usage, 1);
-	req->key = key_get(key);
-	req->nr_pages = nr_pages;
 	req->actual_len = i_size; /* May change */
 	req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
 	req->data_version = dvnode->status.data_version; /* May change */
-	if (nr_inline > 0) {
-		req->pages = req->array;
-	} else {
-		req->pages = kcalloc(nr_pages, sizeof(struct page *),
-				     GFP_KERNEL);
-		if (!req->pages)
-			goto error;
-	}
+	iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
+			0, i_size);
+	req->iter = &req->def_iter;
 
-	/* Get a list of all the pages that hold or will hold the directory
-	 * content.  We need to fill in any gaps that we might find where the
-	 * memory reclaimer has been at work.  If there are any gaps, we will
+	/* Fill in any gaps that we might find where the memory reclaimer has
+	 * been at work and pin all the pages.  If there are any gaps, we will
 	 * need to reread the entire directory contents.
 	 */
-	i = 0;
-	do {
+	i = req->nr_pages;
+	while (i < nr_pages) {
+		struct page *pages[8], *page;
+
 		n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-					  req->nr_pages - i,
-					  req->pages + i);
-		_debug("find %u at %u/%u", n, i, req->nr_pages);
+					  min_t(unsigned int, nr_pages - i,
						ARRAY_SIZE(pages)),
+					  pages);
+		_debug("find %u at %u/%u", n, i, nr_pages);
+
 		if (n == 0) {
 			gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
 
 			if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
 				afs_stat_v(dvnode, n_inval);
 
 			ret = -ENOMEM;
-			req->pages[i] = __page_cache_alloc(gfp);
-			if (!req->pages[i])
+			page = __page_cache_alloc(gfp);
+			if (!page)
 				goto error;
-			ret = add_to_page_cache_lru(req->pages[i],
+			ret = add_to_page_cache_lru(page,
						    dvnode->vfs_inode.i_mapping,
						    i, gfp);
 			if (ret < 0)
 				goto error;
 
-			attach_page_private(req->pages[i], (void *)1);
-			unlock_page(req->pages[i]);
+			attach_page_private(page, (void *)1);
+			unlock_page(page);
+			req->nr_pages++;
 			i++;
 		} else {
+			req->nr_pages += n;
 			i += n;
 		}
-	} while (i < req->nr_pages);
+	}
 
 	/* If we're going to reload, we need to lock all the pages to prevent
 	 * races.
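This hunk is the core of the rewrite: the afs_read request loses its pages[]/array fields, and the read buffer is instead described by an ITER_XARRAY iov_iter built directly over mapping->i_pages, so no page-pointer array needs allocating at all. req->nr_pages is now incremented as pages are pinned, which is exactly what afs_dir_read_cleanup() relies on, and the old retry: label becomes expand: because the request can be reused rather than rebuilt. Downstream consumers can then use the generic iov_iter helpers instead of indexing a private array; a sketch under that assumption (the wrapper function is hypothetical, but iov_iter_advance() and iov_iter_count() are real helpers):

/* Hypothetical consumer: skip n bytes just unmarshalled and report how
 * many bytes the request still expects.
 */
static size_t example_consume(struct iov_iter *iter, size_t n)
{
	iov_iter_advance(iter, n);
	return iov_iter_count(iter);
}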
@@ -312,12 +381,17 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 
 	task_io_account_read(PAGE_SIZE * req->nr_pages);
 
-	if (req->len < req->file_size)
-		goto content_has_grown;
+	if (req->len < req->file_size) {
+		/* The content has grown, so we need to expand the
+		 * buffer.
+		 */
+		up_write(&dvnode->validate_lock);
+		goto expand;
+	}
 
 	/* Validate the data we just read. */
-	ret = -EIO;
-	if (!afs_dir_check_pages(dvnode, req))
+	ret = afs_dir_check(dvnode, req);
+	if (ret < 0)
 		goto error_unlock;
 
 	// TODO: Trim excess pages
@@ -335,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 	afs_put_read(req);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
-
-content_has_grown:
-	up_write(&dvnode->validate_lock);
-	afs_put_read(req);
-	goto retry;
 }
 
 /*
@@ -449,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 	struct afs_read *req;
 	struct page *page;
 	unsigned blkoff, limit;
+	void __rcu **slot;
 	int ret;
 
 	_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -473,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 		blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
 		/* Fetch the appropriate page from the directory and re-add it
-		 * to the LRU.
+		 * to the LRU.  We have all the pages pinned with an extra ref.
 		 */
-		page = req->pages[blkoff / PAGE_SIZE];
+		rcu_read_lock();
+		page = NULL;
+		slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+					      blkoff / PAGE_SIZE);
+		if (slot)
+			page = radix_tree_deref_slot(slot);
+		rcu_read_unlock();
 		if (!page) {
 			ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
 			break;
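With req->pages[] gone, afs_dir_iterate() peeks the page straight out of the page cache. Dereferencing the slot without taking a new page reference is safe here only because afs_read_dir() holds a ref on every page for the lifetime of the request. The same lookup shape as a standalone sketch (the function is illustrative; a general-purpose caller would also handle radix_tree_deref_retry()):

static struct page *example_peek_page(struct address_space *mapping,
				      pgoff_t index)
{
	void __rcu **slot;
	struct page *page = NULL;

	rcu_read_lock();
	slot = radix_tree_lookup_slot(&mapping->i_pages, index);
	if (slot)
		page = radix_tree_deref_slot(slot);
	rcu_read_unlock();

	return page;	/* valid only while a ref is held elsewhere */
}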