@@ -102,6 +102,35 @@ struct afs_lookup_cookie {
         struct afs_fid          fids[50];
 };
 
+/*
+ * Drop the refs that we're holding on the pages we were reading into.  We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+        struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
+        struct page *page;
+        pgoff_t last = req->nr_pages - 1;
+
+        XA_STATE(xas, &mapping->i_pages, 0);
+
+        if (unlikely(!req->nr_pages))
+                return;
+
+        rcu_read_lock();
+        xas_for_each(&xas, page, last) {
+                if (xas_retry(&xas, page))
+                        continue;
+                BUG_ON(xa_is_value(page));
+                BUG_ON(PageCompound(page));
+                ASSERTCMP(page->mapping, ==, mapping);
+
+                put_page(page);
+        }
+
+        rcu_read_unlock();
+}
+
 /*
  * check that a directory page is valid
  */
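The new cleanup handler walks the inode's pagecache XArray directly instead of a separately stored page array. As a rough, self-contained sketch of the same xas_for_each() pattern (the helper name and context here are illustrative, not part of the patch):

#include <linux/pagemap.h>
#include <linux/xarray.h>

/* Illustrative only: drop one reference on each of the first @nr pages of
 * @mapping.  Safe under RCU because the caller is assumed to already hold
 * those references, so none of the pages can be freed mid-walk.
 */
static void put_cached_page_refs(struct address_space *mapping, pgoff_t nr)
{
        struct page *page;
        XA_STATE(xas, &mapping->i_pages, 0);

        if (!nr)
                return;

        rcu_read_lock();
        xas_for_each(&xas, page, nr - 1) {
                /* A concurrent store can leave a retry entry; xas_retry()
                 * resets the walk so this index gets looked up again.
                 */
                if (xas_retry(&xas, page))
                        continue;
                put_page(page);
        }
        rcu_read_unlock();
}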
@@ -127,7 +156,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
         qty /= sizeof(union afs_xdr_dir_block);
 
         /* check them */
-        dbuf = kmap(page);
+        dbuf = kmap_atomic(page);
         for (tmp = 0; tmp < qty; tmp++) {
                 if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
                         printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -146,7 +175,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
                 ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
         }
 
-        kunmap(page);
+        kunmap_atomic(dbuf);
 
 checked:
         afs_stat_v(dvnode, n_read_dir);
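Note the asymmetry this conversion introduces: kunmap() takes the struct page, but kunmap_atomic() takes the address returned by kmap_atomic(), and nothing between the two calls may sleep. A minimal illustrative helper:

#include <linux/highmem.h>

/* Hypothetical helper: clear the first byte of a page via a temporary
 * atomic mapping.  kmap_atomic() disables preemption, so no sleeping is
 * allowed until the matching unmap.
 */
static void zero_first_byte(struct page *page)
{
        u8 *p = kmap_atomic(page);

        p[0] = 0;
        kunmap_atomic(p);       /* pairs with the address, not the page */
}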
@@ -157,35 +186,74 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 }
 
 /*
- * Check the contents of a directory that we've just read.
+ * Dump the contents of a directory.
  */
-static bool afs_dir_check_pages(struct afs_vnode *dvnode, struct afs_read *req)
+static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 {
         struct afs_xdr_dir_page *dbuf;
-        unsigned int i, j, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+        struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+        struct page *page;
+        unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+        pgoff_t last = req->nr_pages - 1;
 
-        for (i = 0; i < req->nr_pages; i++)
-                if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len))
-                        goto bad;
-        return true;
+        XA_STATE(xas, &mapping->i_pages, 0);
 
-bad:
-        pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx r=%llx\n",
+        pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
                 dvnode->fid.vid, dvnode->fid.vnode,
-                req->file_size, req->len, req->actual_len, req->remain);
-        pr_warn("DIR %llx %x %x %x\n",
-                req->pos, req->index, req->nr_pages, req->offset);
+                req->file_size, req->len, req->actual_len);
+        pr_warn("DIR %llx %x %zx %zx\n",
+                req->pos, req->nr_pages,
+                req->iter->iov_offset, iov_iter_count(req->iter));
 
-        for (i = 0; i < req->nr_pages; i++) {
-                dbuf = kmap(req->pages[i]);
-                for (j = 0; j < qty; j++) {
-                        union afs_xdr_dir_block *block = &dbuf->blocks[j];
+        xas_for_each(&xas, page, last) {
+                if (xas_retry(&xas, page))
+                        continue;
+
+                BUG_ON(PageCompound(page));
+                BUG_ON(page->mapping != mapping);
+
+                dbuf = kmap_atomic(page);
+                for (i = 0; i < qty; i++) {
+                        union afs_xdr_dir_block *block = &dbuf->blocks[i];
 
-                        pr_warn("[%02x] %32phN\n", i * qty + j, block);
+                        pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
                 }
-                kunmap(req->pages[i]);
+                kunmap_atomic(dbuf);
         }
-        return false;
+}
+
+/*
+ * Check all the pages in a directory.  All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+{
+        struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+        struct page *page;
+        pgoff_t last = req->nr_pages - 1;
+        int ret = 0;
+
+        XA_STATE(xas, &mapping->i_pages, 0);
+
+        if (unlikely(!req->nr_pages))
+                return 0;
+
+        rcu_read_lock();
+        xas_for_each(&xas, page, last) {
+                if (xas_retry(&xas, page))
+                        continue;
+
+                BUG_ON(PageCompound(page));
+                BUG_ON(page->mapping != mapping);
+
+                if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+                        afs_dir_dump(dvnode, req);
+                        ret = -EIO;
+                        break;
+                }
+        }
+
+        rcu_read_unlock();
+        return ret;
 }
 
 /*
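The old afs_dir_check_pages() mixed validation and diagnostics and returned a bool; it is now split into afs_dir_check(), which returns 0 or -EIO, and afs_dir_dump(), which only prints. Because the pages no longer sit in a dense array, the dump derives each block's global number from page->index; roughly:

/* Illustrative index math (not patch code): with qty directory blocks per
 * page, block b of the file lives at slot b % qty of page b / qty, so the
 * dump labels slot i of a page as page->index * qty + i.
 */
static unsigned long dir_block_number(pgoff_t page_index, unsigned int slot,
                                      unsigned int qty)
{
        return page_index * qty + slot;
}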
@@ -214,80 +282,82 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
         struct afs_read *req;
         loff_t i_size;
-        int nr_pages, nr_inline, i, n;
-        int ret = -ENOMEM;
+        int nr_pages, i, n;
+        int ret;
+
+        _enter("");
 
-retry:
+        req = kzalloc(sizeof(*req), GFP_KERNEL);
+        if (!req)
+                return ERR_PTR(-ENOMEM);
+
+        refcount_set(&req->usage, 1);
+        req->vnode = dvnode;
+        req->key = key_get(key);
+        req->cleanup = afs_dir_read_cleanup;
+
+expand:
         i_size = i_size_read(&dvnode->vfs_inode);
-        if (i_size < 2048)
-                return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+        if (i_size < 2048) {
+                ret = afs_bad(dvnode, afs_file_error_dir_small);
+                goto error;
+        }
         if (i_size > 2048 * 1024) {
                 trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
-                return ERR_PTR(-EFBIG);
+                ret = -EFBIG;
+                goto error;
         }
 
         _enter("%llu", i_size);
 
-        /* Get a request record to hold the page list.  We want to hold it
-         * inline if we can, but we don't want to make an order 1 allocation.
-         */
         nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
-        nr_inline = nr_pages;
-        if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
-                nr_inline = 0;
 
-        req = kzalloc(struct_size(req, array, nr_inline), GFP_KERNEL);
-        if (!req)
-                return ERR_PTR(-ENOMEM);
-
-        refcount_set(&req->usage, 1);
-        req->nr_pages = nr_pages;
         req->actual_len = i_size; /* May change */
         req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
         req->data_version = dvnode->status.data_version; /* May change */
-        if (nr_inline > 0) {
-                req->pages = req->array;
-        } else {
-                req->pages = kcalloc(nr_pages, sizeof(struct page *),
-                                     GFP_KERNEL);
-                if (!req->pages)
-                        goto error;
-        }
+        iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
+                        0, i_size);
+        req->iter = &req->def_iter;
 
-        /* Get a list of all the pages that hold or will hold the directory
-         * content.  We need to fill in any gaps that we might find where the
-         * memory reclaimer has been at work.  If there are any gaps, we will
+        /* Fill in any gaps that we might find where the memory reclaimer has
+         * been at work and pin all the pages.  If there are any gaps, we will
          * need to reread the entire directory contents.
          */
-        i = 0;
-        do {
+        i = req->nr_pages;
+        while (i < nr_pages) {
+                struct page *pages[8], *page;
+
                 n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-                                          req->nr_pages - i,
-                                          req->pages + i);
-                _debug("find %u at %u/%u", n, i, req->nr_pages);
+                                          min_t(unsigned int, nr_pages - i,
                                                 ARRAY_SIZE(pages)),
+                                          pages);
+                _debug("find %u at %u/%u", n, i, nr_pages);
+
                 if (n == 0) {
                         gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
 
                         if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
                                 afs_stat_v(dvnode, n_inval);
 
                         ret = -ENOMEM;
-                        req->pages[i] = __page_cache_alloc(gfp);
-                        if (!req->pages[i])
+                        page = __page_cache_alloc(gfp);
+                        if (!page)
                                 goto error;
-                        ret = add_to_page_cache_lru(req->pages[i],
+                        ret = add_to_page_cache_lru(page,
                                                     dvnode->vfs_inode.i_mapping,
                                                     i, gfp);
                         if (ret < 0)
                                 goto error;
 
-                        attach_page_private(req->pages[i], (void *)1);
-                        unlock_page(req->pages[i]);
+                        attach_page_private(page, (void *)1);
+                        unlock_page(page);
+                        req->nr_pages++;
                         i++;
                 } else {
+                        req->nr_pages += n;
                         i += n;
                 }
-        } while (i < req->nr_pages);
+        }
 
         /* If we're going to reload, we need to lock all the pages to prevent
          * races.
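Instead of handing the fetch code an array of page pointers, the request now carries an ITER_XARRAY iov_iter aimed straight at the inode's pagecache (iov_iter_xarray() is introduced elsewhere in this series). An illustrative sketch of how such an iterator serves as a read destination (the helper below is hypothetical):

#include <linux/pagemap.h>
#include <linux/uio.h>

/* Hypothetical helper: copy @len bytes of received data into the pagecache
 * pages of @mapping through an xarray-backed iterator.  Assumes the pages
 * covering [0, len) are already present and pinned, as afs_read_dir()
 * arranges.
 */
static size_t fill_pagecache(struct address_space *mapping,
                             const void *buf, size_t len)
{
        struct iov_iter iter;

        /* READ: the iterator is the destination of a read operation. */
        iov_iter_xarray(&iter, READ, &mapping->i_pages, 0, len);
        return copy_to_iter(buf, len, &iter);
}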
@@ -305,18 +375,23 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 
         if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
                 trace_afs_reload_dir(dvnode);
-                ret = afs_fetch_data(dvnode, key, req);
+                ret = afs_fetch_data(dvnode, req);
                 if (ret < 0)
                         goto error_unlock;
 
                 task_io_account_read(PAGE_SIZE * req->nr_pages);
 
-                if (req->len < req->file_size)
-                        goto content_has_grown;
+                if (req->len < req->file_size) {
+                        /* The content has grown, so we need to expand the
+                         * buffer.
+                         */
+                        up_write(&dvnode->validate_lock);
+                        goto expand;
+                }
 
                 /* Validate the data we just read. */
-                ret = -EIO;
-                if (!afs_dir_check_pages(dvnode, req))
+                ret = afs_dir_check(dvnode, req);
+                if (ret < 0)
                         goto error_unlock;
 
                 // TODO: Trim excess pages
@@ -334,11 +409,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
         afs_put_read(req);
         _leave(" = %d", ret);
         return ERR_PTR(ret);
-
-content_has_grown:
-        up_write(&dvnode->validate_lock);
-        afs_put_read(req);
-        goto retry;
 }
 
 /*
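With the request allocated once up front and a cleanup callback to drop page refs, growth no longer needs the content_has_grown label that freed everything and retried; `goto expand` simply re-sizes and re-pins using the same request record. A toy, userspace-style sketch of that control-flow shape (all names hypothetical):

#include <stddef.h>

struct growbuf {
        size_t len;             /* how much we sized for */
        size_t file_size;       /* how much the producer says exists */
};

/* Stub producer: pretends the content grew once, mid-fetch. */
static int fetch(struct growbuf *b)
{
        static int grown;

        if (!grown++)
                b->file_size += 4096;
        return 0;
}

static int read_all(struct growbuf *b)
{
        int ret;

expand:
        b->len = b->file_size;  /* (re)size for what we currently know */
        ret = fetch(b);
        if (ret < 0)
                return ret;
        if (b->len < b->file_size)
                goto expand;    /* grew while we were fetching */
        return 0;
}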
@@ -448,6 +518,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
         struct afs_read *req;
         struct page *page;
         unsigned blkoff, limit;
+        void __rcu **slot;
         int ret;
 
         _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -472,9 +543,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                 blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
                 /* Fetch the appropriate page from the directory and re-add it
-                 * to the LRU.
+                 * to the LRU.  We have all the pages pinned with an extra ref.
                  */
-                page = req->pages[blkoff / PAGE_SIZE];
+                rcu_read_lock();
+                page = NULL;
+                slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+                                              blkoff / PAGE_SIZE);
+                if (slot)
+                        page = radix_tree_deref_slot(slot);
+                rcu_read_unlock();
                 if (!page) {
                         ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
                         break;
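The iterate path now looks the page up through the mapping's radix tree (the XArray's older interface) rather than indexing req->pages. A plain deref suffices here because every page is pinned; the general unpinned pattern also has to handle retry entries, roughly:

#include <linux/mm.h>
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>

/* Illustrative general-purpose lookup (not patch code): without pinned
 * pages, the deref can return a retry entry while the tree is being
 * modified, which the caller must treat as "look again".
 */
static struct page *lookup_page_rcu(struct radix_tree_root *root,
                                    unsigned long index)
{
        void __rcu **slot;
        struct page *page = NULL;

        rcu_read_lock();
        slot = radix_tree_lookup_slot(root, index);
        if (slot) {
                page = radix_tree_deref_slot(slot);
                if (radix_tree_deref_retry(page))
                        page = NULL;    /* tree changed under us; retry */
        }
        rcu_read_unlock();
        return page;
}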
@@ -2006,6 +2083,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
                 afs_stat_v(dvnode, n_inval);
 
         /* we clean up only if the entire page is being invalidated */
-        if (offset == 0 && length == PAGE_SIZE)
+        if (offset == 0 && length == thp_size(page))
                 detach_page_private(page);
 }
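Finally, testing against thp_size() rather than PAGE_SIZE keeps the whole-page check correct if the page is ever a transparent huge page: thp_size() returns PAGE_SIZE for an ordinary page and the full compound size for a THP. Illustratively:

#include <linux/huge_mm.h>

/* Hypothetical predicate: does this invalidatepage call cover the whole
 * (possibly compound) page?  thp_size() == PAGE_SIZE for a normal page.
 */
static bool invalidates_whole_page(struct page *page,
                                   unsigned int offset, unsigned int length)
{
        return offset == 0 && length == thp_size(page);
}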