@@ -55,13 +55,6 @@ static inline bool xfs_buf_is_uncached(struct xfs_buf *bp)
55
55
return bp -> b_rhash_key == XFS_BUF_DADDR_NULL ;
56
56
}
57
57
58
- static inline int
59
- xfs_buf_vmap_len (
60
- struct xfs_buf * bp )
61
- {
62
- return (bp -> b_page_count * PAGE_SIZE );
63
- }
64
-
65
58
/*
66
59
* When we mark a buffer stale, we remove the buffer from the LRU and clear the
67
60
* b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -190,29 +183,6 @@ _xfs_buf_alloc(
190
183
return 0 ;
191
184
}
192
185
193
- static void
194
- xfs_buf_free_pages (
195
- struct xfs_buf * bp )
196
- {
197
- uint i ;
198
-
199
- ASSERT (bp -> b_flags & _XBF_PAGES );
200
-
201
- if (is_vmalloc_addr (bp -> b_addr ))
202
- vm_unmap_ram (bp -> b_addr , bp -> b_page_count );
203
-
204
- for (i = 0 ; i < bp -> b_page_count ; i ++ ) {
205
- if (bp -> b_pages [i ])
206
- folio_put (page_folio (bp -> b_pages [i ]));
207
- }
208
- mm_account_reclaimed_pages (howmany (BBTOB (bp -> b_length ), PAGE_SIZE ));
209
-
210
- if (bp -> b_pages != bp -> b_page_array )
211
- kfree (bp -> b_pages );
212
- bp -> b_pages = NULL ;
213
- bp -> b_flags &= ~_XBF_PAGES ;
214
- }
215
-
216
186
static void
217
187
xfs_buf_free_callback (
218
188
struct callback_head * cb )
@@ -227,16 +197,23 @@ static void
227
197
xfs_buf_free (
228
198
struct xfs_buf * bp )
229
199
{
200
+ unsigned int size = BBTOB (bp -> b_length );
201
+
230
202
trace_xfs_buf_free (bp , _RET_IP_ );
231
203
232
204
ASSERT (list_empty (& bp -> b_lru ));
233
205
206
+ if (!xfs_buftarg_is_mem (bp -> b_target ) && size >= PAGE_SIZE )
207
+ mm_account_reclaimed_pages (howmany (size , PAGE_SIZE ));
208
+
234
209
if (xfs_buftarg_is_mem (bp -> b_target ))
235
210
xmbuf_unmap_page (bp );
236
- else if (bp -> b_flags & _XBF_PAGES )
237
- xfs_buf_free_pages (bp );
211
+ else if (is_vmalloc_addr ( bp -> b_addr ) )
212
+ vfree (bp -> b_addr );
238
213
else if (bp -> b_flags & _XBF_KMEM )
239
214
kfree (bp -> b_addr );
215
+ else
216
+ folio_put (virt_to_folio (bp -> b_addr ));
240
217
241
218
call_rcu (& bp -> b_rcu , xfs_buf_free_callback );
242
219
}
@@ -264,9 +241,6 @@ xfs_buf_alloc_kmem(
264
241
bp -> b_addr = NULL ;
265
242
return - ENOMEM ;
266
243
}
267
- bp -> b_pages = bp -> b_page_array ;
268
- bp -> b_pages [0 ] = kmem_to_page (bp -> b_addr );
269
- bp -> b_page_count = 1 ;
270
244
bp -> b_flags |= _XBF_KMEM ;
271
245
return 0 ;
272
246
}
@@ -287,9 +261,9 @@ xfs_buf_alloc_kmem(
287
261
* by the rest of the code - the buffer memory spans a single contiguous memory
288
262
* region that we don't have to map and unmap to access the data directly.
289
263
*
290
- * The third type of buffer is the multi-page buffer. These are always made
291
- * up of single pages so that they can be fed to vmap_ram() to return a
292
- * contiguous memory region we can access the data through .
264
+ * The third type of buffer is the vmalloc()d buffer. This provides the buffer
265
+ * with the required contiguous memory region but backed by discontiguous
266
+ * physical pages .
293
267
*/
294
268
static int
295
269
xfs_buf_alloc_backing_mem (
@@ -299,7 +273,6 @@ xfs_buf_alloc_backing_mem(
299
273
size_t size = BBTOB (bp -> b_length );
300
274
gfp_t gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN ;
301
275
struct folio * folio ;
302
- long filled = 0 ;
303
276
304
277
if (xfs_buftarg_is_mem (bp -> b_target ))
305
278
return xmbuf_map_page (bp );
@@ -351,98 +324,18 @@ xfs_buf_alloc_backing_mem(
351
324
goto fallback ;
352
325
}
353
326
bp -> b_addr = folio_address (folio );
354
- bp -> b_page_array [0 ] = & folio -> page ;
355
- bp -> b_pages = bp -> b_page_array ;
356
- bp -> b_page_count = 1 ;
357
- bp -> b_flags |= _XBF_PAGES ;
358
327
return 0 ;
359
328
360
329
fallback :
361
- /* Fall back to allocating an array of single page folios. */
362
- bp -> b_page_count = DIV_ROUND_UP (size , PAGE_SIZE );
363
- if (bp -> b_page_count <= XB_PAGES ) {
364
- bp -> b_pages = bp -> b_page_array ;
365
- } else {
366
- bp -> b_pages = kzalloc (sizeof (struct page * ) * bp -> b_page_count ,
367
- gfp_mask );
368
- if (!bp -> b_pages )
369
- return - ENOMEM ;
370
- }
371
- bp -> b_flags |= _XBF_PAGES ;
372
-
373
- /*
374
- * Bulk filling of pages can take multiple calls. Not filling the entire
375
- * array is not an allocation failure, so don't back off if we get at
376
- * least one extra page.
377
- */
378
330
for (;;) {
379
- long last = filled ;
380
-
381
- filled = alloc_pages_bulk (gfp_mask , bp -> b_page_count ,
382
- bp -> b_pages );
383
- if (filled == bp -> b_page_count ) {
384
- XFS_STATS_INC (bp -> b_mount , xb_page_found );
331
+ bp -> b_addr = __vmalloc (size , gfp_mask );
332
+ if (bp -> b_addr )
385
333
break ;
386
- }
387
-
388
- if (filled != last )
389
- continue ;
390
-
391
- if (flags & XBF_READ_AHEAD ) {
392
- xfs_buf_free_pages (bp );
334
+ if (flags & XBF_READ_AHEAD )
393
335
return - ENOMEM ;
394
- }
395
-
396
336
XFS_STATS_INC (bp -> b_mount , xb_page_retries );
397
337
memalloc_retry_wait (gfp_mask );
398
338
}
399
- return 0 ;
400
- }
401
-
402
- /*
403
- * Map buffer into kernel address-space if necessary.
404
- */
405
- STATIC int
406
- _xfs_buf_map_pages (
407
- struct xfs_buf * bp ,
408
- xfs_buf_flags_t flags )
409
- {
410
- ASSERT (bp -> b_flags & _XBF_PAGES );
411
- if (bp -> b_page_count == 1 ) {
412
- /* A single page buffer is always mappable */
413
- bp -> b_addr = page_address (bp -> b_pages [0 ]);
414
- } else {
415
- int retried = 0 ;
416
- unsigned nofs_flag ;
417
-
418
- /*
419
- * vm_map_ram() will allocate auxiliary structures (e.g.
420
- * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
421
- * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
422
- * from the same call site that can be run from both above and
423
- * below memory reclaim causes lockdep false positives. Hence we
424
- * always need to force this allocation to nofs context because
425
- * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
426
- * prevent false positive lockdep reports.
427
- *
428
- * XXX(dgc): I think dquot reclaim is the only place we can get
429
- * to this function from memory reclaim context now. If we fix
430
- * that like we've fixed inode reclaim to avoid writeback from
431
- * reclaim, this nofs wrapping can go away.
432
- */
433
- nofs_flag = memalloc_nofs_save ();
434
- do {
435
- bp -> b_addr = vm_map_ram (bp -> b_pages , bp -> b_page_count ,
436
- -1 );
437
- if (bp -> b_addr )
438
- break ;
439
- vm_unmap_aliases ();
440
- } while (retried ++ <= 1 );
441
- memalloc_nofs_restore (nofs_flag );
442
-
443
- if (!bp -> b_addr )
444
- return - ENOMEM ;
445
- }
446
339
447
340
return 0 ;
448
341
}
@@ -562,7 +455,7 @@ xfs_buf_find_lock(
562
455
return - ENOENT ;
563
456
}
564
457
ASSERT ((bp -> b_flags & _XBF_DELWRI_Q ) == 0 );
565
- bp -> b_flags &= _XBF_KMEM | _XBF_PAGES ;
458
+ bp -> b_flags &= _XBF_KMEM ;
566
459
bp -> b_ops = NULL ;
567
460
}
568
461
return 0 ;
@@ -748,18 +641,6 @@ xfs_buf_get_map(
748
641
xfs_perag_put (pag );
749
642
}
750
643
751
- /* We do not hold a perag reference anymore. */
752
- if (!bp -> b_addr ) {
753
- error = _xfs_buf_map_pages (bp , flags );
754
- if (unlikely (error )) {
755
- xfs_warn_ratelimited (btp -> bt_mount ,
756
- "%s: failed to map %u pages" , __func__ ,
757
- bp -> b_page_count );
758
- xfs_buf_relse (bp );
759
- return error ;
760
- }
761
- }
762
-
763
644
/*
764
645
* Clear b_error if this is a lookup from a caller that doesn't expect
765
646
* valid data to be found in the buffer.
@@ -1002,14 +883,6 @@ xfs_buf_get_uncached(
1002
883
if (error )
1003
884
goto fail_free_buf ;
1004
885
1005
- if (!bp -> b_addr )
1006
- error = _xfs_buf_map_pages (bp , 0 );
1007
- if (unlikely (error )) {
1008
- xfs_warn (target -> bt_mount ,
1009
- "%s: failed to map pages" , __func__ );
1010
- goto fail_free_buf ;
1011
- }
1012
-
1013
886
trace_xfs_buf_get_uncached (bp , _RET_IP_ );
1014
887
* bpp = bp ;
1015
888
return 0 ;
@@ -1343,7 +1216,7 @@ __xfs_buf_ioend(
1343
1216
if (bp -> b_flags & XBF_READ ) {
1344
1217
if (!bp -> b_error && is_vmalloc_addr (bp -> b_addr ))
1345
1218
invalidate_kernel_vmap_range (bp -> b_addr ,
1346
- xfs_buf_vmap_len ( bp ));
1219
+ roundup ( BBTOB ( bp -> b_length ), PAGE_SIZE ));
1347
1220
if (!bp -> b_error && bp -> b_ops )
1348
1221
bp -> b_ops -> verify_read (bp );
1349
1222
if (!bp -> b_error )
@@ -1504,29 +1377,48 @@ static void
1504
1377
xfs_buf_submit_bio (
1505
1378
struct xfs_buf * bp )
1506
1379
{
1507
- unsigned int size = BBTOB (bp -> b_length );
1508
- unsigned int map = 0 , p ;
1380
+ unsigned int map = 0 ;
1509
1381
struct blk_plug plug ;
1510
1382
struct bio * bio ;
1511
1383
1512
- bio = bio_alloc ( bp -> b_target -> bt_bdev , bp -> b_page_count ,
1513
- xfs_buf_bio_op (bp ), GFP_NOIO );
1514
- bio -> bi_private = bp ;
1515
- bio -> bi_end_io = xfs_buf_bio_end_io ;
1384
+ if ( is_vmalloc_addr ( bp -> b_addr )) {
1385
+ unsigned int size = BBTOB (bp -> b_length );
1386
+ unsigned int alloc_size = roundup ( size , PAGE_SIZE ) ;
1387
+ void * data = bp -> b_addr ;
1516
1388
1517
- if (bp -> b_page_count == 1 ) {
1518
- __bio_add_page (bio , virt_to_page (bp -> b_addr ), size ,
1519
- offset_in_page (bp -> b_addr ));
1520
- } else {
1521
- for (p = 0 ; p < bp -> b_page_count ; p ++ )
1522
- __bio_add_page (bio , bp -> b_pages [p ], PAGE_SIZE , 0 );
1523
- bio -> bi_iter .bi_size = size ; /* limit to the actual size used */
1389
+ bio = bio_alloc (bp -> b_target -> bt_bdev , alloc_size >> PAGE_SHIFT ,
1390
+ xfs_buf_bio_op (bp ), GFP_NOIO );
1391
+
1392
+ do {
1393
+ unsigned int len = min (size , PAGE_SIZE );
1524
1394
1525
- if (is_vmalloc_addr (bp -> b_addr ))
1526
- flush_kernel_vmap_range (bp -> b_addr ,
1527
- xfs_buf_vmap_len (bp ));
1395
+ ASSERT (offset_in_page (data ) == 0 );
1396
+ __bio_add_page (bio , vmalloc_to_page (data ), len , 0 );
1397
+ data += len ;
1398
+ size -= len ;
1399
+ } while (size );
1400
+
1401
+ flush_kernel_vmap_range (bp -> b_addr , alloc_size );
1402
+ } else {
1403
+ /*
1404
+ * Single folio or slab allocation. Must be contiguous and thus
1405
+ * only a single bvec is needed.
1406
+ *
1407
+ * This uses the page based bio add helper for now as that is
1408
+ * the lowest common denominator between folios and slab
1409
+ * allocations. To be replaced with a better block layer
1410
+ * helper soon (hopefully).
1411
+ */
1412
+ bio = bio_alloc (bp -> b_target -> bt_bdev , 1 , xfs_buf_bio_op (bp ),
1413
+ GFP_NOIO );
1414
+ __bio_add_page (bio , virt_to_page (bp -> b_addr ),
1415
+ BBTOB (bp -> b_length ),
1416
+ offset_in_page (bp -> b_addr ));
1528
1417
}
1529
1418
1419
+ bio -> bi_private = bp ;
1420
+ bio -> bi_end_io = xfs_buf_bio_end_io ;
1421
+
1530
1422
/*
1531
1423
* If there is more than one map segment, split out a new bio for each
1532
1424
* map except of the last one. The last map is handled by the
0 commit comments