Skip to content

Commit e287463

Browse files
Christoph Hellwig authored and cmaiolino committed
xfs: use vmalloc instead of vm_map_area for buffer backing memory
The fallback buffer allocation path currently open codes a suboptimal version of vmalloc to allocate pages that are then mapped into vmalloc space. Switch to using vmalloc instead, which uses all the optimizations in the common vmalloc code, and removes the need to track the backing pages in the xfs_buf structure.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Signed-off-by: Carlos Maiolino <[email protected]>
1 parent fd87851 commit e287463

File tree

3 files changed

+53
-177
lines changed

3 files changed

+53
-177
lines changed

fs/xfs/xfs_buf.c

Lines changed: 52 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,6 @@ static inline bool xfs_buf_is_uncached(struct xfs_buf *bp)
5555
return bp->b_rhash_key == XFS_BUF_DADDR_NULL;
5656
}
5757

58-
static inline int
59-
xfs_buf_vmap_len(
60-
struct xfs_buf *bp)
61-
{
62-
return (bp->b_page_count * PAGE_SIZE);
63-
}
64-
6558
/*
6659
* When we mark a buffer stale, we remove the buffer from the LRU and clear the
6760
* b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -190,29 +183,6 @@ _xfs_buf_alloc(
190183
return 0;
191184
}
192185

193-
static void
194-
xfs_buf_free_pages(
195-
struct xfs_buf *bp)
196-
{
197-
uint i;
198-
199-
ASSERT(bp->b_flags & _XBF_PAGES);
200-
201-
if (is_vmalloc_addr(bp->b_addr))
202-
vm_unmap_ram(bp->b_addr, bp->b_page_count);
203-
204-
for (i = 0; i < bp->b_page_count; i++) {
205-
if (bp->b_pages[i])
206-
folio_put(page_folio(bp->b_pages[i]));
207-
}
208-
mm_account_reclaimed_pages(howmany(BBTOB(bp->b_length), PAGE_SIZE));
209-
210-
if (bp->b_pages != bp->b_page_array)
211-
kfree(bp->b_pages);
212-
bp->b_pages = NULL;
213-
bp->b_flags &= ~_XBF_PAGES;
214-
}
215-
216186
static void
217187
xfs_buf_free_callback(
218188
struct callback_head *cb)
@@ -227,16 +197,23 @@ static void
227197
xfs_buf_free(
228198
struct xfs_buf *bp)
229199
{
200+
unsigned int size = BBTOB(bp->b_length);
201+
230202
trace_xfs_buf_free(bp, _RET_IP_);
231203

232204
ASSERT(list_empty(&bp->b_lru));
233205

206+
if (!xfs_buftarg_is_mem(bp->b_target) && size >= PAGE_SIZE)
207+
mm_account_reclaimed_pages(howmany(size, PAGE_SHIFT));
208+
234209
if (xfs_buftarg_is_mem(bp->b_target))
235210
xmbuf_unmap_page(bp);
236-
else if (bp->b_flags & _XBF_PAGES)
237-
xfs_buf_free_pages(bp);
211+
else if (is_vmalloc_addr(bp->b_addr))
212+
vfree(bp->b_addr);
238213
else if (bp->b_flags & _XBF_KMEM)
239214
kfree(bp->b_addr);
215+
else
216+
folio_put(virt_to_folio(bp->b_addr));
240217

241218
call_rcu(&bp->b_rcu, xfs_buf_free_callback);
242219
}
@@ -264,9 +241,6 @@ xfs_buf_alloc_kmem(
264241
bp->b_addr = NULL;
265242
return -ENOMEM;
266243
}
267-
bp->b_pages = bp->b_page_array;
268-
bp->b_pages[0] = kmem_to_page(bp->b_addr);
269-
bp->b_page_count = 1;
270244
bp->b_flags |= _XBF_KMEM;
271245
return 0;
272246
}
@@ -287,9 +261,9 @@ xfs_buf_alloc_kmem(
287261
* by the rest of the code - the buffer memory spans a single contiguous memory
288262
* region that we don't have to map and unmap to access the data directly.
289263
*
290-
* The third type of buffer is the multi-page buffer. These are always made
291-
* up of single pages so that they can be fed to vmap_ram() to return a
292-
* contiguous memory region we can access the data through.
264+
* The third type of buffer is the vmalloc()d buffer. This provides the buffer
265+
* with the required contiguous memory region but backed by discontiguous
266+
* physical pages.
293267
*/
294268
static int
295269
xfs_buf_alloc_backing_mem(
@@ -299,7 +273,6 @@ xfs_buf_alloc_backing_mem(
299273
size_t size = BBTOB(bp->b_length);
300274
gfp_t gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
301275
struct folio *folio;
302-
long filled = 0;
303276

304277
if (xfs_buftarg_is_mem(bp->b_target))
305278
return xmbuf_map_page(bp);
@@ -351,98 +324,18 @@ xfs_buf_alloc_backing_mem(
351324
goto fallback;
352325
}
353326
bp->b_addr = folio_address(folio);
354-
bp->b_page_array[0] = &folio->page;
355-
bp->b_pages = bp->b_page_array;
356-
bp->b_page_count = 1;
357-
bp->b_flags |= _XBF_PAGES;
358327
return 0;
359328

360329
fallback:
361-
/* Fall back to allocating an array of single page folios. */
362-
bp->b_page_count = DIV_ROUND_UP(size, PAGE_SIZE);
363-
if (bp->b_page_count <= XB_PAGES) {
364-
bp->b_pages = bp->b_page_array;
365-
} else {
366-
bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
367-
gfp_mask);
368-
if (!bp->b_pages)
369-
return -ENOMEM;
370-
}
371-
bp->b_flags |= _XBF_PAGES;
372-
373-
/*
374-
* Bulk filling of pages can take multiple calls. Not filling the entire
375-
* array is not an allocation failure, so don't back off if we get at
376-
* least one extra page.
377-
*/
378330
for (;;) {
379-
long last = filled;
380-
381-
filled = alloc_pages_bulk(gfp_mask, bp->b_page_count,
382-
bp->b_pages);
383-
if (filled == bp->b_page_count) {
384-
XFS_STATS_INC(bp->b_mount, xb_page_found);
331+
bp->b_addr = __vmalloc(size, gfp_mask);
332+
if (bp->b_addr)
385333
break;
386-
}
387-
388-
if (filled != last)
389-
continue;
390-
391-
if (flags & XBF_READ_AHEAD) {
392-
xfs_buf_free_pages(bp);
334+
if (flags & XBF_READ_AHEAD)
393335
return -ENOMEM;
394-
}
395-
396336
XFS_STATS_INC(bp->b_mount, xb_page_retries);
397337
memalloc_retry_wait(gfp_mask);
398338
}
399-
return 0;
400-
}
401-
402-
/*
403-
* Map buffer into kernel address-space if necessary.
404-
*/
405-
STATIC int
406-
_xfs_buf_map_pages(
407-
struct xfs_buf *bp,
408-
xfs_buf_flags_t flags)
409-
{
410-
ASSERT(bp->b_flags & _XBF_PAGES);
411-
if (bp->b_page_count == 1) {
412-
/* A single page buffer is always mappable */
413-
bp->b_addr = page_address(bp->b_pages[0]);
414-
} else {
415-
int retried = 0;
416-
unsigned nofs_flag;
417-
418-
/*
419-
* vm_map_ram() will allocate auxiliary structures (e.g.
420-
* pagetables) with GFP_KERNEL, yet we often under a scoped nofs
421-
* context here. Mixing GFP_KERNEL with GFP_NOFS allocations
422-
* from the same call site that can be run from both above and
423-
* below memory reclaim causes lockdep false positives. Hence we
424-
* always need to force this allocation to nofs context because
425-
* we can't pass __GFP_NOLOCKDEP down to auxillary structures to
426-
* prevent false positive lockdep reports.
427-
*
428-
* XXX(dgc): I think dquot reclaim is the only place we can get
429-
* to this function from memory reclaim context now. If we fix
430-
* that like we've fixed inode reclaim to avoid writeback from
431-
* reclaim, this nofs wrapping can go away.
432-
*/
433-
nofs_flag = memalloc_nofs_save();
434-
do {
435-
bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
436-
-1);
437-
if (bp->b_addr)
438-
break;
439-
vm_unmap_aliases();
440-
} while (retried++ <= 1);
441-
memalloc_nofs_restore(nofs_flag);
442-
443-
if (!bp->b_addr)
444-
return -ENOMEM;
445-
}
446339

447340
return 0;
448341
}
@@ -562,7 +455,7 @@ xfs_buf_find_lock(
562455
return -ENOENT;
563456
}
564457
ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
565-
bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
458+
bp->b_flags &= _XBF_KMEM;
566459
bp->b_ops = NULL;
567460
}
568461
return 0;
@@ -748,18 +641,6 @@ xfs_buf_get_map(
748641
xfs_perag_put(pag);
749642
}
750643

751-
/* We do not hold a perag reference anymore. */
752-
if (!bp->b_addr) {
753-
error = _xfs_buf_map_pages(bp, flags);
754-
if (unlikely(error)) {
755-
xfs_warn_ratelimited(btp->bt_mount,
756-
"%s: failed to map %u pages", __func__,
757-
bp->b_page_count);
758-
xfs_buf_relse(bp);
759-
return error;
760-
}
761-
}
762-
763644
/*
764645
* Clear b_error if this is a lookup from a caller that doesn't expect
765646
* valid data to be found in the buffer.
@@ -1002,14 +883,6 @@ xfs_buf_get_uncached(
1002883
if (error)
1003884
goto fail_free_buf;
1004885

1005-
if (!bp->b_addr)
1006-
error = _xfs_buf_map_pages(bp, 0);
1007-
if (unlikely(error)) {
1008-
xfs_warn(target->bt_mount,
1009-
"%s: failed to map pages", __func__);
1010-
goto fail_free_buf;
1011-
}
1012-
1013886
trace_xfs_buf_get_uncached(bp, _RET_IP_);
1014887
*bpp = bp;
1015888
return 0;
@@ -1343,7 +1216,7 @@ __xfs_buf_ioend(
13431216
if (bp->b_flags & XBF_READ) {
13441217
if (!bp->b_error && is_vmalloc_addr(bp->b_addr))
13451218
invalidate_kernel_vmap_range(bp->b_addr,
1346-
xfs_buf_vmap_len(bp));
1219+
roundup(BBTOB(bp->b_length), PAGE_SIZE));
13471220
if (!bp->b_error && bp->b_ops)
13481221
bp->b_ops->verify_read(bp);
13491222
if (!bp->b_error)
@@ -1504,29 +1377,48 @@ static void
15041377
xfs_buf_submit_bio(
15051378
struct xfs_buf *bp)
15061379
{
1507-
unsigned int size = BBTOB(bp->b_length);
1508-
unsigned int map = 0, p;
1380+
unsigned int map = 0;
15091381
struct blk_plug plug;
15101382
struct bio *bio;
15111383

1512-
bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
1513-
xfs_buf_bio_op(bp), GFP_NOIO);
1514-
bio->bi_private = bp;
1515-
bio->bi_end_io = xfs_buf_bio_end_io;
1384+
if (is_vmalloc_addr(bp->b_addr)) {
1385+
unsigned int size = BBTOB(bp->b_length);
1386+
unsigned int alloc_size = roundup(size, PAGE_SIZE);
1387+
void *data = bp->b_addr;
15161388

1517-
if (bp->b_page_count == 1) {
1518-
__bio_add_page(bio, virt_to_page(bp->b_addr), size,
1519-
offset_in_page(bp->b_addr));
1520-
} else {
1521-
for (p = 0; p < bp->b_page_count; p++)
1522-
__bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
1523-
bio->bi_iter.bi_size = size; /* limit to the actual size used */
1389+
bio = bio_alloc(bp->b_target->bt_bdev, alloc_size >> PAGE_SHIFT,
1390+
xfs_buf_bio_op(bp), GFP_NOIO);
1391+
1392+
do {
1393+
unsigned int len = min(size, PAGE_SIZE);
15241394

1525-
if (is_vmalloc_addr(bp->b_addr))
1526-
flush_kernel_vmap_range(bp->b_addr,
1527-
xfs_buf_vmap_len(bp));
1395+
ASSERT(offset_in_page(data) == 0);
1396+
__bio_add_page(bio, vmalloc_to_page(data), len, 0);
1397+
data += len;
1398+
size -= len;
1399+
} while (size);
1400+
1401+
flush_kernel_vmap_range(bp->b_addr, alloc_size);
1402+
} else {
1403+
/*
1404+
* Single folio or slab allocation. Must be contiguous and thus
1405+
* only a single bvec is needed.
1406+
*
1407+
* This uses the page based bio add helper for now as that is
1408+
* the lowest common denominator between folios and slab
1409+
* allocations. To be replaced with a better block layer
1410+
* helper soon (hopefully).
1411+
*/
1412+
bio = bio_alloc(bp->b_target->bt_bdev, 1, xfs_buf_bio_op(bp),
1413+
GFP_NOIO);
1414+
__bio_add_page(bio, virt_to_page(bp->b_addr),
1415+
BBTOB(bp->b_length),
1416+
offset_in_page(bp->b_addr));
15281417
}
15291418

1419+
bio->bi_private = bp;
1420+
bio->bi_end_io = xfs_buf_bio_end_io;
1421+
15301422
/*
15311423
* If there is more than one map segment, split out a new bio for each
15321424
* map except of the last one. The last map is handled by the

fs/xfs/xfs_buf.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ struct xfs_buf;
3636
#define _XBF_LOGRECOVERY (1u << 18)/* log recovery buffer */
3737

3838
/* flags used only internally */
39-
#define _XBF_PAGES (1u << 20)/* backed by refcounted pages */
4039
#define _XBF_KMEM (1u << 21)/* backed by heap memory */
4140
#define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */
4241

@@ -61,7 +60,6 @@ typedef unsigned int xfs_buf_flags_t;
6160
{ XBF_STALE, "STALE" }, \
6261
{ XBF_WRITE_FAIL, "WRITE_FAIL" }, \
6362
{ _XBF_LOGRECOVERY, "LOG_RECOVERY" }, \
64-
{ _XBF_PAGES, "PAGES" }, \
6563
{ _XBF_KMEM, "KMEM" }, \
6664
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
6765
/* The following interface flags should never be set */ \
@@ -122,8 +120,6 @@ struct xfs_buftarg {
122120
struct xfs_buf_cache bt_cache[];
123121
};
124122

125-
#define XB_PAGES 2
126-
127123
struct xfs_buf_map {
128124
xfs_daddr_t bm_bn; /* block number for I/O */
129125
int bm_len; /* size of I/O */
@@ -185,13 +181,10 @@ struct xfs_buf {
185181
struct xfs_buf_log_item *b_log_item;
186182
struct list_head b_li_list; /* Log items list head */
187183
struct xfs_trans *b_transp;
188-
struct page **b_pages; /* array of page pointers */
189-
struct page *b_page_array[XB_PAGES]; /* inline pages */
190184
struct xfs_buf_map *b_maps; /* compound buffer map */
191185
struct xfs_buf_map __b_map; /* inline compound buffer map */
192186
int b_map_count;
193187
atomic_t b_pin_count; /* pin count */
194-
unsigned int b_page_count; /* size of page array */
195188
int b_error; /* error code on I/O */
196189
void (*b_iodone)(struct xfs_buf *bp);
197190

fs/xfs/xfs_buf_mem.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -169,9 +169,6 @@ xmbuf_map_page(
169169
unlock_page(page);
170170

171171
bp->b_addr = page_address(page);
172-
bp->b_pages = bp->b_page_array;
173-
bp->b_pages[0] = page;
174-
bp->b_page_count = 1;
175172
return 0;
176173
}
177174

@@ -180,16 +177,10 @@ void
180177
xmbuf_unmap_page(
181178
struct xfs_buf *bp)
182179
{
183-
struct page *page = bp->b_pages[0];
184-
185180
ASSERT(xfs_buftarg_is_mem(bp->b_target));
186181

187-
put_page(page);
188-
182+
put_page(virt_to_page(bp->b_addr));
189183
bp->b_addr = NULL;
190-
bp->b_pages[0] = NULL;
191-
bp->b_pages = NULL;
192-
bp->b_page_count = 0;
193184
}
194185

195186
/* Is this a valid daddr within the buftarg? */

0 commit comments

Comments (0)