Skip to content

Commit bba7d68

Browse files
committed
Merge tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "This cycle we've worked on fixing bugs and improving XFS' memory footprint. The most notable fixes include: fixing a corruption warning (and free space accounting skew) if copy on write fails; fixing slab cache misuse if SLOB is enabled, which apparently was broken for years without anybody noticing; and fixing a potential race with online shrinkfs. Otherwise, the bulk of the changes here involve setting up separate slab caches for frequently used items such as btree cursors and log intent items, and compacting the structures to reduce memory usage of those items substantially. This also sets us up to support larger btrees in future kernels. We also switch parts of online fsck to allocate scrub context information from the heap instead of using stack space. Summary: - Bug fixes and cleanups for kernel memory allocation usage, this time without touching the mm code. - Refactor the log recovery mechanism that preserves held resources across a transaction roll so that it uses the exact same mechanism that we use for that during regular runtime. - Fix bugs and tighten checking around btree heights. - Remove more old typedefs. - Fix perag reference leaks when racing with growfs. - Remove unused fields from xfs_btree_cur. - Allocate various scrub structures on the heap to reduce stack usage. - Pack xfs_btree_cur fields and rearrange to support arbitrary heights. - Compute maximum possible heights for each btree height, and use that to set up slab caches for each btree type. - Finally remove kmem_zone_t, since these have always been struct kmem_cache on Linux. - Compact the structures used to coordinate work intent items. - Set up slab caches for each work intent item type. - Rename the "bmap_add_free" function to "free_extent_later", which more accurately describes what it does. - Fix corruption warning on unmount when a CoW preallocation covers a data fork delalloc reservation but then the CoW fails. 
- Add some more minor code improvements" * tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (45 commits) xfs: use swap() to make code cleaner xfs: Remove duplicated include in xfs_super xfs: punch out data fork delalloc blocks on COW writeback failure xfs: remove unused parameter from refcount code xfs: reduce the size of struct xfs_extent_free_item xfs: rename xfs_bmap_add_free to xfs_free_extent_later xfs: create slab caches for frequently-used deferred items xfs: compact deferred intent item structures xfs: rename _zone variables to _cache xfs: remove kmem_zone typedef xfs: use separate btree cursor cache for each btree type xfs: compute absolute maximum nlevels for each btree type xfs: kill XFS_BTREE_MAXLEVELS xfs: compute the maximum height of the rmap btree when reflink enabled xfs: clean up xfs_btree_{calc_size,compute_maxlevels} xfs: compute maximum AG btree height for critical reservation calculation xfs: rename m_ag_maxlevels to m_allocbt_maxlevels xfs: dynamically allocate cursors based on maxlevels xfs: encode the max btree height in the cursor xfs: refactor btree cursor allocation function ...
2 parents a64a325 + 2a09b57 commit bba7d68

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1649
-900
lines changed

fs/xfs/kmem.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
7272
/*
7373
* Zone interfaces
7474
*/
75-
76-
#define kmem_zone kmem_cache
77-
#define kmem_zone_t struct kmem_cache
78-
7975
static inline struct page *
8076
kmem_to_page(void *addr)
8177
{

fs/xfs/libxfs/xfs_ag.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ xfs_ag_shrink_space(
850850
if (err2 != -ENOSPC)
851851
goto resv_err;
852852

853-
__xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true);
853+
__xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
854854

855855
/*
856856
* Roll the transaction before trying to re-init the per-ag

fs/xfs/libxfs/xfs_ag.h

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag);
116116

117117
/*
118118
* Perag iteration APIs
119-
*
120-
* XXX: for_each_perag_range() usage really needs an iterator to clean up when
121-
* we terminate at end_agno because we may have taken a reference to the perag
122-
* beyond end_agno. Right now callers have to be careful to catch and clean that
123-
* up themselves. This is not necessary for the callers of for_each_perag() and
124-
* for_each_perag_from() because they terminate at sb_agcount where there are
125-
* no perag structures in tree beyond end_agno.
126119
*/
127-
#define for_each_perag_range(mp, next_agno, end_agno, pag) \
128-
for ((pag) = xfs_perag_get((mp), (next_agno)); \
129-
(pag) != NULL && (next_agno) <= (end_agno); \
130-
(next_agno) = (pag)->pag_agno + 1, \
131-
xfs_perag_put(pag), \
132-
(pag) = xfs_perag_get((mp), (next_agno)))
120+
static inline struct xfs_perag *
121+
xfs_perag_next(
122+
struct xfs_perag *pag,
123+
xfs_agnumber_t *agno,
124+
xfs_agnumber_t end_agno)
125+
{
126+
struct xfs_mount *mp = pag->pag_mount;
127+
128+
*agno = pag->pag_agno + 1;
129+
xfs_perag_put(pag);
130+
if (*agno > end_agno)
131+
return NULL;
132+
return xfs_perag_get(mp, *agno);
133+
}
134+
135+
#define for_each_perag_range(mp, agno, end_agno, pag) \
136+
for ((pag) = xfs_perag_get((mp), (agno)); \
137+
(pag) != NULL; \
138+
(pag) = xfs_perag_next((pag), &(agno), (end_agno)))
133139

134-
#define for_each_perag_from(mp, next_agno, pag) \
135-
for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag))
140+
#define for_each_perag_from(mp, agno, pag) \
141+
for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
136142

137143

138144
#define for_each_perag(mp, agno, pag) \

fs/xfs/libxfs/xfs_ag_resv.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ xfs_ag_resv_critical(
9191
trace_xfs_ag_resv_critical(pag, type, avail);
9292

9393
/* Critically low if less than 10% or max btree height remains. */
94-
return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
94+
return XFS_TEST_ERROR(avail < orig / 10 ||
95+
avail < pag->pag_mount->m_agbtree_maxlevels,
9596
pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
9697
}
9798

fs/xfs/libxfs/xfs_alloc.c

Lines changed: 97 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "xfs_ag_resv.h"
2828
#include "xfs_bmap.h"
2929

30-
extern kmem_zone_t *xfs_bmap_free_item_zone;
30+
struct kmem_cache *xfs_extfree_item_cache;
3131

3232
struct workqueue_struct *xfs_alloc_wq;
3333

@@ -426,8 +426,8 @@ xfs_alloc_fix_len(
426426
*/
427427
STATIC int /* error code */
428428
xfs_alloc_fixup_trees(
429-
xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
430-
xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
429+
struct xfs_btree_cur *cnt_cur, /* cursor for by-size btree */
430+
struct xfs_btree_cur *bno_cur, /* cursor for by-block btree */
431431
xfs_agblock_t fbno, /* starting block of free extent */
432432
xfs_extlen_t flen, /* length of free extent */
433433
xfs_agblock_t rbno, /* starting block of returned extent */
@@ -488,8 +488,8 @@ xfs_alloc_fixup_trees(
488488
struct xfs_btree_block *bnoblock;
489489
struct xfs_btree_block *cntblock;
490490

491-
bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
492-
cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
491+
bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
492+
cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
493493

494494
if (XFS_IS_CORRUPT(mp,
495495
bnoblock->bb_numrecs !=
@@ -1200,8 +1200,8 @@ xfs_alloc_ag_vextent_exact(
12001200
xfs_alloc_arg_t *args) /* allocation argument structure */
12011201
{
12021202
struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1203-
xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
1204-
xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
1203+
struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
1204+
struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
12051205
int error;
12061206
xfs_agblock_t fbno; /* start block of found extent */
12071207
xfs_extlen_t flen; /* length of found extent */
@@ -1512,7 +1512,7 @@ xfs_alloc_ag_vextent_lastblock(
15121512
* than minlen.
15131513
*/
15141514
if (*len || args->alignment > 1) {
1515-
acur->cnt->bc_ptrs[0] = 1;
1515+
acur->cnt->bc_levels[0].ptr = 1;
15161516
do {
15171517
error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
15181518
if (error)
@@ -1658,8 +1658,8 @@ xfs_alloc_ag_vextent_size(
16581658
xfs_alloc_arg_t *args) /* allocation argument structure */
16591659
{
16601660
struct xfs_agf *agf = args->agbp->b_addr;
1661-
xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1662-
xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1661+
struct xfs_btree_cur *bno_cur; /* cursor for bno btree */
1662+
struct xfs_btree_cur *cnt_cur; /* cursor for cnt btree */
16631663
int error; /* error result */
16641664
xfs_agblock_t fbno; /* start of found freespace */
16651665
xfs_extlen_t flen; /* length of found freespace */
@@ -2190,14 +2190,15 @@ xfs_free_ag_extent(
21902190
*/
21912191

21922192
/*
2193-
* Compute and fill in value of m_ag_maxlevels.
2193+
* Compute and fill in value of m_alloc_maxlevels.
21942194
*/
21952195
void
21962196
xfs_alloc_compute_maxlevels(
21972197
xfs_mount_t *mp) /* file system mount structure */
21982198
{
2199-
mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2199+
mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
22002200
(mp->m_sb.sb_agblocks + 1) / 2);
2201+
ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
22012202
}
22022203

22032204
/*
@@ -2255,14 +2256,14 @@ xfs_alloc_min_freelist(
22552256
const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
22562257
unsigned int min_free;
22572258

2258-
ASSERT(mp->m_ag_maxlevels > 0);
2259+
ASSERT(mp->m_alloc_maxlevels > 0);
22592260

22602261
/* space needed by-bno freespace btree */
22612262
min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2262-
mp->m_ag_maxlevels);
2263+
mp->m_alloc_maxlevels);
22632264
/* space needed by-size freespace btree */
22642265
min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2265-
mp->m_ag_maxlevels);
2266+
mp->m_alloc_maxlevels);
22662267
/* space needed reverse mapping used space btree */
22672268
if (xfs_has_rmapbt(mp))
22682269
min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
@@ -2439,7 +2440,7 @@ xfs_agfl_reset(
24392440

24402441
/*
24412442
* Defer an AGFL block free. This is effectively equivalent to
2442-
* xfs_bmap_add_free() with some special handling particular to AGFL blocks.
2443+
* xfs_free_extent_later() with some special handling particular to AGFL blocks.
24432444
*
24442445
* Deferring AGFL frees helps prevent log reservation overruns due to too many
24452446
* allocation operations in a transaction. AGFL frees are prone to this problem
@@ -2458,21 +2459,74 @@ xfs_defer_agfl_block(
24582459
struct xfs_mount *mp = tp->t_mountp;
24592460
struct xfs_extent_free_item *new; /* new element */
24602461

2461-
ASSERT(xfs_bmap_free_item_zone != NULL);
2462+
ASSERT(xfs_extfree_item_cache != NULL);
24622463
ASSERT(oinfo != NULL);
24632464

2464-
new = kmem_cache_alloc(xfs_bmap_free_item_zone,
2465+
new = kmem_cache_zalloc(xfs_extfree_item_cache,
24652466
GFP_KERNEL | __GFP_NOFAIL);
24662467
new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
24672468
new->xefi_blockcount = 1;
2468-
new->xefi_oinfo = *oinfo;
2469-
new->xefi_skip_discard = false;
2469+
new->xefi_owner = oinfo->oi_owner;
24702470

24712471
trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
24722472

24732473
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
24742474
}
24752475

2476+
/*
2477+
* Add the extent to the list of extents to be freed at transaction end.
2478+
* The list is maintained sorted (by block number).
2479+
*/
2480+
void
2481+
__xfs_free_extent_later(
2482+
struct xfs_trans *tp,
2483+
xfs_fsblock_t bno,
2484+
xfs_filblks_t len,
2485+
const struct xfs_owner_info *oinfo,
2486+
bool skip_discard)
2487+
{
2488+
struct xfs_extent_free_item *new; /* new element */
2489+
#ifdef DEBUG
2490+
struct xfs_mount *mp = tp->t_mountp;
2491+
xfs_agnumber_t agno;
2492+
xfs_agblock_t agbno;
2493+
2494+
ASSERT(bno != NULLFSBLOCK);
2495+
ASSERT(len > 0);
2496+
ASSERT(len <= MAXEXTLEN);
2497+
ASSERT(!isnullstartblock(bno));
2498+
agno = XFS_FSB_TO_AGNO(mp, bno);
2499+
agbno = XFS_FSB_TO_AGBNO(mp, bno);
2500+
ASSERT(agno < mp->m_sb.sb_agcount);
2501+
ASSERT(agbno < mp->m_sb.sb_agblocks);
2502+
ASSERT(len < mp->m_sb.sb_agblocks);
2503+
ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
2504+
#endif
2505+
ASSERT(xfs_extfree_item_cache != NULL);
2506+
2507+
new = kmem_cache_zalloc(xfs_extfree_item_cache,
2508+
GFP_KERNEL | __GFP_NOFAIL);
2509+
new->xefi_startblock = bno;
2510+
new->xefi_blockcount = (xfs_extlen_t)len;
2511+
if (skip_discard)
2512+
new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2513+
if (oinfo) {
2514+
ASSERT(oinfo->oi_offset == 0);
2515+
2516+
if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
2517+
new->xefi_flags |= XFS_EFI_ATTR_FORK;
2518+
if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
2519+
new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
2520+
new->xefi_owner = oinfo->oi_owner;
2521+
} else {
2522+
new->xefi_owner = XFS_RMAP_OWN_NULL;
2523+
}
2524+
trace_xfs_bmap_free_defer(tp->t_mountp,
2525+
XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
2526+
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
2527+
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
2528+
}
2529+
24762530
#ifdef DEBUG
24772531
/*
24782532
* Check if an AGF has a free extent record whose length is equal to
@@ -2903,13 +2957,16 @@ xfs_agf_verify(
29032957

29042958
if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
29052959
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
2906-
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > mp->m_ag_maxlevels ||
2907-
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels)
2960+
be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
2961+
mp->m_alloc_maxlevels ||
2962+
be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
2963+
mp->m_alloc_maxlevels)
29082964
return __this_address;
29092965

29102966
if (xfs_has_rmapbt(mp) &&
29112967
(be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
2912-
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels))
2968+
be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
2969+
mp->m_rmap_maxlevels))
29132970
return __this_address;
29142971

29152972
if (xfs_has_rmapbt(mp) &&
@@ -3495,3 +3552,20 @@ xfs_agfl_walk(
34953552

34963553
return 0;
34973554
}
3555+
3556+
int __init
3557+
xfs_extfree_intent_init_cache(void)
3558+
{
3559+
xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
3560+
sizeof(struct xfs_extent_free_item),
3561+
0, 0, NULL);
3562+
3563+
return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
3564+
}
3565+
3566+
void
3567+
xfs_extfree_intent_destroy_cache(void)
3568+
{
3569+
kmem_cache_destroy(xfs_extfree_item_cache);
3570+
xfs_extfree_item_cache = NULL;
3571+
}

fs/xfs/libxfs/xfs_alloc.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
9898
struct xfs_perag *pag);
9999

100100
/*
101-
* Compute and fill in value of m_ag_maxlevels.
101+
* Compute and fill in value of m_alloc_maxlevels.
102102
*/
103103
void
104104
xfs_alloc_compute_maxlevels(
@@ -248,4 +248,40 @@ xfs_buf_to_agfl_bno(
248248
return bp->b_addr;
249249
}
250250

251+
void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
252+
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
253+
bool skip_discard);
254+
255+
/*
256+
* List of extents to be freed "later".
257+
* The list is kept sorted on xefi_startblock.
258+
*/
259+
struct xfs_extent_free_item {
260+
struct list_head xefi_list;
261+
uint64_t xefi_owner;
262+
xfs_fsblock_t xefi_startblock;/* starting fs block number */
263+
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
264+
unsigned int xefi_flags;
265+
};
266+
267+
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
268+
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
269+
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
270+
271+
static inline void
272+
xfs_free_extent_later(
273+
struct xfs_trans *tp,
274+
xfs_fsblock_t bno,
275+
xfs_filblks_t len,
276+
const struct xfs_owner_info *oinfo)
277+
{
278+
__xfs_free_extent_later(tp, bno, len, oinfo, false);
279+
}
280+
281+
282+
extern struct kmem_cache *xfs_extfree_item_cache;
283+
284+
int __init xfs_extfree_intent_init_cache(void);
285+
void xfs_extfree_intent_destroy_cache(void);
286+
251287
#endif /* __XFS_ALLOC_H__ */

0 commit comments

Comments
 (0)