Skip to content

Commit 9ec6912

Browse files
author
Darrick J. Wong
committed
xfs: compute the maximum height of the rmap btree when reflink enabled
Instead of assuming that the hardcoded XFS_BTREE_MAXLEVELS value is big enough to handle the maximally tall rmap btree when all blocks are in use and maximally shared, let's compute the maximum height assuming the rmapbt consumes as many blocks as possible.

Signed-off-by: Darrick J. Wong <[email protected]>
Reviewed-by: Chandan Babu R <[email protected]>
Reviewed-by: Dave Chinner <[email protected]>
1 parent 1b236ad commit 9ec6912

File tree

5 files changed

+85
-18
lines changed

5 files changed

+85
-18
lines changed

fs/xfs/libxfs/xfs_btree.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4553,6 +4553,39 @@ xfs_btree_calc_size(
45534553
return blocks;
45544554
}
45554555

4556+
/*
 * Given a number of available blocks for the btree to consume with records and
 * pointers, calculate the height of the tree needed to index all the records
 * that space can hold based on the number of pointers each interior node
 * holds.
 *
 * We start by assuming a single level tree consumes a single block, then track
 * the number of blocks each node level consumes until we no longer have space
 * to store the next node level.  At this point, we are indexing all the leaf
 * blocks in the space, and there's no more free space to split the tree any
 * further.  That's our maximum btree height.
 *
 * @limits:      fanout table; only limits[1] is read, and it is used as the
 *               number of pointers held by each interior (non-root) node.
 * @leaf_blocks: number of blocks the btree is allowed to consume.
 *
 * Returns 0 if there are no blocks at all, otherwise the computed height
 * (always at least 1).
 */
unsigned int
xfs_btree_space_to_height(
	const unsigned int	*limits,
	unsigned long long	leaf_blocks)
{
	/*
	 * The root btree block can have fewer than limits[1] pointers in it
	 * because the tree might not be big enough to require that amount of
	 * fanout.  Hence the level directly below the root has a minimum size
	 * of 2 blocks, not limits[1].  Seeding with limits[1] would
	 * underestimate the height of small trees.
	 */
	unsigned long long	node_blocks = 2;
	unsigned long long	blocks_left;
	unsigned int		height = 1;

	if (leaf_blocks < 1)
		return 0;

	/* One block is consumed by the single-level tree we start with. */
	blocks_left = leaf_blocks - 1;

	while (node_blocks < blocks_left) {
		/* Pay for this level, then fan out to size the next one. */
		blocks_left -= node_blocks;
		node_blocks *= limits[1];
		height++;
	}

	return height;
}
4588+
45564589
/*
45574590
* Query a regular btree for all records overlapping a given interval.
45584591
* Start with a LE lookup of the key of low_rec and return all records

fs/xfs/libxfs/xfs_btree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,8 @@ unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits,
491491
unsigned long long records);
492492
unsigned long long xfs_btree_calc_size(const unsigned int *limits,
493493
unsigned long long records);
494+
unsigned int xfs_btree_space_to_height(const unsigned int *limits,
495+
unsigned long long blocks);
494496

495497
/*
496498
* Return codes for the query range iterator function are 0 to continue

fs/xfs/libxfs/xfs_rmap_btree.c

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -540,26 +540,35 @@ void
540540
xfs_rmapbt_compute_maxlevels(
541541
struct xfs_mount *mp)
542542
{
543-
/*
544-
* On a non-reflink filesystem, the maximum number of rmap
545-
* records is the number of blocks in the AG, hence the max
546-
* rmapbt height is log_$maxrecs($agblocks). However, with
547-
* reflink each AG block can have up to 2^32 (per the refcount
548-
* record format) owners, which means that theoretically we
549-
* could face up to 2^64 rmap records.
550-
*
551-
* That effectively means that the max rmapbt height must be
552-
* XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG
553-
* blocks to feed the rmapbt long before the rmapbt reaches
554-
* maximum height. The reflink code uses ag_resv_critical to
555-
* disallow reflinking when less than 10% of the per-AG metadata
556-
* block reservation since the fallback is a regular file copy.
557-
*/
558-
if (xfs_has_reflink(mp))
559-
mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
560-
else
543+
if (!xfs_has_rmapbt(mp)) {
544+
mp->m_rmap_maxlevels = 0;
545+
return;
546+
}
547+
548+
if (xfs_has_reflink(mp)) {
549+
/*
550+
* Compute the asymptotic maxlevels for an rmap btree on a
551+
* filesystem that supports reflink.
552+
*
553+
* On a reflink filesystem, each AG block can have up to 2^32
554+
* (per the refcount record format) owners, which means that
555+
* theoretically we could face up to 2^64 rmap records.
556+
* However, we're likely to run out of blocks in the AG long
557+
* before that happens, which means that we must compute the
558+
* max height based on what the btree will look like if it
559+
* consumes almost all the blocks in the AG due to maximal
560+
* sharing factor.
561+
*/
562+
mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
563+
mp->m_sb.sb_agblocks);
564+
} else {
565+
/*
566+
* If there's no block sharing, compute the maximum rmapbt
567+
* height assuming one rmap record per AG block.
568+
*/
561569
mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
562570
mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
571+
}
563572
}
564573

565574
/* Calculate the refcount btree size for some records. */

fs/xfs/libxfs/xfs_trans_resv.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -814,6 +814,19 @@ xfs_trans_resv_calc(
814814
struct xfs_mount *mp,
815815
struct xfs_trans_resv *resp)
816816
{
817+
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
818+
819+
/*
820+
* In the early days of rmap+reflink, we always set the rmap maxlevels
821+
* to 9 even if the AG was small enough that it would never grow to
822+
* that height. Transaction reservation sizes influence the minimum
823+
* log size calculation, which influences the size of the log that mkfs
824+
* creates. Use the old value here to ensure that newly formatted
825+
* small filesystems will mount on older kernels.
826+
*/
827+
if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
828+
mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
829+
817830
/*
818831
* The following transactions are logged in physical format and
819832
* require a permanent reservation on space.
@@ -916,4 +929,7 @@ xfs_trans_resv_calc(
916929
resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
917930
resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
918931
resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
932+
933+
/* Put everything back the way it was. This goes at the end. */
934+
mp->m_rmap_maxlevels = rmap_maxlevels;
919935
}

fs/xfs/libxfs/xfs_trans_space.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@
1717
/* Adding one rmap could split every level up to the top of the tree. */
1818
#define XFS_RMAPADD_SPACE_RES(mp) ((mp)->m_rmap_maxlevels)
1919

20+
/*
21+
* Note that we historically set m_rmap_maxlevels to 9 when reflink is enabled,
22+
* so we must preserve this behavior to avoid changing the transaction space
23+
* reservations and minimum log size calculations for existing filesystems.
24+
*/
25+
#define XFS_OLD_REFLINK_RMAP_MAXLEVELS 9
26+
2027
/* Blocks we might need to add "b" rmaps to a tree. */
2128
#define XFS_NRMAPADD_SPACE_RES(mp, b)\
2229
(((b + XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) - 1) / \

0 commit comments

Comments
 (0)