Skip to content

Commit b037c4e

Browse files
author
Darrick J. Wong
committed
xfs: reduce transaction reservations with reflink
Prior to the introduction of deferred refcount operations, reflink would try to cram refcount btree updates into the same transaction as an allocation or a free event. Mainline XFS has never actually done that, but we never refactored the transaction reservations to reflect that we now do all refcount updates in separate transactions. Fix this to reduce the transaction reservation size even further, so that between this patch and the previous one, we reduce the tr_write and tr_itruncate sizes by 66%. Signed-off-by: Darrick J. Wong <[email protected]> Reviewed-by: Dave Chinner <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]>
1 parent 4ecf9e7 commit b037c4e

File tree

4 files changed

+138
-17
lines changed

4 files changed

+138
-17
lines changed

fs/xfs/libxfs/xfs_log_rlimit.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,18 @@ xfs_log_calc_trans_resv_for_minlogblocks(
8080
resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
8181
}
8282

83+
/*
84+
* In the early days of reflink, we did not use deferred refcount
85+
* update log items, so log reservations must be recomputed using the
86+
* old calculations.
87+
*/
88+
resv->tr_write.tr_logres =
89+
xfs_calc_write_reservation_minlogsize(mp);
90+
resv->tr_itruncate.tr_logres =
91+
xfs_calc_itruncate_reservation_minlogsize(mp);
92+
resv->tr_qm_dqalloc.tr_logres =
93+
xfs_calc_qm_dqalloc_reservation_minlogsize(mp);
94+
8395
/* Put everything back the way it was. This goes at the end. */
8496
mp->m_rmap_maxlevels = rmap_maxlevels;
8597
}

fs/xfs/libxfs/xfs_refcount.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -886,8 +886,13 @@ xfs_refcount_still_have_space(
886886
{
887887
unsigned long overhead;
888888

889-
overhead = cur->bc_ag.refc.shape_changes *
890-
xfs_allocfree_log_count(cur->bc_mp, 1);
889+
/*
890+
* Worst case estimate: full splits of the free space and rmap btrees
891+
* to handle each of the shape changes to the refcount btree.
892+
*/
893+
overhead = xfs_allocfree_log_count(cur->bc_mp,
894+
cur->bc_ag.refc.shape_changes);
895+
overhead += cur->bc_mp->m_refc_maxlevels;
891896
overhead *= cur->bc_mp->m_sb.sb_blocksize;
892897

893898
/*

fs/xfs/libxfs/xfs_trans_resv.c

Lines changed: 115 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ xfs_calc_buf_res(
5656
* Per-extent log reservation for the btree changes involved in freeing or
5757
* allocating an extent. In classic XFS there were two trees that will be
5858
* modified (bnobt + cntbt). With rmap enabled, there are three trees
59-
* (rmapbt). With reflink, there are four trees (refcountbt). The number of
60-
* blocks reserved is based on the formula:
59+
* (rmapbt). The number of blocks reserved is based on the formula:
6160
*
6261
* num trees * ((2 blocks/level * max depth) - 1)
6362
*
@@ -73,12 +72,23 @@ xfs_allocfree_log_count(
7372
blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
7473
if (xfs_has_rmapbt(mp))
7574
blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
76-
if (xfs_has_reflink(mp))
77-
blocks += num_ops * (2 * mp->m_refc_maxlevels - 1);
7875

7976
return blocks;
8077
}
8178

79+
/*
80+
* Per-extent log reservation for refcount btree changes. These are never done
81+
* in the same transaction as an allocation or a free, so we compute them
82+
* separately.
83+
*/
84+
static unsigned int
85+
xfs_refcountbt_block_count(
86+
struct xfs_mount *mp,
87+
unsigned int num_ops)
88+
{
89+
return num_ops * (2 * mp->m_refc_maxlevels - 1);
90+
}
91+
8292
/*
8393
* Logging inodes is really tricksy. They are logged in memory format,
8494
* which means that what we write into the log doesn't directly translate into
@@ -233,6 +243,28 @@ xfs_rtalloc_log_count(
233243
* register overflow from temporaries in the calculations.
234244
*/
235245

246+
/*
247+
* Compute the log reservation required to handle the refcount update
248+
* transaction. Refcount updates are always done via deferred log items.
249+
*
250+
* This is calculated as:
251+
* Data device refcount updates (t1):
252+
* the agfs of the ags containing the blocks: nr_ops * sector size
253+
* the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
254+
*/
255+
static unsigned int
256+
xfs_calc_refcountbt_reservation(
257+
struct xfs_mount *mp,
258+
unsigned int nr_ops)
259+
{
260+
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
261+
262+
if (!xfs_has_reflink(mp))
263+
return 0;
264+
265+
return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
266+
xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
267+
}
236268

237269
/*
238270
* In a write transaction we can allocate a maximum of 2
@@ -255,12 +287,14 @@ xfs_rtalloc_log_count(
255287
* the agfls of the ags containing the blocks: 2 * sector size
256288
* the super block free block counter: sector size
257289
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
290+
* And any refcount updates that happen in a separate transaction (t4).
258291
*/
259292
STATIC uint
260293
xfs_calc_write_reservation(
261-
struct xfs_mount *mp)
294+
struct xfs_mount *mp,
295+
bool for_minlogsize)
262296
{
263-
unsigned int t1, t2, t3;
297+
unsigned int t1, t2, t3, t4;
264298
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
265299

266300
t1 = xfs_calc_inode_res(mp, 1) +
@@ -282,7 +316,36 @@ xfs_calc_write_reservation(
282316
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
283317
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
284318

285-
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
319+
/*
320+
* In the early days of reflink, we included enough reservation to log
321+
* two refcountbt splits for each transaction. The codebase runs
322+
* refcountbt updates in separate transactions now, so to compute the
323+
* minimum log size, add the refcountbtree splits back to t1 and t3 and
324+
* do not account them separately as t4. Reflink did not support
325+
* realtime when the reservations were established, so no adjustment to
326+
* t2 is needed.
327+
*/
328+
if (for_minlogsize) {
329+
unsigned int adj = 0;
330+
331+
if (xfs_has_reflink(mp))
332+
adj = xfs_calc_buf_res(
333+
xfs_refcountbt_block_count(mp, 2),
334+
blksz);
335+
t1 += adj;
336+
t3 += adj;
337+
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
338+
}
339+
340+
t4 = xfs_calc_refcountbt_reservation(mp, 1);
341+
return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
342+
}
343+
344+
unsigned int
345+
xfs_calc_write_reservation_minlogsize(
346+
struct xfs_mount *mp)
347+
{
348+
return xfs_calc_write_reservation(mp, true);
286349
}
287350

288351
/*
@@ -304,12 +367,14 @@ xfs_calc_write_reservation(
304367
* the realtime summary: 2 exts * 1 block
305368
* worst case split in allocation btrees per extent assuming 2 extents:
306369
* 2 exts * 2 trees * (2 * max depth - 1) * block size
370+
* And any refcount updates that happen in a separate transaction (t4).
307371
*/
308372
STATIC uint
309373
xfs_calc_itruncate_reservation(
310-
struct xfs_mount *mp)
374+
struct xfs_mount *mp,
375+
bool for_minlogsize)
311376
{
312-
unsigned int t1, t2, t3;
377+
unsigned int t1, t2, t3, t4;
313378
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
314379

315380
t1 = xfs_calc_inode_res(mp, 1) +
@@ -326,7 +391,33 @@ xfs_calc_itruncate_reservation(
326391
t3 = 0;
327392
}
328393

329-
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
394+
/*
395+
* In the early days of reflink, we included enough reservation to log
396+
* four refcountbt splits in the same transaction as bnobt/cntbt
397+
* updates. The codebase runs refcountbt updates in separate
398+
* transactions now, so to compute the minimum log size, add the
399+
* refcount btree splits back here and do not compute them separately
400+
* as t4. Reflink did not support realtime when the reservations were
401+
* established, so do not adjust t3.
402+
*/
403+
if (for_minlogsize) {
404+
if (xfs_has_reflink(mp))
405+
t2 += xfs_calc_buf_res(
406+
xfs_refcountbt_block_count(mp, 4),
407+
blksz);
408+
409+
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
410+
}
411+
412+
t4 = xfs_calc_refcountbt_reservation(mp, 2);
413+
return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
414+
}
415+
416+
unsigned int
417+
xfs_calc_itruncate_reservation_minlogsize(
418+
struct xfs_mount *mp)
419+
{
420+
return xfs_calc_itruncate_reservation(mp, true);
330421
}
331422

332423
/*
@@ -792,13 +883,21 @@ xfs_calc_qm_setqlim_reservation(void)
792883
*/
793884
STATIC uint
794885
xfs_calc_qm_dqalloc_reservation(
795-
struct xfs_mount *mp)
886+
struct xfs_mount *mp,
887+
bool for_minlogsize)
796888
{
797-
return xfs_calc_write_reservation(mp) +
889+
return xfs_calc_write_reservation(mp, for_minlogsize) +
798890
xfs_calc_buf_res(1,
799891
XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
800892
}
801893

894+
unsigned int
895+
xfs_calc_qm_dqalloc_reservation_minlogsize(
896+
struct xfs_mount *mp)
897+
{
898+
return xfs_calc_qm_dqalloc_reservation(mp, true);
899+
}
900+
802901
/*
803902
* Syncing the incore super block changes to disk.
804903
* the super block to reflect the changes: sector size
@@ -821,11 +920,11 @@ xfs_trans_resv_calc(
821920
* The following transactions are logged in physical format and
822921
* require a permanent reservation on space.
823922
*/
824-
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
923+
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
825924
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
826925
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
827926

828-
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
927+
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
829928
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
830929
resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
831930

@@ -882,7 +981,8 @@ xfs_trans_resv_calc(
882981
resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
883982
resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
884983

885-
resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
984+
resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
985+
false);
886986
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
887987
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
888988

fs/xfs/libxfs/xfs_trans_resv.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,4 +98,8 @@ struct xfs_trans_resv {
9898
void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
9999
uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops);
100100

101+
unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
102+
unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
103+
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
104+
101105
#endif /* __XFS_TRANS_RESV_H__ */

0 commit comments

Comments
 (0)