Skip to content

Commit 0c438dc

Browse files
johnpgarry and Darrick J. Wong
authored and committed
xfs: add xfs_calc_atomic_write_unit_max()
Now that CoW-based atomic writes are supported, update the max size of an atomic write for the data device. The limit of a CoW-based atomic write will be the limit of the number of logitems which can fit into a single transaction. In addition, the max atomic write size needs to be aligned to the agsize. Limit the size of atomic writes to the greatest power-of-two factor of the agsize so that allocations for an atomic write will always be aligned compatibly with the alignment requirements of the storage. Function xfs_atomic_write_logitems() is added to find the limit on the number of log items which can fit in a single transaction. Amend the max atomic write computation to create a new transaction reservation type, and compute the maximum size of an atomic write completion (in fsblocks) based on this new transaction reservation. Initially, tr_atomic_write is a clone of tr_itruncate, which provides a reasonable level of parallelism. In the next patch, we'll add a mount option so that sysadmins can configure their own limits. [djwong: use a new reservation type for atomic write ioends, refactor group limit calculations] Reviewed-by: Darrick J. Wong <[email protected]> Signed-off-by: Darrick J. Wong <[email protected]> [jpg: rounddown power-of-2 always] Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: John Garry <[email protected]>
1 parent 9baeac3 commit 0c438dc

File tree

7 files changed

+263
-0
lines changed

7 files changed

+263
-0
lines changed

fs/xfs/libxfs/xfs_trans_resv.c

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
#include "xfs_rtbitmap.h"
2323
#include "xfs_attr_item.h"
2424
#include "xfs_log.h"
25+
#include "xfs_defer.h"
26+
#include "xfs_bmap_item.h"
27+
#include "xfs_extfree_item.h"
28+
#include "xfs_rmap_item.h"
29+
#include "xfs_refcount_item.h"
30+
#include "xfs_trace.h"
2531

2632
#define _ALLOC true
2733
#define _FREE false
@@ -1394,3 +1400,91 @@ xfs_trans_resv_calc(
13941400
*/
13951401
xfs_calc_default_atomic_ioend_reservation(mp, resp);
13961402
}
1403+
1404+
/*
1405+
* Return the per-extent and fixed transaction reservation sizes needed to
1406+
* complete an atomic write.
1407+
*/
1408+
STATIC unsigned int
1409+
xfs_calc_atomic_write_ioend_geometry(
1410+
struct xfs_mount *mp,
1411+
unsigned int *step_size)
1412+
{
1413+
const unsigned int efi = xfs_efi_log_space(1);
1414+
const unsigned int efd = xfs_efd_log_space(1);
1415+
const unsigned int rui = xfs_rui_log_space(1);
1416+
const unsigned int rud = xfs_rud_log_space();
1417+
const unsigned int cui = xfs_cui_log_space(1);
1418+
const unsigned int cud = xfs_cud_log_space();
1419+
const unsigned int bui = xfs_bui_log_space(1);
1420+
const unsigned int bud = xfs_bud_log_space();
1421+
1422+
/*
1423+
* Maximum overhead to complete an atomic write ioend in software:
1424+
* remove data fork extent + remove cow fork extent + map extent into
1425+
* data fork.
1426+
*
1427+
* tx0: Creates a BUI and a CUI and that's all it needs.
1428+
*
1429+
* tx1: Roll to finish the BUI. Need space for the BUD, an RUI, and
1430+
* enough space to relog the CUI (== CUI + CUD).
1431+
*
1432+
* tx2: Roll again to finish the RUI. Need space for the RUD and space
1433+
* to relog the CUI.
1434+
*
1435+
* tx3: Roll again, need space for the CUD and possibly a new EFI.
1436+
*
1437+
* tx4: Roll again, need space for an EFD.
1438+
*
1439+
* If the extent referenced by the pair of BUI/CUI items is not the one
1440+
* being currently processed, then we need to reserve space to relog
1441+
* both items.
1442+
*/
1443+
const unsigned int tx0 = bui + cui;
1444+
const unsigned int tx1 = bud + rui + cui + cud;
1445+
const unsigned int tx2 = rud + cui + cud;
1446+
const unsigned int tx3 = cud + efi;
1447+
const unsigned int tx4 = efd;
1448+
const unsigned int relog = bui + bud + cui + cud;
1449+
1450+
const unsigned int per_intent = max(max3(tx0, tx1, tx2),
1451+
max3(tx3, tx4, relog));
1452+
1453+
/* Overhead to finish one step of each intent item type */
1454+
const unsigned int f1 = xfs_calc_finish_efi_reservation(mp, 1);
1455+
const unsigned int f2 = xfs_calc_finish_rui_reservation(mp, 1);
1456+
const unsigned int f3 = xfs_calc_finish_cui_reservation(mp, 1);
1457+
const unsigned int f4 = xfs_calc_finish_bui_reservation(mp, 1);
1458+
1459+
/* We only finish one item per transaction in a chain */
1460+
*step_size = max(f4, max3(f1, f2, f3));
1461+
1462+
return per_intent;
1463+
}
1464+
1465+
/*
1466+
* Compute the maximum size (in fsblocks) of atomic writes that we can complete
1467+
* given the existing log reservations.
1468+
*/
1469+
xfs_extlen_t
1470+
xfs_calc_max_atomic_write_fsblocks(
1471+
struct xfs_mount *mp)
1472+
{
1473+
const struct xfs_trans_res *resv = &M_RES(mp)->tr_atomic_ioend;
1474+
unsigned int per_intent = 0;
1475+
unsigned int step_size = 0;
1476+
unsigned int ret = 0;
1477+
1478+
if (resv->tr_logres > 0) {
1479+
per_intent = xfs_calc_atomic_write_ioend_geometry(mp,
1480+
&step_size);
1481+
1482+
if (resv->tr_logres >= step_size)
1483+
ret = (resv->tr_logres - step_size) / per_intent;
1484+
}
1485+
1486+
trace_xfs_calc_max_atomic_write_fsblocks(mp, per_intent, step_size,
1487+
resv->tr_logres, ret);
1488+
1489+
return ret;
1490+
}

fs/xfs/libxfs/xfs_trans_resv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,6 @@ unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
121121
unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
122122
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
123123

124+
xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
125+
124126
#endif /* __XFS_TRANS_RESV_H__ */

fs/xfs/xfs_mount.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,82 @@ xfs_agbtree_compute_maxlevels(
666666
mp->m_agbtree_maxlevels = max(levels, mp->m_refc_maxlevels);
667667
}
668668

669+
/* Maximum atomic write IO size that the kernel allows. */
670+
static inline xfs_extlen_t xfs_calc_atomic_write_max(struct xfs_mount *mp)
671+
{
672+
return rounddown_pow_of_two(XFS_B_TO_FSB(mp, MAX_RW_COUNT));
673+
}
674+
675+
/*
 * Return the greatest power of two that evenly divides @nr, i.e. the value of
 * its lowest set bit.  Returns 0 when @nr is 0.
 *
 * This is computed entirely in unsigned arithmetic: "nr & -nr" isolates the
 * lowest set bit without shifting, so there is no negative shift count for
 * nr == 0 and no shift into the sign bit when only bit 31 is set.
 */
static inline unsigned int max_pow_of_two_factor(const unsigned int nr)
{
	return nr & -nr;
}
679+
680+
/*
681+
* If the data device advertises atomic write support, limit the size of data
682+
* device atomic writes to the greatest power-of-two factor of the AG size so
683+
* that every atomic write unit aligns with the start of every AG. This is
684+
* required so that the per-AG allocations for an atomic write will always be
685+
* aligned compatibly with the alignment requirements of the storage.
686+
*
687+
* If the data device doesn't advertise atomic writes, then there are no
688+
* alignment restrictions and the largest out-of-place write we can do
689+
* ourselves is the number of blocks that user files can allocate from any AG.
690+
*/
691+
static inline xfs_extlen_t xfs_calc_perag_awu_max(struct xfs_mount *mp)
692+
{
693+
if (mp->m_ddev_targp->bt_bdev_awu_min > 0)
694+
return max_pow_of_two_factor(mp->m_sb.sb_agblocks);
695+
return rounddown_pow_of_two(mp->m_ag_max_usable);
696+
}
697+
698+
/*
699+
* Reflink on the realtime device requires rtgroups, and atomic writes require
700+
* reflink.
701+
*
702+
* If the realtime device advertises atomic write support, limit the size of
703+
* data device atomic writes to the greatest power-of-two factor of the rtgroup
704+
* size so that every atomic write unit aligns with the start of every rtgroup.
705+
* This is required so that the per-rtgroup allocations for an atomic write
706+
* will always be aligned compatibly with the alignment requirements of the
707+
* storage.
708+
*
709+
* If the rt device doesn't advertise atomic writes, then there are no
710+
* alignment restrictions and the largest out-of-place write we can do
711+
* ourselves is the number of blocks that user files can allocate from any
712+
* rtgroup.
713+
*/
714+
static inline xfs_extlen_t xfs_calc_rtgroup_awu_max(struct xfs_mount *mp)
715+
{
716+
struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
717+
718+
if (rgs->blocks == 0)
719+
return 0;
720+
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_bdev_awu_min > 0)
721+
return max_pow_of_two_factor(rgs->blocks);
722+
return rounddown_pow_of_two(rgs->blocks);
723+
}
724+
725+
/* Compute the maximum atomic write unit size for each section. */
726+
static inline void
727+
xfs_calc_atomic_write_unit_max(
728+
struct xfs_mount *mp)
729+
{
730+
struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
731+
struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
732+
733+
const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
734+
const xfs_extlen_t max_ioend = xfs_reflink_max_atomic_cow(mp);
735+
const xfs_extlen_t max_agsize = xfs_calc_perag_awu_max(mp);
736+
const xfs_extlen_t max_rgsize = xfs_calc_rtgroup_awu_max(mp);
737+
738+
ags->awu_max = min3(max_write, max_ioend, max_agsize);
739+
rgs->awu_max = min3(max_write, max_ioend, max_rgsize);
740+
741+
trace_xfs_calc_atomic_write_unit_max(mp, max_write, max_ioend,
742+
max_agsize, max_rgsize);
743+
}
744+
669745
/* Compute maximum possible height for realtime btree types for this fs. */
670746
static inline void
671747
xfs_rtbtree_compute_maxlevels(
@@ -1082,6 +1158,13 @@ xfs_mountfs(
10821158
xfs_zone_gc_start(mp);
10831159
}
10841160

1161+
/*
1162+
* Pre-calculate atomic write unit max. This involves computations
1163+
* derived from transaction reservations, so we must do this after the
1164+
* log is fully initialized.
1165+
*/
1166+
xfs_calc_atomic_write_unit_max(mp);
1167+
10851168
return 0;
10861169

10871170
out_agresv:

fs/xfs/xfs_mount.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,12 @@ struct xfs_groups {
119119
* SMR hard drives.
120120
*/
121121
xfs_fsblock_t start_fsb;
122+
123+
/*
124+
* Maximum length of an atomic write for files stored in this
125+
* collection of allocation groups, in fsblocks.
126+
*/
127+
xfs_extlen_t awu_max;
122128
};
123129

124130
struct xfs_freecounter {

fs/xfs/xfs_reflink.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,6 +1040,22 @@ xfs_reflink_end_atomic_cow(
10401040
return error;
10411041
}
10421042

1043+
/* Compute the largest atomic write that we can complete through software. */
1044+
xfs_extlen_t
1045+
xfs_reflink_max_atomic_cow(
1046+
struct xfs_mount *mp)
1047+
{
1048+
/* We cannot do any atomic writes without out of place writes. */
1049+
if (!xfs_can_sw_atomic_write(mp))
1050+
return 0;
1051+
1052+
/*
1053+
* Atomic write limits must always be a power-of-2, according to
1054+
* generic_atomic_write_valid.
1055+
*/
1056+
return rounddown_pow_of_two(xfs_calc_max_atomic_write_fsblocks(mp));
1057+
}
1058+
10431059
/*
10441060
* Free all CoW staging blocks that are still referenced by the ondisk refcount
10451061
* metadata. The ondisk metadata does not track which inode created the

fs/xfs/xfs_reflink.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,6 @@ extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
6868

6969
bool xfs_reflink_supports_rextsize(struct xfs_mount *mp, unsigned int rextsize);
7070

71+
xfs_extlen_t xfs_reflink_max_atomic_cow(struct xfs_mount *mp);
72+
7173
#endif /* __XFS_REFLINK_H */

fs/xfs/xfs_trace.h

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,66 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
170170
DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
171171
DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
172172

173+
TRACE_EVENT(xfs_calc_atomic_write_unit_max,
174+
TP_PROTO(struct xfs_mount *mp, unsigned int max_write,
175+
unsigned int max_ioend, unsigned int max_agsize,
176+
unsigned int max_rgsize),
177+
TP_ARGS(mp, max_write, max_ioend, max_agsize, max_rgsize),
178+
TP_STRUCT__entry(
179+
__field(dev_t, dev)
180+
__field(unsigned int, max_write)
181+
__field(unsigned int, max_ioend)
182+
__field(unsigned int, max_agsize)
183+
__field(unsigned int, max_rgsize)
184+
__field(unsigned int, data_awu_max)
185+
__field(unsigned int, rt_awu_max)
186+
),
187+
TP_fast_assign(
188+
__entry->dev = mp->m_super->s_dev;
189+
__entry->max_write = max_write;
190+
__entry->max_ioend = max_ioend;
191+
__entry->max_agsize = max_agsize;
192+
__entry->max_rgsize = max_rgsize;
193+
__entry->data_awu_max = mp->m_groups[XG_TYPE_AG].awu_max;
194+
__entry->rt_awu_max = mp->m_groups[XG_TYPE_RTG].awu_max;
195+
),
196+
TP_printk("dev %d:%d max_write %u max_ioend %u max_agsize %u max_rgsize %u data_awu_max %u rt_awu_max %u",
197+
MAJOR(__entry->dev), MINOR(__entry->dev),
198+
__entry->max_write,
199+
__entry->max_ioend,
200+
__entry->max_agsize,
201+
__entry->max_rgsize,
202+
__entry->data_awu_max,
203+
__entry->rt_awu_max)
204+
);
205+
206+
TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
207+
TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
208+
unsigned int step_size, unsigned int logres,
209+
unsigned int blockcount),
210+
TP_ARGS(mp, per_intent, step_size, logres, blockcount),
211+
TP_STRUCT__entry(
212+
__field(dev_t, dev)
213+
__field(unsigned int, per_intent)
214+
__field(unsigned int, step_size)
215+
__field(unsigned int, logres)
216+
__field(unsigned int, blockcount)
217+
),
218+
TP_fast_assign(
219+
__entry->dev = mp->m_super->s_dev;
220+
__entry->per_intent = per_intent;
221+
__entry->step_size = step_size;
222+
__entry->logres = logres;
223+
__entry->blockcount = blockcount;
224+
),
225+
TP_printk("dev %d:%d per_intent %u step_size %u logres %u blockcount %u",
226+
MAJOR(__entry->dev), MINOR(__entry->dev),
227+
__entry->per_intent,
228+
__entry->step_size,
229+
__entry->logres,
230+
__entry->blockcount)
231+
);
232+
173233
TRACE_EVENT(xlog_intent_recovery_failed,
174234
TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops,
175235
int error),

0 commit comments

Comments
 (0)