Skip to content

Commit 4528b90

Browse files
author
Darrick J. Wong
committed
xfs: allow sysadmins to specify a maximum atomic write limit at mount time
Introduce a mount option to allow sysadmins to specify the maximum size of an atomic write. If the filesystem can work with the supplied value, that becomes the new guaranteed maximum. The value mustn't be too big for the existing filesystem geometry (max write size, max AG/rtgroup size). We dynamically recompute the tr_atomic_ioend transaction reservation based on the given block size, check that the current log size isn't less than the new minimum log size constraint, and set a new maximum. The actual software atomic write max is still computed based off of tr_atomic_ioend the same way it has been for the past few commits. Note also that xfs_calc_atomic_write_log_geometry is non-static because mkfs will need that. Signed-off-by: Darrick J. Wong <[email protected]> Signed-off-by: John Garry <[email protected]> Reviewed-by: John Garry <[email protected]>
1 parent 9dffc58 commit 4528b90

File tree

7 files changed

+259
-2
lines changed

7 files changed

+259
-2
lines changed

Documentation/admin-guide/xfs.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
151151
optional, and the log section can be separate from the data
152152
section or contained within it.
153153

154+
max_atomic_write=value
155+
Set the maximum size of an atomic write. The size may be
156+
specified in bytes, in kilobytes with a "k" suffix, in megabytes
157+
with an "m" suffix, or in gigabytes with a "g" suffix. The size
158+
cannot be larger than the maximum write size, larger than the
159+
size of any allocation group, or larger than the size of a
160+
remapping operation that the log can complete atomically.
161+
162+
The default value is to set the maximum I/O completion size
163+
to allow each CPU to handle one at a time.
164+
154165
max_open_zones=value
155166
Specify the max number of zones to keep open for writing on a
156167
zoned rt device. Many open zones aids file data separation

fs/xfs/libxfs/xfs_trans_resv.c

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,3 +1488,72 @@ xfs_calc_max_atomic_write_fsblocks(
14881488

14891489
return ret;
14901490
}
1491+
1492+
/*
1493+
* Compute the log blocks and transaction reservation needed to complete an
1494+
* atomic write of a given number of blocks. Worst case, each block requires
1495+
* separate handling. A return value of 0 means something went wrong.
1496+
*/
1497+
xfs_extlen_t
1498+
xfs_calc_atomic_write_log_geometry(
1499+
struct xfs_mount *mp,
1500+
xfs_extlen_t blockcount,
1501+
unsigned int *new_logres)
1502+
{
1503+
struct xfs_trans_res *curr_res = &M_RES(mp)->tr_atomic_ioend;
1504+
uint old_logres = curr_res->tr_logres;
1505+
unsigned int per_intent, step_size;
1506+
unsigned int logres;
1507+
xfs_extlen_t min_logblocks;
1508+
1509+
ASSERT(blockcount > 0);
1510+
1511+
xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
1512+
1513+
per_intent = xfs_calc_atomic_write_ioend_geometry(mp, &step_size);
1514+
1515+
/* Check for overflows */
1516+
if (check_mul_overflow(blockcount, per_intent, &logres) ||
1517+
check_add_overflow(logres, step_size, &logres))
1518+
return 0;
1519+
1520+
curr_res->tr_logres = logres;
1521+
min_logblocks = xfs_log_calc_minimum_size(mp);
1522+
curr_res->tr_logres = old_logres;
1523+
1524+
trace_xfs_calc_max_atomic_write_log_geometry(mp, per_intent, step_size,
1525+
blockcount, min_logblocks, logres);
1526+
1527+
*new_logres = logres;
1528+
return min_logblocks;
1529+
}
1530+
1531+
/*
1532+
* Compute the transaction reservation needed to complete an out of place
1533+
* atomic write of a given number of blocks.
1534+
*/
1535+
int
1536+
xfs_calc_atomic_write_reservation(
1537+
struct xfs_mount *mp,
1538+
xfs_extlen_t blockcount)
1539+
{
1540+
unsigned int new_logres;
1541+
xfs_extlen_t min_logblocks;
1542+
1543+
/*
1544+
* If the caller doesn't ask for a specific atomic write size, then
1545+
* use the defaults.
1546+
*/
1547+
if (blockcount == 0) {
1548+
xfs_calc_default_atomic_ioend_reservation(mp, M_RES(mp));
1549+
return 0;
1550+
}
1551+
1552+
min_logblocks = xfs_calc_atomic_write_log_geometry(mp, blockcount,
1553+
&new_logres);
1554+
if (!min_logblocks || min_logblocks > mp->m_sb.sb_logblocks)
1555+
return -EINVAL;
1556+
1557+
M_RES(mp)->tr_atomic_ioend.tr_logres = new_logres;
1558+
return 0;
1559+
}

fs/xfs/libxfs/xfs_trans_resv.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,9 @@ unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
122122
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
123123

124124
xfs_extlen_t xfs_calc_max_atomic_write_fsblocks(struct xfs_mount *mp);
125+
xfs_extlen_t xfs_calc_atomic_write_log_geometry(struct xfs_mount *mp,
126+
xfs_extlen_t blockcount, unsigned int *new_logres);
127+
int xfs_calc_atomic_write_reservation(struct xfs_mount *mp,
128+
xfs_extlen_t blockcount);
125129

126130
#endif /* __XFS_TRANS_RESV_H__ */

fs/xfs/xfs_mount.c

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,82 @@ xfs_calc_atomic_write_unit_max(
742742
max_agsize, max_rgsize);
743743
}
744744

745+
/*
746+
* Try to set the atomic write maximum to a new value that we got from
747+
* userspace via mount option.
748+
*/
749+
int
750+
xfs_set_max_atomic_write_opt(
751+
struct xfs_mount *mp,
752+
unsigned long long new_max_bytes)
753+
{
754+
const xfs_filblks_t new_max_fsbs = XFS_B_TO_FSBT(mp, new_max_bytes);
755+
const xfs_extlen_t max_write = xfs_calc_atomic_write_max(mp);
756+
const xfs_extlen_t max_group =
757+
max(mp->m_groups[XG_TYPE_AG].blocks,
758+
mp->m_groups[XG_TYPE_RTG].blocks);
759+
const xfs_extlen_t max_group_write =
760+
max(xfs_calc_perag_awu_max(mp), xfs_calc_rtgroup_awu_max(mp));
761+
int error;
762+
763+
if (new_max_bytes == 0)
764+
goto set_limit;
765+
766+
ASSERT(max_write <= U32_MAX);
767+
768+
/* generic_atomic_write_valid enforces power of two length */
769+
if (!is_power_of_2(new_max_bytes)) {
770+
xfs_warn(mp,
771+
"max atomic write size of %llu bytes is not a power of 2",
772+
new_max_bytes);
773+
return -EINVAL;
774+
}
775+
776+
if (new_max_bytes & mp->m_blockmask) {
777+
xfs_warn(mp,
778+
"max atomic write size of %llu bytes not aligned with fsblock",
779+
new_max_bytes);
780+
return -EINVAL;
781+
}
782+
783+
if (new_max_fsbs > max_write) {
784+
xfs_warn(mp,
785+
"max atomic write size of %lluk cannot be larger than max write size %lluk",
786+
new_max_bytes >> 10,
787+
XFS_FSB_TO_B(mp, max_write) >> 10);
788+
return -EINVAL;
789+
}
790+
791+
if (new_max_fsbs > max_group) {
792+
xfs_warn(mp,
793+
"max atomic write size of %lluk cannot be larger than allocation group size %lluk",
794+
new_max_bytes >> 10,
795+
XFS_FSB_TO_B(mp, max_group) >> 10);
796+
return -EINVAL;
797+
}
798+
799+
if (new_max_fsbs > max_group_write) {
800+
xfs_warn(mp,
801+
"max atomic write size of %lluk cannot be larger than max allocation group write size %lluk",
802+
new_max_bytes >> 10,
803+
XFS_FSB_TO_B(mp, max_group_write) >> 10);
804+
return -EINVAL;
805+
}
806+
807+
set_limit:
808+
error = xfs_calc_atomic_write_reservation(mp, new_max_fsbs);
809+
if (error) {
810+
xfs_warn(mp,
811+
"cannot support completing atomic writes of %lluk",
812+
new_max_bytes >> 10);
813+
return error;
814+
}
815+
816+
xfs_calc_atomic_write_unit_max(mp);
817+
mp->m_awu_max_bytes = new_max_bytes;
818+
return 0;
819+
}
820+
745821
/* Compute maximum possible height for realtime btree types for this fs. */
746822
static inline void
747823
xfs_rtbtree_compute_maxlevels(
@@ -1163,7 +1239,9 @@ xfs_mountfs(
11631239
* derived from transaction reservations, so we must do this after the
11641240
* log is fully initialized.
11651241
*/
1166-
xfs_calc_atomic_write_unit_max(mp);
1242+
error = xfs_set_max_atomic_write_opt(mp, mp->m_awu_max_bytes);
1243+
if (error)
1244+
goto out_agresv;
11671245

11681246
return 0;
11691247

fs/xfs/xfs_mount.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@ typedef struct xfs_mount {
237237
unsigned int m_max_open_zones;
238238
unsigned int m_zonegc_low_space;
239239

240+
/* max_atomic_write mount option value */
241+
unsigned long long m_awu_max_bytes;
242+
240243
/*
241244
* Bitsets of per-fs metadata that have been checked and/or are sick.
242245
* Callers must hold m_sb_lock to access these two fields.
@@ -804,4 +807,7 @@ static inline void xfs_mod_sb_delalloc(struct xfs_mount *mp, int64_t delta)
804807
percpu_counter_add(&mp->m_delalloc_blks, delta);
805808
}
806809

810+
int xfs_set_max_atomic_write_opt(struct xfs_mount *mp,
811+
unsigned long long new_max_bytes);
812+
807813
#endif /* __XFS_MOUNT_H__ */

fs/xfs/xfs_super.c

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ enum {
111111
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
112112
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
113113
Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
114-
Opt_lifetime, Opt_nolifetime,
114+
Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
115115
};
116116

117117
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -159,6 +159,7 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
159159
fsparam_u32("max_open_zones", Opt_max_open_zones),
160160
fsparam_flag("lifetime", Opt_lifetime),
161161
fsparam_flag("nolifetime", Opt_nolifetime),
162+
fsparam_string("max_atomic_write", Opt_max_atomic_write),
162163
{}
163164
};
164165

@@ -241,6 +242,9 @@ xfs_fs_show_options(
241242

242243
if (mp->m_max_open_zones)
243244
seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones);
245+
if (mp->m_awu_max_bytes)
246+
seq_printf(m, ",max_atomic_write=%lluk",
247+
mp->m_awu_max_bytes >> 10);
244248

245249
return 0;
246250
}
@@ -1343,6 +1347,42 @@ suffix_kstrtoint(
13431347
return ret;
13441348
}
13451349

1350+
static int
1351+
suffix_kstrtoull(
1352+
const char *s,
1353+
unsigned int base,
1354+
unsigned long long *res)
1355+
{
1356+
int last, shift_left_factor = 0;
1357+
unsigned long long _res;
1358+
char *value;
1359+
int ret = 0;
1360+
1361+
value = kstrdup(s, GFP_KERNEL);
1362+
if (!value)
1363+
return -ENOMEM;
1364+
1365+
last = strlen(value) - 1;
1366+
if (value[last] == 'K' || value[last] == 'k') {
1367+
shift_left_factor = 10;
1368+
value[last] = '\0';
1369+
}
1370+
if (value[last] == 'M' || value[last] == 'm') {
1371+
shift_left_factor = 20;
1372+
value[last] = '\0';
1373+
}
1374+
if (value[last] == 'G' || value[last] == 'g') {
1375+
shift_left_factor = 30;
1376+
value[last] = '\0';
1377+
}
1378+
1379+
if (kstrtoull(value, base, &_res))
1380+
ret = -EINVAL;
1381+
kfree(value);
1382+
*res = _res << shift_left_factor;
1383+
return ret;
1384+
}
1385+
13461386
static inline void
13471387
xfs_fs_warn_deprecated(
13481388
struct fs_context *fc,
@@ -1527,6 +1567,14 @@ xfs_fs_parse_param(
15271567
case Opt_nolifetime:
15281568
parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
15291569
return 0;
1570+
case Opt_max_atomic_write:
1571+
if (suffix_kstrtoull(param->string, 10,
1572+
&parsing_mp->m_awu_max_bytes)) {
1573+
xfs_warn(parsing_mp,
1574+
"max atomic write size must be positive integer");
1575+
return -EINVAL;
1576+
}
1577+
return 0;
15301578
default:
15311579
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
15321580
return -EINVAL;
@@ -2137,6 +2185,14 @@ xfs_fs_reconfigure(
21372185
if (error)
21382186
return error;
21392187

2188+
/* Validate new max_atomic_write option before making other changes */
2189+
if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) {
2190+
error = xfs_set_max_atomic_write_opt(mp,
2191+
new_mp->m_awu_max_bytes);
2192+
if (error)
2193+
return error;
2194+
}
2195+
21402196
/* inode32 -> inode64 */
21412197
if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
21422198
mp->m_features &= ~XFS_FEAT_SMALL_INUMS;

fs/xfs/xfs_trace.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,39 @@ TRACE_EVENT(xfs_calc_max_atomic_write_fsblocks,
230230
__entry->blockcount)
231231
);
232232

233+
/*
 * Tracepoint recording the inputs and results of an atomic write log
 * geometry calculation: the per-intent size, the step size, the requested
 * block count, the computed minimum log blocks, and the log reservation,
 * alongside the filesystem's current log size for comparison.
 */
TRACE_EVENT(xfs_calc_max_atomic_write_log_geometry,
234+
TP_PROTO(struct xfs_mount *mp, unsigned int per_intent,
235+
unsigned int step_size, unsigned int blockcount,
236+
unsigned int min_logblocks, unsigned int logres),
237+
TP_ARGS(mp, per_intent, step_size, blockcount, min_logblocks, logres),
238+
TP_STRUCT__entry(
239+
__field(dev_t, dev)
240+
__field(unsigned int, per_intent)
241+
__field(unsigned int, step_size)
242+
__field(unsigned int, blockcount)
243+
__field(unsigned int, min_logblocks)
244+
__field(unsigned int, cur_logblocks)
245+
__field(unsigned int, logres)
246+
),
247+
TP_fast_assign(
248+
__entry->dev = mp->m_super->s_dev;
249+
__entry->per_intent = per_intent;
250+
__entry->step_size = step_size;
251+
__entry->blockcount = blockcount;
252+
__entry->min_logblocks = min_logblocks;
253+
/* Not passed in by the caller: read from the superblock. */
__entry->cur_logblocks = mp->m_sb.sb_logblocks;
254+
__entry->logres = logres;
255+
),
256+
TP_printk("dev %d:%d per_intent %u step_size %u blockcount %u min_logblocks %u logblocks %u logres %u",
257+
MAJOR(__entry->dev), MINOR(__entry->dev),
258+
__entry->per_intent,
259+
__entry->step_size,
260+
__entry->blockcount,
261+
__entry->min_logblocks,
262+
__entry->cur_logblocks,
263+
__entry->logres)
264+
);
265+
233266
TRACE_EVENT(xlog_intent_recovery_failed,
234267
TP_PROTO(struct xfs_mount *mp, const struct xfs_defer_op_type *ops,
235268
int error),

0 commit comments

Comments
 (0)