Skip to content

Commit 4abb905

Browse files
committed
Merge tag 'atomic-writes-6.16_2025-05-07' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into atomic_writes
large atomic writes for xfs [v12.1] Currently atomic write support for xfs is limited to writing a single block as we have no way to guarantee alignment and that the write covers a single extent. This series introduces a method to issue atomic writes via a software-based method. The software-based method is used as a fallback for when attempting to issue an atomic write over misaligned or multiple extents. For xfs, this support is based on reflink CoW support. The basic idea of this CoW method is to alloc a range in the CoW fork, write the data, and atomically update the mapping. Initial mysql performance testing has shown this method to perform ok. However, there we are only using 16K atomic writes (and 4K block size), so typically - and thankfully - this software fallback method won't be used often. For other FSes which want large atomic writes and don't support CoW, I think that they can follow the example in [0]. Catherine is currently working on further xfstests for this feature, which we hope to share soon. About 17/17, maybe it can be omitted as there is no strong demand to have it included. Based on bfecc40 (xfs/next-rc, xfs/for-next) xfs: allow ro mounts if rtdev or logdev are read-only [0] https://lore.kernel.org/linux-xfs/[email protected]/ Differences to v12: - add more review tags Differences to v11: - split "xfs: ignore ..." patch - inline sync_blockdev() in xfs_alloc_buftarg() (Christoph) - fix xfs_calc_rtgroup_awu_max() for 0 block count (Darrick) - Add RB tag from Christoph (thanks!) Differences to v10: - add "xfs: only call xfs_setsize_buftarg once ..." by Darrick - symbol renames in "xfs: ignore HW which cannot..." by Darrick Differences to v9: - rework "ignore HW which cannot .." patch by Darrick - Ensure power-of-2 max always for unit min/max when no HW support With a bit of luck, this should all go splendidly. Signed-off-by: "Darrick J. Wong" <[email protected]>
2 parents 23be716 + 4528b90 commit 4abb905

38 files changed

+1351
-127
lines changed

Documentation/admin-guide/xfs.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,17 @@ When mounting an XFS filesystem, the following options are accepted.
151151
optional, and the log section can be separate from the data
152152
section or contained within it.
153153

154+
max_atomic_write=value
155+
Set the maximum size of an atomic write. The size may be
156+
specified in bytes, in kilobytes with a "k" suffix, in megabytes
157+
with a "m" suffix, or in gigabytes with a "g" suffix. The size
158+
cannot be larger than the maximum write size, larger than the
159+
size of any allocation group, or larger than the size of a
160+
remapping operation that the log can complete atomically.
161+
162+
The default value is to set the maximum I/O completion size
163+
to allow each CPU to handle one at a time.
164+
154165
max_open_zones=value
155166
Specify the max number of zones to keep open for writing on a
156167
zoned rt device. Many open zones aids file data separation

block/bdev.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,8 @@ void bdev_statx(struct path *path, struct kstat *stat,
13361336

13371337
generic_fill_statx_atomic_writes(stat,
13381338
queue_atomic_write_unit_min_bytes(bd_queue),
1339-
queue_atomic_write_unit_max_bytes(bd_queue));
1339+
queue_atomic_write_unit_max_bytes(bd_queue),
1340+
0);
13401341
}
13411342

13421343
stat->blksize = bdev_io_min(bdev);

fs/ext4/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5692,7 +5692,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
56925692
awu_max = sbi->s_awu_max;
56935693
}
56945694

5695-
generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
5695+
generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
56965696
}
56975697

56985698
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;

fs/stat.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,15 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
136136
* @stat: Where to fill in the attribute flags
137137
* @unit_min: Minimum supported atomic write length in bytes
138138
* @unit_max: Maximum supported atomic write length in bytes
139+
* @unit_max_opt: Optimised maximum supported atomic write length in bytes
139140
*
140141
* Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from
141142
* atomic write unit_min and unit_max values.
142143
*/
143144
void generic_fill_statx_atomic_writes(struct kstat *stat,
144145
unsigned int unit_min,
145-
unsigned int unit_max)
146+
unsigned int unit_max,
147+
unsigned int unit_max_opt)
146148
{
147149
/* Confirm that the request type is known */
148150
stat->result_mask |= STATX_WRITE_ATOMIC;
@@ -153,6 +155,7 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
153155
if (unit_min) {
154156
stat->atomic_write_unit_min = unit_min;
155157
stat->atomic_write_unit_max = unit_max;
158+
stat->atomic_write_unit_max_opt = unit_max_opt;
156159
/* Initially only allow 1x segment */
157160
stat->atomic_write_segments_max = 1;
158161

@@ -732,6 +735,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
732735
tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min;
733736
tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max;
734737
tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max;
738+
tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt;
735739

736740
return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
737741
}

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3312,6 +3312,11 @@ xfs_bmap_compute_alignments(
33123312
align = xfs_get_cowextsz_hint(ap->ip);
33133313
else if (ap->datatype & XFS_ALLOC_USERDATA)
33143314
align = xfs_get_extsz_hint(ap->ip);
3315+
3316+
/* Try to align start block to any minimum allocation alignment */
3317+
if (align > 1 && (ap->flags & XFS_BMAPI_EXTSZALIGN))
3318+
args->alignment = align;
3319+
33153320
if (align) {
33163321
if (xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0,
33173322
ap->eof, 0, ap->conv, &ap->offset,

fs/xfs/libxfs/xfs_bmap.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ struct xfs_bmalloca {
8787
/* Do not update the rmap btree. Used for reconstructing bmbt from rmapbt. */
8888
#define XFS_BMAPI_NORMAP (1u << 10)
8989

90+
/* Try to align allocations to the extent size hint */
91+
#define XFS_BMAPI_EXTSZALIGN (1u << 11)
92+
9093
#define XFS_BMAPI_FLAGS \
9194
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
9295
{ XFS_BMAPI_METADATA, "METADATA" }, \
@@ -98,7 +101,8 @@ struct xfs_bmalloca {
98101
{ XFS_BMAPI_REMAP, "REMAP" }, \
99102
{ XFS_BMAPI_COWFORK, "COWFORK" }, \
100103
{ XFS_BMAPI_NODISCARD, "NODISCARD" }, \
101-
{ XFS_BMAPI_NORMAP, "NORMAP" }
104+
{ XFS_BMAPI_NORMAP, "NORMAP" },\
105+
{ XFS_BMAPI_EXTSZALIGN, "EXTSZALIGN" }
102106

103107

104108
static inline int xfs_bmapi_aflag(int w)

fs/xfs/libxfs/xfs_log_rlimit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks(
9191
*/
9292
if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
9393
xfs_trans_resv_calc(mp, resv);
94+
resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
9495
return;
9596
}
9697

@@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks(
107108

108109
xfs_trans_resv_calc(mp, resv);
109110

111+
/* Copy the dynamic transaction reservation types from the running fs */
112+
resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend;
113+
110114
if (xfs_has_reflink(mp)) {
111115
/*
112116
* In the early days of reflink, typical log operation counts

0 commit comments

Comments
 (0)