Skip to content

Commit bd1d2c2

Browse files
johnpgarry and Darrick J. Wong
authored and committed
xfs: add xfs_atomic_write_cow_iomap_begin()
For CoW-based atomic writes, reuse the infrastructure for reflink CoW fork support.

Add ->iomap_begin() callback xfs_atomic_write_cow_iomap_begin() to create staging mappings in the CoW fork for atomic write updates.

The general steps in the function are as follows:
- find extent mapping in the CoW fork for the FS block range being written
  - if part or full extent is found, proceed to process found extent
  - if no extent found, map in new blocks to the CoW fork
- convert unwritten blocks in extent if required
- update iomap extent mapping and return

The bulk of this function is quite similar to the processing in xfs_reflink_allocate_cow(), where we try to find an extent mapping; if none exists, then allocate a new extent in the CoW fork, convert unwritten blocks, and return a mapping.

Performance testing has shown the XFS_ILOCK_EXCL locking to be quite a bottleneck, so this is an area which could be optimised in future.

Christoph Hellwig contributed almost all of the code in xfs_atomic_write_cow_iomap_begin().

Reviewed-by: Darrick J. Wong <[email protected]>
[djwong: add a new xfs_can_sw_atomic_write to convey intent better]
Signed-off-by: Darrick J. Wong <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: John Garry <[email protected]>
1 parent 0ea88ed commit bd1d2c2

File tree

6 files changed

+159
-1
lines changed

6 files changed

+159
-1
lines changed

fs/xfs/xfs_iomap.c

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,134 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = {
10221022
};
10231023
#endif /* CONFIG_XFS_RT */
10241024

1025+
static int
1026+
xfs_atomic_write_cow_iomap_begin(
1027+
struct inode *inode,
1028+
loff_t offset,
1029+
loff_t length,
1030+
unsigned flags,
1031+
struct iomap *iomap,
1032+
struct iomap *srcmap)
1033+
{
1034+
struct xfs_inode *ip = XFS_I(inode);
1035+
struct xfs_mount *mp = ip->i_mount;
1036+
const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
1037+
xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
1038+
xfs_filblks_t count_fsb = end_fsb - offset_fsb;
1039+
int nmaps = 1;
1040+
xfs_filblks_t resaligned;
1041+
struct xfs_bmbt_irec cmap;
1042+
struct xfs_iext_cursor icur;
1043+
struct xfs_trans *tp;
1044+
unsigned int dblocks = 0, rblocks = 0;
1045+
int error;
1046+
u64 seq;
1047+
1048+
ASSERT(flags & IOMAP_WRITE);
1049+
ASSERT(flags & IOMAP_DIRECT);
1050+
1051+
if (xfs_is_shutdown(mp))
1052+
return -EIO;
1053+
1054+
if (!xfs_can_sw_atomic_write(mp)) {
1055+
ASSERT(xfs_can_sw_atomic_write(mp));
1056+
return -EINVAL;
1057+
}
1058+
1059+
/* blocks are always allocated in this path */
1060+
if (flags & IOMAP_NOWAIT)
1061+
return -EAGAIN;
1062+
1063+
trace_xfs_iomap_atomic_write_cow(ip, offset, length);
1064+
1065+
xfs_ilock(ip, XFS_ILOCK_EXCL);
1066+
1067+
if (!ip->i_cowfp) {
1068+
ASSERT(!xfs_is_reflink_inode(ip));
1069+
xfs_ifork_init_cow(ip);
1070+
}
1071+
1072+
if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
1073+
cmap.br_startoff = end_fsb;
1074+
if (cmap.br_startoff <= offset_fsb) {
1075+
xfs_trim_extent(&cmap, offset_fsb, count_fsb);
1076+
goto found;
1077+
}
1078+
1079+
end_fsb = cmap.br_startoff;
1080+
count_fsb = end_fsb - offset_fsb;
1081+
1082+
resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
1083+
xfs_get_cowextsz_hint(ip));
1084+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
1085+
1086+
if (XFS_IS_REALTIME_INODE(ip)) {
1087+
dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
1088+
rblocks = resaligned;
1089+
} else {
1090+
dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
1091+
rblocks = 0;
1092+
}
1093+
1094+
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
1095+
rblocks, false, &tp);
1096+
if (error)
1097+
return error;
1098+
1099+
/* extent layout could have changed since the unlock, so check again */
1100+
if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
1101+
cmap.br_startoff = end_fsb;
1102+
if (cmap.br_startoff <= offset_fsb) {
1103+
xfs_trim_extent(&cmap, offset_fsb, count_fsb);
1104+
xfs_trans_cancel(tp);
1105+
goto found;
1106+
}
1107+
1108+
/*
1109+
* Allocate the entire reservation as unwritten blocks.
1110+
*
1111+
* Use XFS_BMAPI_EXTSZALIGN to hint at aligning new extents according to
1112+
* extszhint, such that there will be a greater chance that future
1113+
* atomic writes to that same range will be aligned (and don't require
1114+
* this COW-based method).
1115+
*/
1116+
error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
1117+
XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
1118+
XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);
1119+
if (error) {
1120+
xfs_trans_cancel(tp);
1121+
goto out_unlock;
1122+
}
1123+
1124+
xfs_inode_set_cowblocks_tag(ip);
1125+
error = xfs_trans_commit(tp);
1126+
if (error)
1127+
goto out_unlock;
1128+
1129+
found:
1130+
if (cmap.br_state != XFS_EXT_NORM) {
1131+
error = xfs_reflink_convert_cow_locked(ip, offset_fsb,
1132+
count_fsb);
1133+
if (error)
1134+
goto out_unlock;
1135+
cmap.br_state = XFS_EXT_NORM;
1136+
}
1137+
1138+
length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
1139+
trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
1140+
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
1141+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
1142+
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
1143+
1144+
out_unlock:
1145+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
1146+
return error;
1147+
}
1148+
1149+
/*
 * iomap operations for CoW-based atomic writes.  Only ->iomap_begin is
 * provided; it is xfs_atomic_write_cow_iomap_begin().
 */
const struct iomap_ops xfs_atomic_write_cow_iomap_ops = {
1150+
.iomap_begin = xfs_atomic_write_cow_iomap_begin,
1151+
};
1152+
10251153
static int
10261154
xfs_dax_write_iomap_end(
10271155
struct inode *inode,

fs/xfs/xfs_iomap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
5656
extern const struct iomap_ops xfs_seek_iomap_ops;
5757
extern const struct iomap_ops xfs_xattr_iomap_ops;
5858
extern const struct iomap_ops xfs_dax_write_iomap_ops;
59+
extern const struct iomap_ops xfs_atomic_write_cow_iomap_ops;
5960

6061
#endif /* __XFS_IOMAP_H__*/

fs/xfs/xfs_mount.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,11 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
464464
return !xfs_has_zoned(mp);
465465
}
466466

467+
/*
 * Can this mount perform software (CoW-based) atomic writes?
 *
 * This is a named wrapper around the reflink feature check, so that callers
 * state what they intend to do rather than which feature they depend on.
 */
static inline bool
xfs_can_sw_atomic_write(
	struct xfs_mount	*mp)
{
	const bool		reflink_enabled = xfs_has_reflink(mp);

	return reflink_enabled;
}
471+
467472
/*
468473
* Some features are always on for v5 file systems, allow the compiler to
469474
 * eliminate dead code when building without v4 support.

fs/xfs/xfs_reflink.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ xfs_bmap_trim_cow(
293293
return xfs_reflink_trim_around_shared(ip, imap, shared);
294294
}
295295

296-
static int
296+
int
297297
xfs_reflink_convert_cow_locked(
298298
struct xfs_inode *ip,
299299
xfs_fileoff_t offset_fsb,

fs/xfs/xfs_reflink.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
3535
bool convert_now);
3636
extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
3737
xfs_off_t count);
38+
int xfs_reflink_convert_cow_locked(struct xfs_inode *ip,
39+
xfs_fileoff_t offset_fsb, xfs_filblks_t count_fsb);
3840

3941
extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
4042
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,

fs/xfs/xfs_trace.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,6 +1657,28 @@ DEFINE_RW_EVENT(xfs_file_direct_write);
16571657
DEFINE_RW_EVENT(xfs_file_dax_write);
16581658
DEFINE_RW_EVENT(xfs_reflink_bounce_dio_write);
16591659

1660+
/*
 * Tracepoint xfs_iomap_atomic_write_cow.
 *
 * Takes an inode, an offset and a count.  Records the device number of the
 * inode's superblock, the inode number, the offset and the count, and prints
 * them as "dev major:minor ino pos bytecount", with the last three in hex.
 */
TRACE_EVENT(xfs_iomap_atomic_write_cow,
1661+
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
1662+
TP_ARGS(ip, offset, count),
1663+
TP_STRUCT__entry(
1664+
__field(dev_t, dev)
1665+
__field(xfs_ino_t, ino)
1666+
__field(xfs_off_t, offset)
1667+
__field(ssize_t, count)
1668+
),
1669+
TP_fast_assign(
1670+
__entry->dev = VFS_I(ip)->i_sb->s_dev;
1671+
__entry->ino = ip->i_ino;
1672+
__entry->offset = offset;
1673+
__entry->count = count;
1674+
),
1675+
TP_printk("dev %d:%d ino 0x%llx pos 0x%llx bytecount 0x%zx",
1676+
MAJOR(__entry->dev), MINOR(__entry->dev),
1677+
__entry->ino,
1678+
__entry->offset,
1679+
__entry->count)
1680+
)
1681+
16601682
DECLARE_EVENT_CLASS(xfs_imap_class,
16611683
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
16621684
int whichfork, struct xfs_bmbt_irec *irec),

0 commit comments

Comments
 (0)