Skip to content

Commit 9e0933c

Browse files
johnpgarry authored and Darrick J. Wong committed
fs: iomap: Atomic write support
Support direct I/O atomic writes by producing a single bio with REQ_ATOMIC flag set.

Initially FSes (XFS) should only support writing a single FS block atomically.

As with any atomic write, we should produce a single bio which covers the complete write length.

Reviewed-by: Christoph Hellwig <[email protected]>
Reviewed-by: "Darrick J. Wong" <[email protected]>
Signed-off-by: John Garry <[email protected]>
Reviewed-by: Ritesh Harjani (IBM) <[email protected]>
[djwong: clarify a couple of things in the docs]
Signed-off-by: Darrick J. Wong <[email protected]>
1 parent a570bad commit 9e0933c

File tree

4 files changed

+52
-5
lines changed

4 files changed

+52
-5
lines changed

Documentation/filesystems/iomap/operations.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,21 @@ IOMAP_WRITE`` with any combination of the following enhancements:
513513
if the mapping is unwritten and the filesystem cannot handle zeroing
514514
the unaligned regions without exposing stale contents.
515515

516+
* ``IOMAP_ATOMIC``: This write is being issued with torn-write
517+
protection.
518+
Only a single bio can be created for the write, and the write must
519+
not be split into multiple I/O requests, i.e. flag REQ_ATOMIC must be
520+
set.
521+
The file range to write must be aligned to satisfy the requirements
522+
of both the filesystem and the underlying block device's atomic
523+
commit capabilities.
524+
If filesystem metadata updates are required (e.g. unwritten extent
525+
conversion or copy on write), all updates for the entire file range
526+
must be committed atomically as well.
527+
Only one space mapping is allowed per untorn write.
528+
Untorn writes must be aligned to, and must not be longer than, a
529+
single file block.
530+
516531
Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
517532
calling this function.
518533

fs/iomap/direct-io.c

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
271271
* clearing the WRITE_THROUGH flag in the dio request.
272272
*/
273273
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
274-
const struct iomap *iomap, bool use_fua)
274+
const struct iomap *iomap, bool use_fua, bool atomic)
275275
{
276276
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
277277

@@ -283,6 +283,8 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
283283
opflags |= REQ_FUA;
284284
else
285285
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
286+
if (atomic)
287+
opflags |= REQ_ATOMIC;
286288

287289
return opflags;
288290
}
@@ -293,7 +295,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
293295
const struct iomap *iomap = &iter->iomap;
294296
struct inode *inode = iter->inode;
295297
unsigned int fs_block_size = i_blocksize(inode), pad;
296-
loff_t length = iomap_length(iter);
298+
const loff_t length = iomap_length(iter);
299+
bool atomic = iter->flags & IOMAP_ATOMIC;
297300
loff_t pos = iter->pos;
298301
blk_opf_t bio_opf;
299302
struct bio *bio;
@@ -303,6 +306,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
303306
size_t copied = 0;
304307
size_t orig_count;
305308

309+
if (atomic && length != fs_block_size)
310+
return -EINVAL;
311+
306312
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
307313
!bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
308314
return -EINVAL;
@@ -382,7 +388,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
382388
* can set up the page vector appropriately for a ZONE_APPEND
383389
* operation.
384390
*/
385-
bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua);
391+
bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
386392

387393
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
388394
do {
@@ -415,6 +421,17 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
415421
}
416422

417423
n = bio->bi_iter.bi_size;
424+
if (WARN_ON_ONCE(atomic && n != length)) {
425+
/*
426+
* This bio should have covered the complete length,
427+
* which it doesn't, so error. We may need to zero out
428+
* the tail (complete FS block), similar to when
429+
* bio_iov_iter_get_pages() returns an error, above.
430+
*/
431+
ret = -EINVAL;
432+
bio_put(bio);
433+
goto zero_tail;
434+
}
418435
if (dio->flags & IOMAP_DIO_WRITE) {
419436
task_io_account_write(n);
420437
} else {
@@ -598,6 +615,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
598615
if (iocb->ki_flags & IOCB_NOWAIT)
599616
iomi.flags |= IOMAP_NOWAIT;
600617

618+
if (iocb->ki_flags & IOCB_ATOMIC)
619+
iomi.flags |= IOMAP_ATOMIC;
620+
601621
if (iov_iter_rw(iter) == READ) {
602622
/* reads can always complete inline */
603623
dio->flags |= IOMAP_DIO_INLINE_COMP;
@@ -659,7 +679,17 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
659679
if (ret != -EAGAIN) {
660680
trace_iomap_dio_invalidate_fail(inode, iomi.pos,
661681
iomi.len);
662-
ret = -ENOTBLK;
682+
if (iocb->ki_flags & IOCB_ATOMIC) {
683+
/*
684+
* folio invalidation failed, maybe
685+
* this is transient, unlock and see if
686+
* the caller tries again.
687+
*/
688+
ret = -EAGAIN;
689+
} else {
690+
/* fall back to buffered write */
691+
ret = -ENOTBLK;
692+
}
663693
}
664694
goto out_free_dio;
665695
}

fs/iomap/trace.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
9898
{ IOMAP_REPORT, "REPORT" }, \
9999
{ IOMAP_FAULT, "FAULT" }, \
100100
{ IOMAP_DIRECT, "DIRECT" }, \
101-
{ IOMAP_NOWAIT, "NOWAIT" }
101+
{ IOMAP_NOWAIT, "NOWAIT" }, \
102+
{ IOMAP_ATOMIC, "ATOMIC" }
102103

103104
#define IOMAP_F_FLAGS_STRINGS \
104105
{ IOMAP_F_NEW, "NEW" }, \

include/linux/iomap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
178178
#else
179179
#define IOMAP_DAX 0
180180
#endif /* CONFIG_FS_DAX */
181+
#define IOMAP_ATOMIC (1 << 9)
181182

182183
struct iomap_ops {
183184
/*

0 commit comments

Comments
 (0)