Skip to content

Commit c039b99

Browse files
goldwynrdjwong
authored and committed
iomap: use a srcmap for a read-modify-write I/O
The srcmap is used to identify where the read is to be performed from. It is passed to ->iomap_begin, which can fill it in if we need to read data for partially written blocks from a different location than the write target. The srcmap is only supported for buffered writes so far. Signed-off-by: Goldwyn Rodrigues <[email protected]> [hch: merged two patches, removed the IOMAP_F_COW flag, use iomap as srcmap if not set, adjust length down to srcmap end as well] Signed-off-by: Christoph Hellwig <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]> Signed-off-by: Darrick J. Wong <[email protected]> Acked-by: Goldwyn Rodrigues <[email protected]>
1 parent eb81cf9 commit c039b99

File tree

12 files changed

+82
-55
lines changed

12 files changed

+82
-55
lines changed

fs/dax.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,7 @@ EXPORT_SYMBOL_GPL(__dax_zero_page_range);
10901090

10911091
static loff_t
10921092
dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
1093-
struct iomap *iomap)
1093+
struct iomap *iomap, struct iomap *srcmap)
10941094
{
10951095
struct block_device *bdev = iomap->bdev;
10961096
struct dax_device *dax_dev = iomap->dax_dev;
@@ -1247,7 +1247,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
12471247
struct inode *inode = mapping->host;
12481248
unsigned long vaddr = vmf->address;
12491249
loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
1250-
struct iomap iomap = { 0 };
1250+
struct iomap iomap = { .type = IOMAP_HOLE };
1251+
struct iomap srcmap = { .type = IOMAP_HOLE };
12511252
unsigned flags = IOMAP_FAULT;
12521253
int error, major = 0;
12531254
bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -1292,7 +1293,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
12921293
* the file system block size to be equal the page size, which means
12931294
* that we never have to deal with more than a single extent here.
12941295
*/
1295-
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
1296+
error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
12961297
if (iomap_errp)
12971298
*iomap_errp = error;
12981299
if (error) {
@@ -1471,7 +1472,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
14711472
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
14721473
struct inode *inode = mapping->host;
14731474
vm_fault_t result = VM_FAULT_FALLBACK;
1474-
struct iomap iomap = { 0 };
1475+
struct iomap iomap = { .type = IOMAP_HOLE };
1476+
struct iomap srcmap = { .type = IOMAP_HOLE };
14751477
pgoff_t max_pgoff;
14761478
void *entry;
14771479
loff_t pos;
@@ -1546,7 +1548,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
15461548
* to look up our filesystem block.
15471549
*/
15481550
pos = (loff_t)xas.xa_index << PAGE_SHIFT;
1549-
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
1551+
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap,
1552+
&srcmap);
15501553
if (error)
15511554
goto unlock_entry;
15521555

fs/ext2/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ int ext2_get_block(struct inode *inode, sector_t iblock,
801801

802802
#ifdef CONFIG_FS_DAX
803803
static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
804-
unsigned flags, struct iomap *iomap)
804+
unsigned flags, struct iomap *iomap, struct iomap *srcmap)
805805
{
806806
unsigned int blkbits = inode->i_blkbits;
807807
unsigned long first_block = offset >> blkbits;

fs/ext4/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3407,7 +3407,7 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
34073407
}
34083408

34093409
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
3410-
unsigned flags, struct iomap *iomap)
3410+
unsigned flags, struct iomap *iomap, struct iomap *srcmap)
34113411
{
34123412
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
34133413
unsigned int blkbits = inode->i_blkbits;

fs/gfs2/bmap.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1149,7 +1149,8 @@ static inline bool gfs2_iomap_need_write_lock(unsigned flags)
11491149
}
11501150

11511151
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1152-
unsigned flags, struct iomap *iomap)
1152+
unsigned flags, struct iomap *iomap,
1153+
struct iomap *srcmap)
11531154
{
11541155
struct gfs2_inode *ip = GFS2_I(inode);
11551156
struct metapath mp = { .mp_aheight = 1, };

fs/iomap/apply.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,10 @@ loff_t
2323
iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
2424
const struct iomap_ops *ops, void *data, iomap_actor_t actor)
2525
{
26-
struct iomap iomap = { 0 };
26+
struct iomap iomap = { .type = IOMAP_HOLE };
27+
struct iomap srcmap = { .type = IOMAP_HOLE };
2728
loff_t written = 0, ret;
29+
u64 end;
2830

2931
/*
3032
* Need to map a range from start position for length bytes. This can
@@ -38,7 +40,7 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
3840
* expose transient stale data. If the reserve fails, we can safely
3941
* back out at this point as there is nothing to undo.
4042
*/
41-
ret = ops->iomap_begin(inode, pos, length, flags, &iomap);
43+
ret = ops->iomap_begin(inode, pos, length, flags, &iomap, &srcmap);
4244
if (ret)
4345
return ret;
4446
if (WARN_ON(iomap.offset > pos))
@@ -50,15 +52,26 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
5052
* Cut down the length to the one actually provided by the filesystem,
5153
* as it might not be able to give us the whole size that we requested.
5254
*/
53-
if (iomap.offset + iomap.length < pos + length)
54-
length = iomap.offset + iomap.length - pos;
55+
end = iomap.offset + iomap.length;
56+
if (srcmap.type != IOMAP_HOLE)
57+
end = min(end, srcmap.offset + srcmap.length);
58+
if (pos + length > end)
59+
length = end - pos;
5560

5661
/*
57-
* Now that we have guaranteed that the space allocation will succeed.
62+
* Now that we have guaranteed that the space allocation will succeed,
5863
* we can do the copy-in page by page without having to worry about
5964
* failures exposing transient data.
65+
*
66+
* To support COW operations, we read in data for partial blocks from
67+
* the srcmap if the file system filled it in. In that case the
68+
* length needs to be limited to the earlier of the ends of the iomaps.
69+
* If the file system did not provide a srcmap we pass in the normal
70+
* iomap into the actors so that they don't need to have special
71+
* handling for the two cases.
6072
*/
61-
written = actor(inode, pos, length, data, &iomap);
73+
written = actor(inode, pos, length, data, &iomap,
74+
srcmap.type != IOMAP_HOLE ? &srcmap : &iomap);
6275

6376
/*
6477
* Now the data has been copied, commit the range we've copied. This

fs/iomap/buffered-io.c

Lines changed: 35 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ static inline bool iomap_block_needs_zeroing(struct inode *inode,
234234

235235
static loff_t
236236
iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
237-
struct iomap *iomap)
237+
struct iomap *iomap, struct iomap *srcmap)
238238
{
239239
struct iomap_readpage_ctx *ctx = data;
240240
struct page *page = ctx->cur_page;
@@ -382,7 +382,7 @@ iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos,
382382

383383
static loff_t
384384
iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
385-
void *data, struct iomap *iomap)
385+
void *data, struct iomap *iomap, struct iomap *srcmap)
386386
{
387387
struct iomap_readpage_ctx *ctx = data;
388388
loff_t done, ret;
@@ -402,7 +402,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length,
402402
ctx->cur_page_in_bio = false;
403403
}
404404
ret = iomap_readpage_actor(inode, pos + done, length - done,
405-
ctx, iomap);
405+
ctx, iomap, srcmap);
406406
}
407407

408408
return done;
@@ -582,7 +582,7 @@ iomap_read_page_sync(loff_t block_start, struct page *page, unsigned poff,
582582

583583
static int
584584
__iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
585-
struct page *page, struct iomap *iomap)
585+
struct page *page, struct iomap *srcmap)
586586
{
587587
struct iomap_page *iop = iomap_page_create(inode, page);
588588
loff_t block_size = i_blocksize(inode);
@@ -605,7 +605,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
605605
(to <= poff || to >= poff + plen))
606606
continue;
607607

608-
if (iomap_block_needs_zeroing(inode, iomap, block_start)) {
608+
if (iomap_block_needs_zeroing(inode, srcmap, block_start)) {
609609
if (WARN_ON_ONCE(flags & IOMAP_WRITE_F_UNSHARE))
610610
return -EIO;
611611
zero_user_segments(page, poff, from, to, poff + plen);
@@ -614,7 +614,7 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
614614
}
615615

616616
status = iomap_read_page_sync(block_start, page, poff, plen,
617-
iomap);
617+
srcmap);
618618
if (status)
619619
return status;
620620
} while ((block_start += plen) < block_end);
@@ -624,13 +624,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags,
624624

625625
static int
626626
iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
627-
struct page **pagep, struct iomap *iomap)
627+
struct page **pagep, struct iomap *iomap, struct iomap *srcmap)
628628
{
629629
const struct iomap_page_ops *page_ops = iomap->page_ops;
630630
struct page *page;
631631
int status = 0;
632632

633633
BUG_ON(pos + len > iomap->offset + iomap->length);
634+
if (srcmap != iomap)
635+
BUG_ON(pos + len > srcmap->offset + srcmap->length);
634636

635637
if (fatal_signal_pending(current))
636638
return -EINTR;
@@ -648,13 +650,13 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags,
648650
goto out_no_page;
649651
}
650652

651-
if (iomap->type == IOMAP_INLINE)
652-
iomap_read_inline_data(inode, page, iomap);
653+
if (srcmap->type == IOMAP_INLINE)
654+
iomap_read_inline_data(inode, page, srcmap);
653655
else if (iomap->flags & IOMAP_F_BUFFER_HEAD)
654-
status = __block_write_begin_int(page, pos, len, NULL, iomap);
656+
status = __block_write_begin_int(page, pos, len, NULL, srcmap);
655657
else
656658
status = __iomap_write_begin(inode, pos, len, flags, page,
657-
iomap);
659+
srcmap);
658660

659661
if (unlikely(status))
660662
goto out_unlock;
@@ -740,16 +742,16 @@ iomap_write_end_inline(struct inode *inode, struct page *page,
740742
}
741743

742744
static int
743-
iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
744-
unsigned copied, struct page *page, struct iomap *iomap)
745+
iomap_write_end(struct inode *inode, loff_t pos, unsigned len, unsigned copied,
746+
struct page *page, struct iomap *iomap, struct iomap *srcmap)
745747
{
746748
const struct iomap_page_ops *page_ops = iomap->page_ops;
747749
loff_t old_size = inode->i_size;
748750
int ret;
749751

750-
if (iomap->type == IOMAP_INLINE) {
752+
if (srcmap->type == IOMAP_INLINE) {
751753
ret = iomap_write_end_inline(inode, page, iomap, pos, copied);
752-
} else if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
754+
} else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
753755
ret = block_write_end(NULL, inode->i_mapping, pos, len, copied,
754756
page, NULL);
755757
} else {
@@ -780,7 +782,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
780782

781783
static loff_t
782784
iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
783-
struct iomap *iomap)
785+
struct iomap *iomap, struct iomap *srcmap)
784786
{
785787
struct iov_iter *i = data;
786788
long status = 0;
@@ -814,7 +816,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
814816
break;
815817
}
816818

817-
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
819+
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap,
820+
srcmap);
818821
if (unlikely(status))
819822
break;
820823

@@ -825,8 +828,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
825828

826829
flush_dcache_page(page);
827830

828-
status = iomap_write_end(inode, pos, bytes, copied, page,
829-
iomap);
831+
status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
832+
srcmap);
830833
if (unlikely(status < 0))
831834
break;
832835
copied = status;
@@ -879,7 +882,7 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
879882

880883
static loff_t
881884
iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
882-
struct iomap *iomap)
885+
struct iomap *iomap, struct iomap *srcmap)
883886
{
884887
long status = 0;
885888
ssize_t written = 0;
@@ -888,7 +891,7 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
888891
if (!(iomap->flags & IOMAP_F_SHARED))
889892
return length;
890893
/* don't bother with holes or unwritten extents */
891-
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
894+
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
892895
return length;
893896

894897
do {
@@ -897,11 +900,12 @@ iomap_unshare_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
897900
struct page *page;
898901

899902
status = iomap_write_begin(inode, pos, bytes,
900-
IOMAP_WRITE_F_UNSHARE, &page, iomap);
903+
IOMAP_WRITE_F_UNSHARE, &page, iomap, srcmap);
901904
if (unlikely(status))
902905
return status;
903906

904-
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap);
907+
status = iomap_write_end(inode, pos, bytes, bytes, page, iomap,
908+
srcmap);
905909
if (unlikely(status <= 0)) {
906910
if (WARN_ON_ONCE(status == 0))
907911
return -EIO;
@@ -940,19 +944,19 @@ iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
940944
EXPORT_SYMBOL_GPL(iomap_file_unshare);
941945

942946
static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
943-
unsigned bytes, struct iomap *iomap)
947+
unsigned bytes, struct iomap *iomap, struct iomap *srcmap)
944948
{
945949
struct page *page;
946950
int status;
947951

948-
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap);
952+
status = iomap_write_begin(inode, pos, bytes, 0, &page, iomap, srcmap);
949953
if (status)
950954
return status;
951955

952956
zero_user(page, offset, bytes);
953957
mark_page_accessed(page);
954958

955-
return iomap_write_end(inode, pos, bytes, bytes, page, iomap);
959+
return iomap_write_end(inode, pos, bytes, bytes, page, iomap, srcmap);
956960
}
957961

958962
static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
@@ -964,14 +968,14 @@ static int iomap_dax_zero(loff_t pos, unsigned offset, unsigned bytes,
964968

965969
static loff_t
966970
iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
967-
void *data, struct iomap *iomap)
971+
void *data, struct iomap *iomap, struct iomap *srcmap)
968972
{
969973
bool *did_zero = data;
970974
loff_t written = 0;
971975
int status;
972976

973977
/* already zeroed? we're done. */
974-
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
978+
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
975979
return count;
976980

977981
do {
@@ -983,7 +987,8 @@ iomap_zero_range_actor(struct inode *inode, loff_t pos, loff_t count,
983987
if (IS_DAX(inode))
984988
status = iomap_dax_zero(pos, offset, bytes, iomap);
985989
else
986-
status = iomap_zero(inode, pos, offset, bytes, iomap);
990+
status = iomap_zero(inode, pos, offset, bytes, iomap,
991+
srcmap);
987992
if (status < 0)
988993
return status;
989994

@@ -1033,7 +1038,7 @@ EXPORT_SYMBOL_GPL(iomap_truncate_page);
10331038

10341039
static loff_t
10351040
iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
1036-
void *data, struct iomap *iomap)
1041+
void *data, struct iomap *iomap, struct iomap *srcmap)
10371042
{
10381043
struct page *page = data;
10391044
int ret;

fs/iomap/direct-io.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length,
358358

359359
static loff_t
360360
iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
361-
void *data, struct iomap *iomap)
361+
void *data, struct iomap *iomap, struct iomap *srcmap)
362362
{
363363
struct iomap_dio *dio = data;
364364

fs/iomap/fiemap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ static int iomap_to_fiemap(struct fiemap_extent_info *fi,
4444

4545
static loff_t
4646
iomap_fiemap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
47-
struct iomap *iomap)
47+
struct iomap *iomap, struct iomap *srcmap)
4848
{
4949
struct fiemap_ctx *ctx = data;
5050
loff_t ret = length;
@@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(iomap_fiemap);
111111

112112
static loff_t
113113
iomap_bmap_actor(struct inode *inode, loff_t pos, loff_t length,
114-
void *data, struct iomap *iomap)
114+
void *data, struct iomap *iomap, struct iomap *srcmap)
115115
{
116116
sector_t *bno = data, addr;
117117

0 commit comments

Comments
 (0)