Skip to content

Commit a2e4339

Browse files
committed
Merge tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner:

- More fallout and preparatory work associated with the folio batch prototype posted a while back. Mainly this just cleans up some of the helpers and pushes some pos/len trimming further down in the write begin path.
- Add missing flag descriptions to the iomap documentation

* tag 'vfs-6.16-rc1.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  - iomap: rework iomap_write_begin() to return folio offset and length
  - iomap: push non-large folio check into get folio path
  - iomap: helper to trim pos/bytes to within folio
  - iomap: drop pos param from __iomap_[get|put]_folio()
  - iomap: drop unnecessary pos param from iomap_write_[begin|end]
  - iomap: resample iter->pos after iomap_write_begin() calls
  - iomap: trace: Add missing flags to [IOMAP_|IOMAP_F_]FLAGS_STRINGS
  - Documentation: iomap: Add missing flags description
2 parents c5bfc48 + 2cb0e96 commit a2e4339

File tree

3 files changed

+93
-50
lines changed

3 files changed

+93
-50
lines changed

Documentation/filesystems/iomap/design.rst

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,25 @@ The fields are as follows:
243243
regular file data.
244244
This is only useful for FIEMAP.
245245

246-
* **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
247-
be set by the filesystem for its own purposes.
246+
* **IOMAP_F_BOUNDARY**: This indicates I/O and its completion must not be
247+
merged with any other I/O or completion. Filesystems must use this when
248+
submitting I/O to devices that cannot handle I/O crossing certain LBAs
249+
(e.g. ZNS devices). This flag applies only to buffered I/O writeback; all
250+
other functions ignore it.
251+
252+
* **IOMAP_F_PRIVATE**: This flag is reserved for filesystem private use.
248253

249254
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
250255
block assigned to it yet and the file system will do that in the bio
251256
submission handler, splitting the I/O as needed.
252257

258+
* **IOMAP_F_ATOMIC_BIO**: This indicates write I/O must be submitted with the
259+
``REQ_ATOMIC`` flag set in the bio. Filesystems need to set this flag to
260+
inform iomap that the write I/O operation requires torn-write protection
261+
based on a HW-offload mechanism. They must also ensure that mapping updates
262+
upon the completion of the I/O are performed in a single metadata
263+
update.
264+
253265
These flags can be set by iomap itself during file operations.
254266
The filesystem should supply an ``->iomap_end`` function if it needs
255267
to observe these flags:

fs/iomap/buffered-io.c

Lines changed: 58 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -679,11 +679,12 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
679679
return submit_bio_wait(&bio);
680680
}
681681

682-
static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
683-
size_t len, struct folio *folio)
682+
static int __iomap_write_begin(const struct iomap_iter *iter, size_t len,
683+
struct folio *folio)
684684
{
685685
const struct iomap *srcmap = iomap_iter_srcmap(iter);
686686
struct iomap_folio_state *ifs;
687+
loff_t pos = iter->pos;
687688
loff_t block_size = i_blocksize(iter->inode);
688689
loff_t block_start = round_down(pos, block_size);
689690
loff_t block_end = round_up(pos + len, block_size);
@@ -741,21 +742,25 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
741742
return 0;
742743
}
743744

744-
static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
745-
size_t len)
745+
static struct folio *__iomap_get_folio(struct iomap_iter *iter, size_t len)
746746
{
747747
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
748+
loff_t pos = iter->pos;
749+
750+
if (!mapping_large_folio_support(iter->inode->i_mapping))
751+
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
748752

749753
if (folio_ops && folio_ops->get_folio)
750754
return folio_ops->get_folio(iter, pos, len);
751755
else
752756
return iomap_get_folio(iter, pos, len);
753757
}
754758

755-
static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
759+
static void __iomap_put_folio(struct iomap_iter *iter, size_t ret,
756760
struct folio *folio)
757761
{
758762
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
763+
loff_t pos = iter->pos;
759764

760765
if (folio_ops && folio_ops->put_folio) {
761766
folio_ops->put_folio(iter->inode, pos, ret, folio);
@@ -765,6 +770,22 @@ static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
765770
}
766771
}
767772

773+
/* trim pos and bytes to within a given folio */
774+
static loff_t iomap_trim_folio_range(struct iomap_iter *iter,
775+
struct folio *folio, size_t *offset, u64 *bytes)
776+
{
777+
loff_t pos = iter->pos;
778+
size_t fsize = folio_size(folio);
779+
780+
WARN_ON_ONCE(pos < folio_pos(folio));
781+
WARN_ON_ONCE(pos >= folio_pos(folio) + fsize);
782+
783+
*offset = offset_in_folio(folio, pos);
784+
*bytes = min(*bytes, fsize - *offset);
785+
786+
return pos;
787+
}
788+
768789
static int iomap_write_begin_inline(const struct iomap_iter *iter,
769790
struct folio *folio)
770791
{
@@ -774,25 +795,30 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
774795
return iomap_read_inline_data(iter, folio);
775796
}
776797

777-
static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
778-
size_t len, struct folio **foliop)
798+
/*
799+
* Grab and prepare a folio for write based on iter state. Returns the folio,
800+
* offset, and length. Callers can optionally pass a max length *plen,
801+
* otherwise init to zero.
802+
*/
803+
static int iomap_write_begin(struct iomap_iter *iter, struct folio **foliop,
804+
size_t *poffset, u64 *plen)
779805
{
780806
const struct iomap_folio_ops *folio_ops = iter->iomap.folio_ops;
781807
const struct iomap *srcmap = iomap_iter_srcmap(iter);
808+
loff_t pos = iter->pos;
809+
u64 len = min_t(u64, SIZE_MAX, iomap_length(iter));
782810
struct folio *folio;
783811
int status = 0;
784812

813+
len = min_not_zero(len, *plen);
785814
BUG_ON(pos + len > iter->iomap.offset + iter->iomap.length);
786815
if (srcmap != &iter->iomap)
787816
BUG_ON(pos + len > srcmap->offset + srcmap->length);
788817

789818
if (fatal_signal_pending(current))
790819
return -EINTR;
791820

792-
if (!mapping_large_folio_support(iter->inode->i_mapping))
793-
len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos));
794-
795-
folio = __iomap_get_folio(iter, pos, len);
821+
folio = __iomap_get_folio(iter, len);
796822
if (IS_ERR(folio))
797823
return PTR_ERR(folio);
798824

@@ -816,24 +842,24 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
816842
}
817843
}
818844

819-
if (pos + len > folio_pos(folio) + folio_size(folio))
820-
len = folio_pos(folio) + folio_size(folio) - pos;
845+
pos = iomap_trim_folio_range(iter, folio, poffset, &len);
821846

822847
if (srcmap->type == IOMAP_INLINE)
823848
status = iomap_write_begin_inline(iter, folio);
824849
else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
825850
status = __block_write_begin_int(folio, pos, len, NULL, srcmap);
826851
else
827-
status = __iomap_write_begin(iter, pos, len, folio);
852+
status = __iomap_write_begin(iter, len, folio);
828853

829854
if (unlikely(status))
830855
goto out_unlock;
831856

832857
*foliop = folio;
858+
*plen = len;
833859
return 0;
834860

835861
out_unlock:
836-
__iomap_put_folio(iter, pos, 0, folio);
862+
__iomap_put_folio(iter, 0, folio);
837863

838864
return status;
839865
}
@@ -883,10 +909,11 @@ static void iomap_write_end_inline(const struct iomap_iter *iter,
883909
* Returns true if all copied bytes have been written to the pagecache,
884910
* otherwise return false.
885911
*/
886-
static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
887-
size_t copied, struct folio *folio)
912+
static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied,
913+
struct folio *folio)
888914
{
889915
const struct iomap *srcmap = iomap_iter_srcmap(iter);
916+
loff_t pos = iter->pos;
890917

891918
if (srcmap->type == IOMAP_INLINE) {
892919
iomap_write_end_inline(iter, folio, pos, copied);
@@ -917,14 +944,14 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
917944
struct folio *folio;
918945
loff_t old_size;
919946
size_t offset; /* Offset into folio */
920-
size_t bytes; /* Bytes to write to folio */
947+
u64 bytes; /* Bytes to write to folio */
921948
size_t copied; /* Bytes copied from user */
922949
u64 written; /* Bytes have been written */
923-
loff_t pos = iter->pos;
950+
loff_t pos;
924951

925952
bytes = iov_iter_count(i);
926953
retry:
927-
offset = pos & (chunk - 1);
954+
offset = iter->pos & (chunk - 1);
928955
bytes = min(chunk - offset, bytes);
929956
status = balance_dirty_pages_ratelimited_flags(mapping,
930957
bdp_flags);
@@ -949,23 +976,21 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
949976
break;
950977
}
951978

952-
status = iomap_write_begin(iter, pos, bytes, &folio);
979+
status = iomap_write_begin(iter, &folio, &offset, &bytes);
953980
if (unlikely(status)) {
954-
iomap_write_failed(iter->inode, pos, bytes);
981+
iomap_write_failed(iter->inode, iter->pos, bytes);
955982
break;
956983
}
957984
if (iter->iomap.flags & IOMAP_F_STALE)
958985
break;
959986

960-
offset = offset_in_folio(folio, pos);
961-
if (bytes > folio_size(folio) - offset)
962-
bytes = folio_size(folio) - offset;
987+
pos = iter->pos;
963988

964989
if (mapping_writably_mapped(mapping))
965990
flush_dcache_folio(folio);
966991

967992
copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
968-
written = iomap_write_end(iter, pos, bytes, copied, folio) ?
993+
written = iomap_write_end(iter, bytes, copied, folio) ?
969994
copied : 0;
970995

971996
/*
@@ -980,7 +1005,7 @@ static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
9801005
i_size_write(iter->inode, pos + written);
9811006
iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
9821007
}
983-
__iomap_put_folio(iter, pos, written, folio);
1008+
__iomap_put_folio(iter, written, folio);
9841009

9851010
if (old_size < pos)
9861011
pagecache_isize_extended(iter->inode, old_size, pos);
@@ -1276,22 +1301,17 @@ static int iomap_unshare_iter(struct iomap_iter *iter)
12761301
do {
12771302
struct folio *folio;
12781303
size_t offset;
1279-
loff_t pos = iter->pos;
12801304
bool ret;
12811305

12821306
bytes = min_t(u64, SIZE_MAX, bytes);
1283-
status = iomap_write_begin(iter, pos, bytes, &folio);
1307+
status = iomap_write_begin(iter, &folio, &offset, &bytes);
12841308
if (unlikely(status))
12851309
return status;
12861310
if (iomap->flags & IOMAP_F_STALE)
12871311
break;
12881312

1289-
offset = offset_in_folio(folio, pos);
1290-
if (bytes > folio_size(folio) - offset)
1291-
bytes = folio_size(folio) - offset;
1292-
1293-
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
1294-
__iomap_put_folio(iter, pos, bytes, folio);
1313+
ret = iomap_write_end(iter, bytes, bytes, folio);
1314+
__iomap_put_folio(iter, bytes, folio);
12951315
if (WARN_ON_ONCE(!ret))
12961316
return -EIO;
12971317

@@ -1351,27 +1371,23 @@ static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
13511371
do {
13521372
struct folio *folio;
13531373
size_t offset;
1354-
loff_t pos = iter->pos;
13551374
bool ret;
13561375

13571376
bytes = min_t(u64, SIZE_MAX, bytes);
1358-
status = iomap_write_begin(iter, pos, bytes, &folio);
1377+
status = iomap_write_begin(iter, &folio, &offset, &bytes);
13591378
if (status)
13601379
return status;
13611380
if (iter->iomap.flags & IOMAP_F_STALE)
13621381
break;
13631382

13641383
/* warn about zeroing folios beyond eof that won't write back */
13651384
WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size);
1366-
offset = offset_in_folio(folio, pos);
1367-
if (bytes > folio_size(folio) - offset)
1368-
bytes = folio_size(folio) - offset;
13691385

13701386
folio_zero_range(folio, offset, bytes);
13711387
folio_mark_accessed(folio);
13721388

1373-
ret = iomap_write_end(iter, pos, bytes, bytes, folio);
1374-
__iomap_put_folio(iter, pos, bytes, folio);
1389+
ret = iomap_write_end(iter, bytes, bytes, folio);
1390+
__iomap_put_folio(iter, bytes, folio);
13751391
if (WARN_ON_ONCE(!ret))
13761392
return -EIO;
13771393

fs/iomap/trace.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,15 +99,26 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
9999
{ IOMAP_FAULT, "FAULT" }, \
100100
{ IOMAP_DIRECT, "DIRECT" }, \
101101
{ IOMAP_NOWAIT, "NOWAIT" }, \
102-
{ IOMAP_ATOMIC, "ATOMIC" }
102+
{ IOMAP_OVERWRITE_ONLY, "OVERWRITE_ONLY" }, \
103+
{ IOMAP_UNSHARE, "UNSHARE" }, \
104+
{ IOMAP_DAX, "DAX" }, \
105+
{ IOMAP_ATOMIC, "ATOMIC" }, \
106+
{ IOMAP_DONTCACHE, "DONTCACHE" }
103107

104108
#define IOMAP_F_FLAGS_STRINGS \
105109
{ IOMAP_F_NEW, "NEW" }, \
106110
{ IOMAP_F_DIRTY, "DIRTY" }, \
107111
{ IOMAP_F_SHARED, "SHARED" }, \
108112
{ IOMAP_F_MERGED, "MERGED" }, \
109113
{ IOMAP_F_BUFFER_HEAD, "BH" }, \
110-
{ IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }
114+
{ IOMAP_F_XATTR, "XATTR" }, \
115+
{ IOMAP_F_BOUNDARY, "BOUNDARY" }, \
116+
{ IOMAP_F_ANON_WRITE, "ANON_WRITE" }, \
117+
{ IOMAP_F_ATOMIC_BIO, "ATOMIC_BIO" }, \
118+
{ IOMAP_F_PRIVATE, "PRIVATE" }, \
119+
{ IOMAP_F_SIZE_CHANGED, "SIZE_CHANGED" }, \
120+
{ IOMAP_F_STALE, "STALE" }
121+
111122

112123
#define IOMAP_DIO_STRINGS \
113124
{IOMAP_DIO_FORCE_WAIT, "DIO_FORCE_WAIT" }, \
@@ -138,15 +149,17 @@ DECLARE_EVENT_CLASS(iomap_class,
138149
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
139150
),
140151
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d addr 0x%llx offset 0x%llx "
141-
"length 0x%llx type %s flags %s",
152+
"length 0x%llx type %s (0x%x) flags %s (0x%x)",
142153
MAJOR(__entry->dev), MINOR(__entry->dev),
143154
__entry->ino,
144155
MAJOR(__entry->bdev), MINOR(__entry->bdev),
145156
__entry->addr,
146157
__entry->offset,
147158
__entry->length,
148159
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
149-
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
160+
__entry->type,
161+
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
162+
__entry->flags)
150163
)
151164

152165
#define DEFINE_IOMAP_EVENT(name) \
@@ -185,7 +198,7 @@ TRACE_EVENT(iomap_writepage_map,
185198
__entry->bdev = iomap->bdev ? iomap->bdev->bd_dev : 0;
186199
),
187200
TP_printk("dev %d:%d ino 0x%llx bdev %d:%d pos 0x%llx dirty len 0x%llx "
188-
"addr 0x%llx offset 0x%llx length 0x%llx type %s flags %s",
201+
"addr 0x%llx offset 0x%llx length 0x%llx type %s (0x%x) flags %s (0x%x)",
189202
MAJOR(__entry->dev), MINOR(__entry->dev),
190203
__entry->ino,
191204
MAJOR(__entry->bdev), MINOR(__entry->bdev),
@@ -195,7 +208,9 @@ TRACE_EVENT(iomap_writepage_map,
195208
__entry->offset,
196209
__entry->length,
197210
__print_symbolic(__entry->type, IOMAP_TYPE_STRINGS),
198-
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS))
211+
__entry->type,
212+
__print_flags(__entry->flags, "|", IOMAP_F_FLAGS_STRINGS),
213+
__entry->flags)
199214
);
200215

201216
TRACE_EVENT(iomap_iter,

0 commit comments

Comments
 (0)