Skip to content

Commit 8e64154

Browse files
committed
Merge branch 'xfs-6.15-zoned_devices' into XFS-for-linus-6.15-merge
Merge Zoned allocator for XFS. Signed-off-by: Carlos Maiolino <[email protected]>
2 parents 4701f33 + f56f73e commit 8e64154

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+6387
-1384
lines changed

Documentation/filesystems/iomap/design.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,10 @@ The fields are as follows:
246246
* **IOMAP_F_PRIVATE**: Starting with this value, the upper bits can
247247
be set by the filesystem for its own purposes.
248248

249+
* **IOMAP_F_ANON_WRITE**: Indicates that (write) I/O does not have a target
250+
block assigned to it yet and the file system will do that in the bio
251+
submission handler, splitting the I/O as needed.
252+
249253
These flags can be set by iomap itself during file operations.
250254
The filesystem should supply an ``->iomap_end`` function if it needs
251255
to observe these flags:
@@ -352,6 +356,11 @@ operations:
352356
``IOMAP_NOWAIT`` is often set on behalf of ``IOCB_NOWAIT`` or
353357
``RWF_NOWAIT``.
354358

359+
* ``IOMAP_DONTCACHE`` is set when the caller wishes to perform a
360+
buffered file I/O and would like the kernel to drop the pagecache
361+
after the I/O completes, if it isn't already being used by another
362+
thread.
363+
355364
If it is necessary to read existing file contents from a `different
356365
<https://lore.kernel.org/all/[email protected]/>`_
357366
device or address range on a device, the filesystem should return that

Documentation/filesystems/iomap/operations.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:
131131

132132
* ``IOCB_NOWAIT``: Turns on ``IOMAP_NOWAIT``.
133133

134+
* ``IOCB_DONTCACHE``: Turns on ``IOMAP_DONTCACHE``.
135+
134136
Internal per-Folio State
135137
------------------------
136138

@@ -283,7 +285,7 @@ The ``ops`` structure must be specified and is as follows:
283285
struct iomap_writeback_ops {
284286
int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
285287
loff_t offset, unsigned len);
286-
int (*prepare_ioend)(struct iomap_ioend *ioend, int status);
288+
int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status);
287289
void (*discard_folio)(struct folio *folio, loff_t pos);
288290
};
289291
@@ -306,13 +308,12 @@ The fields are as follows:
306308
purpose.
307309
This function must be supplied by the filesystem.
308310

309-
- ``prepare_ioend``: Enables filesystems to transform the writeback
310-
ioend or perform any other preparatory work before the writeback I/O
311-
is submitted.
311+
- ``submit_ioend``: Allows the file system to hook into writeback bio
312+
submission.
312313
This might include pre-write space accounting updates, or installing
313314
a custom ``->bi_end_io`` function for internal purposes, such as
314315
deferring the ioend completion to a workqueue to run metadata update
315-
transactions from process context.
316+
transactions from process context before submitting the bio.
316317
This function is optional.
317318

318319
- ``discard_folio``: iomap calls this function after ``->map_blocks``
@@ -341,7 +342,7 @@ This can happen in interrupt or process context, depending on the
341342
storage device.
342343

343344
Filesystems that need to update internal bookkeeping (e.g. unwritten
344-
extent conversions) should provide a ``->prepare_ioend`` function to
345+
extent conversions) should provide a ``->submit_ioend`` function to
345346
set ``struct iomap_ioend::bio::bi_end_io`` to its own function.
346347
This function should call ``iomap_finish_ioends`` after finishing its
347348
own work (e.g. unwritten extent conversion).

fs/dax.c

Lines changed: 61 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,19 +1258,19 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
12581258
}
12591259
#endif /* CONFIG_FS_DAX_PMD */
12601260

1261-
static s64 dax_unshare_iter(struct iomap_iter *iter)
1261+
static int dax_unshare_iter(struct iomap_iter *iter)
12621262
{
12631263
struct iomap *iomap = &iter->iomap;
12641264
const struct iomap *srcmap = iomap_iter_srcmap(iter);
12651265
loff_t copy_pos = iter->pos;
12661266
u64 copy_len = iomap_length(iter);
12671267
u32 mod;
12681268
int id = 0;
1269-
s64 ret = 0;
1269+
s64 ret;
12701270
void *daddr = NULL, *saddr = NULL;
12711271

12721272
if (!iomap_want_unshare_iter(iter))
1273-
return iomap_length(iter);
1273+
return iomap_iter_advance_full(iter);
12741274

12751275
/*
12761276
* Extend the file range to be aligned to fsblock/pagesize, because
@@ -1300,14 +1300,14 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
13001300
if (ret < 0)
13011301
goto out_unlock;
13021302

1303-
if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
1304-
ret = iomap_length(iter);
1305-
else
1303+
if (copy_mc_to_kernel(daddr, saddr, copy_len) != 0)
13061304
ret = -EIO;
13071305

13081306
out_unlock:
13091307
dax_read_unlock(id);
1310-
return dax_mem2blk_err(ret);
1308+
if (ret < 0)
1309+
return dax_mem2blk_err(ret);
1310+
return iomap_iter_advance_full(iter);
13111311
}
13121312

13131313
int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
@@ -1326,7 +1326,7 @@ int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
13261326

13271327
iter.len = min(len, size - pos);
13281328
while ((ret = iomap_iter(&iter, ops)) > 0)
1329-
iter.processed = dax_unshare_iter(&iter);
1329+
iter.status = dax_unshare_iter(&iter);
13301330
return ret;
13311331
}
13321332
EXPORT_SYMBOL_GPL(dax_file_unshare);
@@ -1354,51 +1354,52 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
13541354
return ret;
13551355
}
13561356

1357-
static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
1357+
static int dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
13581358
{
13591359
const struct iomap *iomap = &iter->iomap;
13601360
const struct iomap *srcmap = iomap_iter_srcmap(iter);
1361-
loff_t pos = iter->pos;
13621361
u64 length = iomap_length(iter);
1363-
s64 written = 0;
1362+
int ret;
13641363

13651364
/* already zeroed? we're done. */
13661365
if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
1367-
return length;
1366+
return iomap_iter_advance(iter, &length);
13681367

13691368
/*
13701369
* invalidate the pages whose sharing state is to be changed
13711370
* because of CoW.
13721371
*/
13731372
if (iomap->flags & IOMAP_F_SHARED)
13741373
invalidate_inode_pages2_range(iter->inode->i_mapping,
1375-
pos >> PAGE_SHIFT,
1376-
(pos + length - 1) >> PAGE_SHIFT);
1374+
iter->pos >> PAGE_SHIFT,
1375+
(iter->pos + length - 1) >> PAGE_SHIFT);
13771376

13781377
do {
1378+
loff_t pos = iter->pos;
13791379
unsigned offset = offset_in_page(pos);
1380-
unsigned size = min_t(u64, PAGE_SIZE - offset, length);
13811380
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
1382-
long rc;
13831381
int id;
13841382

1383+
length = min_t(u64, PAGE_SIZE - offset, length);
1384+
13851385
id = dax_read_lock();
1386-
if (IS_ALIGNED(pos, PAGE_SIZE) && size == PAGE_SIZE)
1387-
rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
1386+
if (IS_ALIGNED(pos, PAGE_SIZE) && length == PAGE_SIZE)
1387+
ret = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
13881388
else
1389-
rc = dax_memzero(iter, pos, size);
1389+
ret = dax_memzero(iter, pos, length);
13901390
dax_read_unlock(id);
13911391

1392-
if (rc < 0)
1393-
return rc;
1394-
pos += size;
1395-
length -= size;
1396-
written += size;
1392+
if (ret < 0)
1393+
return ret;
1394+
1395+
ret = iomap_iter_advance(iter, &length);
1396+
if (ret)
1397+
return ret;
13971398
} while (length > 0);
13981399

13991400
if (did_zero)
14001401
*did_zero = true;
1401-
return written;
1402+
return ret;
14021403
}
14031404

14041405
int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
@@ -1413,7 +1414,7 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
14131414
int ret;
14141415

14151416
while ((ret = iomap_iter(&iter, ops)) > 0)
1416-
iter.processed = dax_zero_iter(&iter, did_zero);
1417+
iter.status = dax_zero_iter(&iter, did_zero);
14171418
return ret;
14181419
}
14191420
EXPORT_SYMBOL_GPL(dax_zero_range);
@@ -1431,8 +1432,7 @@ int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
14311432
}
14321433
EXPORT_SYMBOL_GPL(dax_truncate_page);
14331434

1434-
static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
1435-
struct iov_iter *iter)
1435+
static int dax_iomap_iter(struct iomap_iter *iomi, struct iov_iter *iter)
14361436
{
14371437
const struct iomap *iomap = &iomi->iomap;
14381438
const struct iomap *srcmap = iomap_iter_srcmap(iomi);
@@ -1451,8 +1451,10 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
14511451
if (pos >= end)
14521452
return 0;
14531453

1454-
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
1455-
return iov_iter_zero(min(length, end - pos), iter);
1454+
if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN) {
1455+
done = iov_iter_zero(min(length, end - pos), iter);
1456+
return iomap_iter_advance(iomi, &done);
1457+
}
14561458
}
14571459

14581460
/*
@@ -1485,7 +1487,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
14851487
}
14861488

14871489
id = dax_read_lock();
1488-
while (pos < end) {
1490+
while ((pos = iomi->pos) < end) {
14891491
unsigned offset = pos & (PAGE_SIZE - 1);
14901492
const size_t size = ALIGN(length + offset, PAGE_SIZE);
14911493
pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
@@ -1535,18 +1537,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
15351537
xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
15361538
map_len, iter);
15371539

1538-
pos += xfer;
1539-
length -= xfer;
1540-
done += xfer;
1541-
1542-
if (xfer == 0)
1540+
length = xfer;
1541+
ret = iomap_iter_advance(iomi, &length);
1542+
if (!ret && xfer == 0)
15431543
ret = -EFAULT;
15441544
if (xfer < map_len)
15451545
break;
15461546
}
15471547
dax_read_unlock(id);
15481548

1549-
return done ? done : ret;
1549+
return ret;
15501550
}
15511551

15521552
/**
@@ -1586,7 +1586,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
15861586
iomi.flags |= IOMAP_NOWAIT;
15871587

15881588
while ((ret = iomap_iter(&iomi, ops)) > 0)
1589-
iomi.processed = dax_iomap_iter(&iomi, iter);
1589+
iomi.status = dax_iomap_iter(&iomi, iter);
15901590

15911591
done = iomi.pos - iocb->ki_pos;
15921592
iocb->ki_pos = iomi.pos;
@@ -1757,7 +1757,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
17571757

17581758
while ((error = iomap_iter(&iter, ops)) > 0) {
17591759
if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
1760-
iter.processed = -EIO; /* fs corruption? */
1760+
iter.status = -EIO; /* fs corruption? */
17611761
continue;
17621762
}
17631763

@@ -1769,8 +1769,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
17691769
ret |= VM_FAULT_MAJOR;
17701770
}
17711771

1772-
if (!(ret & VM_FAULT_ERROR))
1773-
iter.processed = PAGE_SIZE;
1772+
if (!(ret & VM_FAULT_ERROR)) {
1773+
u64 length = PAGE_SIZE;
1774+
iter.status = iomap_iter_advance(&iter, &length);
1775+
}
17741776
}
17751777

17761778
if (iomap_errp)
@@ -1883,8 +1885,10 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
18831885
continue; /* actually breaks out of the loop */
18841886

18851887
ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
1886-
if (ret != VM_FAULT_FALLBACK)
1887-
iter.processed = PMD_SIZE;
1888+
if (ret != VM_FAULT_FALLBACK) {
1889+
u64 length = PMD_SIZE;
1890+
iter.status = iomap_iter_advance(&iter, &length);
1891+
}
18881892
}
18891893

18901894
unlock_entry:
@@ -1999,20 +2003,21 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, unsigned int order,
19992003
}
20002004
EXPORT_SYMBOL_GPL(dax_finish_sync_fault);
20012005

2002-
static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
2006+
static int dax_range_compare_iter(struct iomap_iter *it_src,
20032007
struct iomap_iter *it_dest, u64 len, bool *same)
20042008
{
20052009
const struct iomap *smap = &it_src->iomap;
20062010
const struct iomap *dmap = &it_dest->iomap;
20072011
loff_t pos1 = it_src->pos, pos2 = it_dest->pos;
2012+
u64 dest_len;
20082013
void *saddr, *daddr;
20092014
int id, ret;
20102015

20112016
len = min(len, min(smap->length, dmap->length));
20122017

20132018
if (smap->type == IOMAP_HOLE && dmap->type == IOMAP_HOLE) {
20142019
*same = true;
2015-
return len;
2020+
goto advance;
20162021
}
20172022

20182023
if (smap->type == IOMAP_HOLE || dmap->type == IOMAP_HOLE) {
@@ -2035,7 +2040,13 @@ static loff_t dax_range_compare_iter(struct iomap_iter *it_src,
20352040
if (!*same)
20362041
len = 0;
20372042
dax_read_unlock(id);
2038-
return len;
2043+
2044+
advance:
2045+
dest_len = len;
2046+
ret = iomap_iter_advance(it_src, &len);
2047+
if (!ret)
2048+
ret = iomap_iter_advance(it_dest, &dest_len);
2049+
return ret;
20392050

20402051
out_unlock:
20412052
dax_read_unlock(id);
@@ -2058,15 +2069,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
20582069
.len = len,
20592070
.flags = IOMAP_DAX,
20602071
};
2061-
int ret, compared = 0;
2072+
int ret, status;
20622073

20632074
while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
20642075
(ret = iomap_iter(&dst_iter, ops)) > 0) {
2065-
compared = dax_range_compare_iter(&src_iter, &dst_iter,
2076+
status = dax_range_compare_iter(&src_iter, &dst_iter,
20662077
min(src_iter.len, dst_iter.len), same);
2067-
if (compared < 0)
2078+
if (status < 0)
20682079
return ret;
2069-
src_iter.processed = dst_iter.processed = compared;
2080+
src_iter.status = dst_iter.status = status;
20702081
}
20712082
return ret;
20722083
}

fs/gfs2/bmap.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,8 @@ static int gfs2_block_zero_range(struct inode *inode, loff_t from,
13001300
unsigned int length)
13011301
{
13021302
BUG_ON(current->journal_info);
1303-
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops);
1303+
return iomap_zero_range(inode, from, length, NULL, &gfs2_iomap_ops,
1304+
NULL);
13041305
}
13051306

13061307
#define GFS2_JTRUNC_REVOKES 8192

fs/iomap/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ iomap-y += trace.o \
1212
iter.o
1313
iomap-$(CONFIG_BLOCK) += buffered-io.o \
1414
direct-io.o \
15+
ioend.o \
1516
fiemap.o \
1617
seek.o
1718
iomap-$(CONFIG_SWAP) += swapfile.o

0 commit comments

Comments
 (0)