@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
1350
1350
return filemap_write_and_wait_range (mapping , i -> pos , end );
1351
1351
}
1352
1352
1353
- static loff_t iomap_zero_iter (struct iomap_iter * iter , bool * did_zero ,
1354
- bool * range_dirty )
1353
+ static loff_t iomap_zero_iter (struct iomap_iter * iter , bool * did_zero )
1355
1354
{
1356
- const struct iomap * srcmap = iomap_iter_srcmap (iter );
1357
1355
loff_t pos = iter -> pos ;
1358
1356
loff_t length = iomap_length (iter );
1359
1357
loff_t written = 0 ;
1360
1358
1361
- /*
1362
- * We must zero subranges of unwritten mappings that might be dirty in
1363
- * pagecache from previous writes. We only know whether the entire range
1364
- * was clean or not, however, and dirty folios may have been written
1365
- * back or reclaimed at any point after mapping lookup.
1366
- *
1367
- * The easiest way to deal with this is to flush pagecache to trigger
1368
- * any pending unwritten conversions and then grab the updated extents
1369
- * from the fs. The flush may change the current mapping, so mark it
1370
- * stale for the iterator to remap it for the next pass to handle
1371
- * properly.
1372
- *
1373
- * Note that holes are treated the same as unwritten because zero range
1374
- * is (ab)used for partial folio zeroing in some cases. Hole backed
1375
- * post-eof ranges can be dirtied via mapped write and the flush
1376
- * triggers writeback time post-eof zeroing.
1377
- */
1378
- if (srcmap -> type == IOMAP_HOLE || srcmap -> type == IOMAP_UNWRITTEN ) {
1379
- if (* range_dirty ) {
1380
- * range_dirty = false;
1381
- return iomap_zero_iter_flush_and_stale (iter );
1382
- }
1383
- /* range is clean and already zeroed, nothing to do */
1384
- return length ;
1385
- }
1386
-
1387
1359
do {
1388
1360
struct folio * folio ;
1389
1361
int status ;
@@ -1431,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
1431
1403
.len = len ,
1432
1404
.flags = IOMAP_ZERO ,
1433
1405
};
1406
+ struct address_space * mapping = inode -> i_mapping ;
1407
+ unsigned int blocksize = i_blocksize (inode );
1408
+ unsigned int off = pos & (blocksize - 1 );
1409
+ loff_t plen = min_t (loff_t , len , blocksize - off );
1434
1410
int ret ;
1435
1411
bool range_dirty ;
1436
1412
1437
1413
/*
1438
- * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
1439
- * pagecache must be flushed to ensure stale data from previous
1440
- * buffered writes is not exposed. A flush is only required for certain
1441
- * types of mappings, but checking pagecache after mapping lookup is
1442
- * racy with writeback and reclaim.
1414
+ * Zero range can skip mappings that are zero on disk so long as
1415
+ * pagecache is clean. If pagecache was dirty prior to zero range, the
1416
+ * mapping converts on writeback completion and so must be zeroed.
1443
1417
*
1444
- * Therefore, check the entire range first and pass along whether any
1445
- * part of it is dirty. If so and an underlying mapping warrants it,
1446
- * flush the cache at that point. This trades off the occasional false
1447
- * positive (and spurious flush, if the dirty data and mapping don't
1448
- * happen to overlap) for simplicity in handling a relatively uncommon
1449
- * situation.
1418
+ * The simplest way to deal with this across a range is to flush
1419
+ * pagecache and process the updated mappings. To avoid excessive
1420
+ * flushing on partial eof zeroing, special case it to zero the
1421
+ * unaligned start portion if already dirty in pagecache.
1422
+ */
1423
+ if (off &&
1424
+ filemap_range_needs_writeback (mapping , pos , pos + plen - 1 )) {
1425
+ iter .len = plen ;
1426
+ while ((ret = iomap_iter (& iter , ops )) > 0 )
1427
+ iter .processed = iomap_zero_iter (& iter , did_zero );
1428
+
1429
+ iter .len = len - (iter .pos - pos );
1430
+ if (ret || !iter .len )
1431
+ return ret ;
1432
+ }
1433
+
1434
+ /*
1435
+ * To avoid an unconditional flush, check pagecache state and only flush
1436
+ * if dirty and the fs returns a mapping that might convert on
1437
+ * writeback.
1450
1438
*/
1451
1439
range_dirty = filemap_range_needs_writeback (inode -> i_mapping ,
1452
- pos , pos + len - 1 );
1440
+ iter .pos , iter .pos + iter .len - 1 );
1441
+ while ((ret = iomap_iter (& iter , ops )) > 0 ) {
1442
+ const struct iomap * srcmap = iomap_iter_srcmap (& iter );
1453
1443
1454
- while ((ret = iomap_iter (& iter , ops )) > 0 )
1455
- iter .processed = iomap_zero_iter (& iter , did_zero , & range_dirty );
1444
+ if (srcmap -> type == IOMAP_HOLE ||
1445
+ srcmap -> type == IOMAP_UNWRITTEN ) {
1446
+ loff_t proc = iomap_length (& iter );
1447
+
1448
+ if (range_dirty ) {
1449
+ range_dirty = false;
1450
+ proc = iomap_zero_iter_flush_and_stale (& iter );
1451
+ }
1452
+ iter .processed = proc ;
1453
+ continue ;
1454
+ }
1455
+
1456
+ iter .processed = iomap_zero_iter (& iter , did_zero );
1457
+ }
1456
1458
return ret ;
1457
1459
}
1458
1460
EXPORT_SYMBOL_GPL (iomap_zero_range );
0 commit comments