@@ -1350,40 +1350,12 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
 	return filemap_write_and_wait_range(mapping, i->pos, end);
 }
 
-static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero,
-		bool *range_dirty)
+static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 {
-	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	loff_t pos = iter->pos;
 	loff_t length = iomap_length(iter);
 	loff_t written = 0;
 
-	/*
-	 * We must zero subranges of unwritten mappings that might be dirty in
-	 * pagecache from previous writes. We only know whether the entire range
-	 * was clean or not, however, and dirty folios may have been written
-	 * back or reclaimed at any point after mapping lookup.
-	 *
-	 * The easiest way to deal with this is to flush pagecache to trigger
-	 * any pending unwritten conversions and then grab the updated extents
-	 * from the fs. The flush may change the current mapping, so mark it
-	 * stale for the iterator to remap it for the next pass to handle
-	 * properly.
-	 *
-	 * Note that holes are treated the same as unwritten because zero range
-	 * is (ab)used for partial folio zeroing in some cases. Hole backed
-	 * post-eof ranges can be dirtied via mapped write and the flush
-	 * triggers writeback time post-eof zeroing.
-	 */
-	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN) {
-		if (*range_dirty) {
-			*range_dirty = false;
-			return iomap_zero_iter_flush_and_stale(iter);
-		}
-		/* range is clean and already zeroed, nothing to do */
-		return length;
-	}
-
 	do {
 		struct folio *folio;
 		int status;
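For context on the callers this simplification serves: iomap_zero_range() is commonly reached from truncate-style paths that zero the tail of a partial block. A rough sketch of such a caller follows, modeled on the iomap_truncate_page() helper that lives alongside this code; it is simplified for illustration and the exact upstream signature may differ by tree.

/*
 * Sketch of a typical iomap_zero_range() caller, modeled on the
 * iomap_truncate_page() helper in this file. Illustrative only,
 * not the verbatim upstream function.
 */
static int example_truncate_page(struct inode *inode, loff_t pos,
		bool *did_zero, const struct iomap_ops *ops)
{
	unsigned int blocksize = i_blocksize(inode);
	unsigned int off = pos & (blocksize - 1);

	/* pos already block aligned: nothing to zero. */
	if (!off)
		return 0;
	/* Zero from pos to the end of its block. */
	return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops);
}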
@@ -1431,28 +1403,58 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		.len		= len,
 		.flags		= IOMAP_ZERO,
 	};
+	struct address_space *mapping = inode->i_mapping;
+	unsigned int blocksize = i_blocksize(inode);
+	unsigned int off = pos & (blocksize - 1);
+	loff_t plen = min_t(loff_t, len, blocksize - off);
 	int ret;
 	bool range_dirty;
 
 	/*
-	 * Zero range wants to skip pre-zeroed (i.e. unwritten) mappings, but
-	 * pagecache must be flushed to ensure stale data from previous
-	 * buffered writes is not exposed. A flush is only required for certain
-	 * types of mappings, but checking pagecache after mapping lookup is
-	 * racy with writeback and reclaim.
+	 * Zero range can skip mappings that are zero on disk so long as
+	 * pagecache is clean. If pagecache was dirty prior to zero range, the
+	 * mapping converts on writeback completion and so must be zeroed.
 	 *
-	 * Therefore, check the entire range first and pass along whether any
-	 * part of it is dirty. If so and an underlying mapping warrants it,
-	 * flush the cache at that point. This trades off the occasional false
-	 * positive (and spurious flush, if the dirty data and mapping don't
-	 * happen to overlap) for simplicity in handling a relatively uncommon
-	 * situation.
+	 * The simplest way to deal with this across a range is to flush
+	 * pagecache and process the updated mappings. To avoid excessive
+	 * flushing on partial eof zeroing, special case it to zero the
+	 * unaligned start portion if already dirty in pagecache.
+	 */
+	if (off &&
+	    filemap_range_needs_writeback(mapping, pos, pos + plen - 1)) {
+		iter.len = plen;
+		while ((ret = iomap_iter(&iter, ops)) > 0)
+			iter.processed = iomap_zero_iter(&iter, did_zero);
+
+		iter.len = len - (iter.pos - pos);
+		if (ret || !iter.len)
+			return ret;
+	}
+
+	/*
+	 * To avoid an unconditional flush, check pagecache state and only flush
+	 * if dirty and the fs returns a mapping that might convert on
+	 * writeback.
 	 */
 	range_dirty = filemap_range_needs_writeback(inode->i_mapping,
-			pos, pos + len - 1);
+			iter.pos, iter.pos + iter.len - 1);
+	while ((ret = iomap_iter(&iter, ops)) > 0) {
+		const struct iomap *srcmap = iomap_iter_srcmap(&iter);
 
-	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.processed = iomap_zero_iter(&iter, did_zero, &range_dirty);
+		if (srcmap->type == IOMAP_HOLE ||
+		    srcmap->type == IOMAP_UNWRITTEN) {
+			loff_t proc = iomap_length(&iter);
+
+			if (range_dirty) {
+				range_dirty = false;
+				proc = iomap_zero_iter_flush_and_stale(&iter);
+			}
+			iter.processed = proc;
+			continue;
+		}
+
+		iter.processed = iomap_zero_iter(&iter, did_zero);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_zero_range);
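The partial-eof special case added above hinges on simple mask arithmetic: off is the byte offset of pos within its block (valid because block sizes are always powers of two), and plen clamps the flush-free zeroing to at most the remainder of that first block. Below is a standalone userspace sketch of the same computation, with min_t() stubbed to mimic the kernel macro; the values are illustrative, not from the patch.

#include <stdio.h>
#include <stdint.h>

/* Userspace stand-in for the kernel's min_t(type, a, b) macro. */
#define min_t(type, a, b) ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
	uint64_t blocksize = 4096;	/* i_blocksize(inode) */
	int64_t pos = 6144;		/* hypothetical zero range start */
	int64_t len = 10000;		/* hypothetical zero range length */

	/*
	 * Offset of pos within its block; the mask is exact because
	 * blocksize is a power of two.
	 */
	uint64_t off = pos & (blocksize - 1);
	/* Bytes from pos to the next block boundary, clamped to len. */
	int64_t plen = min_t(int64_t, len, (int64_t)(blocksize - off));

	/*
	 * Prints "off=2048 plen=2048": only the first partial block is
	 * zeroed through pagecache; the rest takes the mapping loop.
	 */
	printf("off=%llu plen=%lld\n",
	       (unsigned long long)off, (long long)plen);
	return 0;
}

With these values, only the 2048 bytes up to the block boundary are zeroed via iomap_zero_iter() when that sub-range is dirty; the remaining 7952 bytes proceed through the hole/unwritten checks without forcing an up-front flush.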