
Commit 568570f

Merge tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

 - Fix integer overflow in xrep_bmap

 - Fix stale delalloc punching for COW IO

* tag 'xfs-6.12-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: punch delalloc extents from the COW fork for COW writes
  xfs: set IOMAP_F_SHARED for all COW fork allocations
  xfs: share more code in xfs_buffered_write_iomap_begin
  xfs: support the COW fork in xfs_bmap_punch_delalloc_range
  xfs: IOMAP_ZERO and IOMAP_UNSHARE already hold invalidate_lock
  xfs: take XFS_MMAPLOCK_EXCL in xfs_file_write_zero_eof
  xfs: factor out a xfs_file_write_zero_eof helper
  iomap: move locking out of iomap_write_delalloc_release
  iomap: remove iomap_file_buffered_write_punch_delalloc
  iomap: factor out a iomap_last_written_block helper
  xfs: fix integer overflow in xrep_bmap
2 parents 5e9ab26 + f6f91d2 commit 568570f

9 files changed: +199 -165 lines


Documentation/filesystems/iomap/operations.rst

Lines changed: 1 addition & 1 deletion
@@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
 such `reservations
 <https://lore.kernel.org/linux-xfs/[email protected]/>`_
 because writeback will not consume the reservation.
-The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
+The ``iomap_write_delalloc_release`` can be called from a
 ``->iomap_end`` function to find all the clean areas of the folios
 caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
 It takes the ``invalidate_lock``.
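
For context on the renamed helper, here is a minimal sketch of how a buffered-write ->iomap_end handler can drive it. This is not the in-tree XFS code: my_fs_buffered_write_iomap_end and my_fs_punch_delalloc are hypothetical names, and the rounding logic is lifted from the iomap_file_buffered_write_punch_delalloc body removed in fs/iomap/buffered-io.c below. The point this series changes is that the caller now takes invalidate_lock; the helper only asserts it.

/* Sketch only: an ->iomap_end caller of the renamed helper. */
static int my_fs_buffered_write_iomap_end(struct inode *inode, loff_t pos,
                loff_t length, ssize_t written, unsigned flags,
                struct iomap *iomap)
{
        unsigned int blocksize = i_blocksize(inode);
        loff_t start_byte, end_byte;

        /* Only freshly reserved delalloc extents may be punched. */
        if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
                return 0;

        /* start_byte is the first unused block after a short write. */
        if (unlikely(!written))
                start_byte = round_down(pos, blocksize);
        else
                start_byte = round_up(pos + written, blocksize);
        end_byte = round_up(pos + length, blocksize);

        /* Nothing to do if the entire delalloc extent was written. */
        if (start_byte >= end_byte)
                return 0;

        /* The caller, not the helper, now holds invalidate_lock. */
        filemap_invalidate_lock(inode->i_mapping);
        iomap_write_delalloc_release(inode, start_byte, end_byte, flags,
                        iomap, my_fs_punch_delalloc);
        filemap_invalidate_unlock(inode->i_mapping);
        return 0;
}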

fs/iomap/buffered-io.c

Lines changed: 36 additions & 75 deletions
@@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
 }
 
 /*
+ * When a short write occurs, the filesystem might need to use ->iomap_end
+ * to remove space reservations created in ->iomap_begin.
+ *
+ * For filesystems that use delayed allocation, there can be dirty pages over
+ * the delalloc extent outside the range of a short write but still within the
+ * delalloc extent allocated for this iomap if the write raced with page
+ * faults.
+ *
  * Punch out all the delalloc blocks in the range given except for those that
  * have dirty data still pending in the page cache - those are going to be
  * written and so must still retain the delalloc backing for writeback.
  *
+ * The punch() callback *must* only punch delalloc extents in the range passed
+ * to it. It must skip over all other types of extents in the range and leave
+ * them completely unchanged. It must do this punch atomically with respect to
+ * other extent modifications.
+ *
+ * The punch() callback may be called with a folio locked to prevent writeback
+ * extent allocation racing at the edge of the range we are currently punching.
+ * The locked folio may or may not cover the range being punched, so it is not
+ * safe for the punch() callback to lock folios itself.
+ *
+ * Lock order is:
+ *
+ * inode->i_rwsem (shared or exclusive)
+ *   inode->i_mapping->invalidate_lock (exclusive)
+ *     folio_lock()
+ *       ->punch
+ *         internal filesystem allocation lock
+ *
  * As we are scanning the page cache for data, we don't need to reimplement the
  * wheel - mapping_seek_hole_data() does exactly what we need to identify the
  * start and end of data ranges correctly even for sub-folio block sizes. This
@@ -1177,20 +1203,21 @@ static void iomap_write_delalloc_scan(struct inode *inode,
  * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
  * the code to subtle off-by-one bugs....
  */
-static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                loff_t end_byte, unsigned flags, struct iomap *iomap,
                iomap_punch_t punch)
 {
        loff_t punch_start_byte = start_byte;
        loff_t scan_end_byte = min(i_size_read(inode), end_byte);
 
        /*
-        * Lock the mapping to avoid races with page faults re-instantiating
-        * folios and dirtying them via ->page_mkwrite whilst we walk the
-        * cache and perform delalloc extent removal. Failing to do this can
-        * leave dirty pages with no space reservation in the cache.
+        * The caller must hold invalidate_lock to avoid races with page faults
+        * re-instantiating folios and dirtying them via ->page_mkwrite whilst
+        * we walk the cache and perform delalloc extent removal. Failing to do
+        * this can leave dirty pages with no space reservation in the cache.
         */
-       filemap_invalidate_lock(inode->i_mapping);
+       lockdep_assert_held_write(&inode->i_mapping->invalidate_lock);
+
        while (start_byte < scan_end_byte) {
                loff_t data_end;
 
@@ -1207,7 +1234,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                if (start_byte == -ENXIO || start_byte == scan_end_byte)
                        break;
                if (WARN_ON_ONCE(start_byte < 0))
-                       goto out_unlock;
+                       return;
                WARN_ON_ONCE(start_byte < punch_start_byte);
                WARN_ON_ONCE(start_byte > scan_end_byte);
 
@@ -1218,7 +1245,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
                data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
                                scan_end_byte, SEEK_HOLE);
                if (WARN_ON_ONCE(data_end < 0))
-                       goto out_unlock;
+                       return;
 
                /*
                 * If we race with post-direct I/O invalidation of the page cache,
@@ -1240,74 +1267,8 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
        if (punch_start_byte < end_byte)
                punch(inode, punch_start_byte, end_byte - punch_start_byte,
                                iomap);
-out_unlock:
-       filemap_invalidate_unlock(inode->i_mapping);
-}
-
-/*
- * When a short write occurs, the filesystem may need to remove reserved space
- * that was allocated in ->iomap_begin from it's ->iomap_end method. For
- * filesystems that use delayed allocation, we need to punch out delalloc
- * extents from the range that are not dirty in the page cache. As the write can
- * race with page faults, there can be dirty pages over the delalloc extent
- * outside the range of a short write but still within the delalloc extent
- * allocated for this iomap.
- *
- * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
- * simplify range iterations.
- *
- * The punch() callback *must* only punch delalloc extents in the range passed
- * to it. It must skip over all other types of extents in the range and leave
- * them completely unchanged. It must do this punch atomically with respect to
- * other extent modifications.
- *
- * The punch() callback may be called with a folio locked to prevent writeback
- * extent allocation racing at the edge of the range we are currently punching.
- * The locked folio may or may not cover the range being punched, so it is not
- * safe for the punch() callback to lock folios itself.
- *
- * Lock order is:
- *
- * inode->i_rwsem (shared or exclusive)
- *   inode->i_mapping->invalidate_lock (exclusive)
- *     folio_lock()
- *       ->punch
- *         internal filesystem allocation lock
- */
-void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
-               loff_t pos, loff_t length, ssize_t written, unsigned flags,
-               struct iomap *iomap, iomap_punch_t punch)
-{
-       loff_t start_byte;
-       loff_t end_byte;
-       unsigned int blocksize = i_blocksize(inode);
-
-       if (iomap->type != IOMAP_DELALLOC)
-               return;
-
-       /* If we didn't reserve the blocks, we're not allowed to punch them. */
-       if (!(iomap->flags & IOMAP_F_NEW))
-               return;
-
-       /*
-        * start_byte refers to the first unused block after a short write. If
-        * nothing was written, round offset down to point at the first block in
-        * the range.
-        */
-       if (unlikely(!written))
-               start_byte = round_down(pos, blocksize);
-       else
-               start_byte = round_up(pos + written, blocksize);
-       end_byte = round_up(pos + length, blocksize);
-
-       /* Nothing to do if we've written the entire delalloc extent */
-       if (start_byte >= end_byte)
-               return;
-
-       iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
-                       punch);
 }
-EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
 
 static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 {
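
From the callback side, the contract spelled out in the new comment block above (punch delalloc only, never take folio locks, stay atomic with respect to other extent modifications) is easiest to see in a concrete implementation. Below is a hedged sketch of an iomap_punch_t callback modelled on XFS's xfs_buffered_write_delalloc_punch after this series; that file (fs/xfs/xfs_iomap.c) is not part of this excerpt, so treat the body as an approximation. The fork is chosen from IOMAP_F_SHARED now that all COW fork allocations carry that flag.

/*
 * Sketch of a punch callback matching iomap_punch_t: punch only delalloc
 * extents in [offset, offset + length), and never take folio locks here -
 * the caller may already hold one.
 */
static void my_fs_punch_delalloc(struct inode *inode, loff_t offset,
                loff_t length, struct iomap *iomap)
{
        xfs_bmap_punch_delalloc_range(XFS_I(inode),
                        (iomap->flags & IOMAP_F_SHARED) ?
                                XFS_COW_FORK : XFS_DATA_FORK,
                        offset, offset + length);
}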

fs/xfs/scrub/bmap_repair.c

Lines changed: 1 addition & 1 deletion
@@ -801,7 +801,7 @@ xrep_bmap(
 {
        struct xrep_bmap        *rb;
        char                    *descr;
-       unsigned int            max_bmbt_recs;
+       xfs_extnum_t            max_bmbt_recs;
        bool                    large_extcount;
        int                     error = 0;
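
Why the one-line type change fixes an integer overflow: with XFS's large extent counter feature a fork's extent count can exceed what a 32-bit unsigned int holds, so an estimate of the maximum bmbt record count can silently wrap. A standalone illustration follows; the count value is made up, and the typedef mirrors current XFS headers, where xfs_extnum_t is a 64-bit signed integer.

#include <stdint.h>
#include <stdio.h>

typedef int64_t xfs_extnum_t;   /* as in current XFS headers */

int main(void)
{
        /* Hypothetical extent count beyond 32 bits, possible once the
         * large extent counter feature is enabled. */
        uint64_t nextents = 1ULL << 33;

        unsigned int old_type = nextents;  /* wraps to 0: the bug class */
        xfs_extnum_t new_type = nextents;  /* value preserved: the fix */

        printf("unsigned int: %u, xfs_extnum_t: %lld\n",
                        old_type, (long long)new_type);
        return 0;
}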

fs/xfs/xfs_aops.c

Lines changed: 2 additions & 2 deletions
@@ -116,7 +116,7 @@ xfs_end_ioend(
        if (unlikely(error)) {
                if (ioend->io_flags & IOMAP_F_SHARED) {
                        xfs_reflink_cancel_cow_range(ip, offset, size, true);
-                       xfs_bmap_punch_delalloc_range(ip, offset,
+                       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
                                        offset + size);
                }
                goto done;
@@ -456,7 +456,7 @@ xfs_discard_folio(
         * byte of the next folio. Hence the end offset is only dependent on the
         * folio itself and not the start offset that is passed in.
         */
-       xfs_bmap_punch_delalloc_range(ip, pos,
+       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
                        folio_pos(folio) + folio_size(folio));
 }

fs/xfs/xfs_bmap_util.c

Lines changed: 7 additions & 3 deletions
@@ -442,11 +442,12 @@ xfs_getbmap(
 void
 xfs_bmap_punch_delalloc_range(
        struct xfs_inode        *ip,
+       int                     whichfork,
        xfs_off_t               start_byte,
        xfs_off_t               end_byte)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = &ip->i_df;
+       struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
        xfs_fileoff_t           start_fsb = XFS_B_TO_FSBT(mp, start_byte);
        xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, end_byte);
        struct xfs_bmbt_irec    got, del;
@@ -474,11 +475,14 @@ xfs_bmap_punch_delalloc_range(
                        continue;
                }
 
-               xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
+               xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del);
                if (!xfs_iext_get_extent(ifp, &icur, &got))
                        break;
        }
 
+       if (whichfork == XFS_COW_FORK && !ifp->if_bytes)
+               xfs_inode_clear_cowblocks_tag(ip);
+
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 }
@@ -580,7 +584,7 @@ xfs_free_eofblocks(
         */
        if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
                if (ip->i_delayed_blks) {
-                       xfs_bmap_punch_delalloc_range(ip,
+                       xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK,
                                round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
                                LLONG_MAX);
                }
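
With the new whichfork argument, every caller names the fork explicitly, as the updated xfs_end_ioend and xfs_discard_folio call sites above show, and COW writes can now punch stale delalloc from the COW fork rather than only the data fork. A short illustrative fragment of the call pattern this enables; the surrounding function and its arguments are hypothetical, only the punch call itself is per the diff.

/* Illustrative only: fork selection is now explicit per call site. */
static void example_punch_stale_delalloc(struct xfs_inode *ip,
                xfs_off_t offset, xfs_off_t size, bool is_cow)
{
        xfs_bmap_punch_delalloc_range(ip,
                        is_cow ? XFS_COW_FORK : XFS_DATA_FORK,
                        offset, offset + size);
        /*
         * No extra cleanup needed in the COW case: per the new hunk
         * above, a punch that empties the COW fork also clears the
         * inode's cowblocks tag.
         */
}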

fs/xfs/xfs_bmap_util.h

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
 }
 #endif /* CONFIG_XFS_RT */
 
-void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, int whichfork,
        xfs_off_t start_byte, xfs_off_t end_byte);
 
 struct kgetbmap {
