Skip to content

Commit caf0ea4

Browse files
Christoph Hellwigcmaiolino
authored andcommitted
iomap: remove iomap_file_buffered_write_punch_delalloc
Currently iomap_file_buffered_write_punch_delalloc can be called from XFS either with the invalidate lock held or not. To fix this while keeping the locking in the file system and not the iomap library code we'll need to life the locking up into the file system. To prepare for that, open code iomap_file_buffered_write_punch_delalloc in the only caller, and instead export iomap_write_delalloc_release. Signed-off-by: Christoph Hellwig <[email protected]> Reviewed-by: Darrick J. Wong <[email protected]> Signed-off-by: Carlos Maiolino <[email protected]>
1 parent c0adf8c commit caf0ea4

File tree

4 files changed

+46
-63
lines changed

4 files changed

+46
-63
lines changed

Documentation/filesystems/iomap/operations.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ The filesystem must arrange to `cancel
208208
such `reservations
209209
<https://lore.kernel.org/linux-xfs/[email protected]/>`_
210210
because writeback will not consume the reservation.
211-
The ``iomap_file_buffered_write_punch_delalloc`` can be called from a
211+
The ``iomap_write_delalloc_release`` can be called from a
212212
``->iomap_end`` function to find all the clean areas of the folios
213213
caching a fresh (``IOMAP_F_NEW``) delalloc mapping.
214214
It takes the ``invalidate_lock``.

fs/iomap/buffered-io.c

Lines changed: 28 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,10 +1145,36 @@ static void iomap_write_delalloc_scan(struct inode *inode,
11451145
}
11461146

11471147
/*
1148+
* When a short write occurs, the filesystem might need to use ->iomap_end
1149+
* to remove space reservations created in ->iomap_begin.
1150+
*
1151+
* For filesystems that use delayed allocation, there can be dirty pages over
1152+
* the delalloc extent outside the range of a short write but still within the
1153+
* delalloc extent allocated for this iomap if the write raced with page
1154+
* faults.
1155+
*
11481156
* Punch out all the delalloc blocks in the range given except for those that
11491157
* have dirty data still pending in the page cache - those are going to be
11501158
* written and so must still retain the delalloc backing for writeback.
11511159
*
1160+
* The punch() callback *must* only punch delalloc extents in the range passed
1161+
* to it. It must skip over all other types of extents in the range and leave
1162+
* them completely unchanged. It must do this punch atomically with respect to
1163+
* other extent modifications.
1164+
*
1165+
* The punch() callback may be called with a folio locked to prevent writeback
1166+
* extent allocation racing at the edge of the range we are currently punching.
1167+
* The locked folio may or may not cover the range being punched, so it is not
1168+
* safe for the punch() callback to lock folios itself.
1169+
*
1170+
* Lock order is:
1171+
*
1172+
* inode->i_rwsem (shared or exclusive)
1173+
* inode->i_mapping->invalidate_lock (exclusive)
1174+
* folio_lock()
1175+
* ->punch
1176+
* internal filesystem allocation lock
1177+
*
11521178
* As we are scanning the page cache for data, we don't need to reimplement the
11531179
* wheel - mapping_seek_hole_data() does exactly what we need to identify the
11541180
* start and end of data ranges correctly even for sub-folio block sizes. This
@@ -1177,7 +1203,7 @@ static void iomap_write_delalloc_scan(struct inode *inode,
11771203
* require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
11781204
* the code to subtle off-by-one bugs....
11791205
*/
1180-
static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
1206+
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
11811207
loff_t end_byte, unsigned flags, struct iomap *iomap,
11821208
iomap_punch_t punch)
11831209
{
@@ -1243,62 +1269,7 @@ static void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
12431269
out_unlock:
12441270
filemap_invalidate_unlock(inode->i_mapping);
12451271
}
1246-
1247-
/*
1248-
* When a short write occurs, the filesystem may need to remove reserved space
1249-
* that was allocated in ->iomap_begin from it's ->iomap_end method. For
1250-
* filesystems that use delayed allocation, we need to punch out delalloc
1251-
* extents from the range that are not dirty in the page cache. As the write can
1252-
* race with page faults, there can be dirty pages over the delalloc extent
1253-
* outside the range of a short write but still within the delalloc extent
1254-
* allocated for this iomap.
1255-
*
1256-
* This function uses [start_byte, end_byte) intervals (i.e. open ended) to
1257-
* simplify range iterations.
1258-
*
1259-
* The punch() callback *must* only punch delalloc extents in the range passed
1260-
* to it. It must skip over all other types of extents in the range and leave
1261-
* them completely unchanged. It must do this punch atomically with respect to
1262-
* other extent modifications.
1263-
*
1264-
* The punch() callback may be called with a folio locked to prevent writeback
1265-
* extent allocation racing at the edge of the range we are currently punching.
1266-
* The locked folio may or may not cover the range being punched, so it is not
1267-
* safe for the punch() callback to lock folios itself.
1268-
*
1269-
* Lock order is:
1270-
*
1271-
* inode->i_rwsem (shared or exclusive)
1272-
* inode->i_mapping->invalidate_lock (exclusive)
1273-
* folio_lock()
1274-
* ->punch
1275-
* internal filesystem allocation lock
1276-
*/
1277-
void iomap_file_buffered_write_punch_delalloc(struct inode *inode,
1278-
loff_t pos, loff_t length, ssize_t written, unsigned flags,
1279-
struct iomap *iomap, iomap_punch_t punch)
1280-
{
1281-
loff_t start_byte;
1282-
loff_t end_byte;
1283-
1284-
if (iomap->type != IOMAP_DELALLOC)
1285-
return;
1286-
1287-
/* If we didn't reserve the blocks, we're not allowed to punch them. */
1288-
if (!(iomap->flags & IOMAP_F_NEW))
1289-
return;
1290-
1291-
start_byte = iomap_last_written_block(inode, pos, written);
1292-
end_byte = round_up(pos + length, i_blocksize(inode));
1293-
1294-
/* Nothing to do if we've written the entire delalloc extent */
1295-
if (start_byte >= end_byte)
1296-
return;
1297-
1298-
iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
1299-
punch);
1300-
}
1301-
EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
1272+
EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
13021273

13031274
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
13041275
{

fs/xfs/xfs_iomap.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,8 +1227,20 @@ xfs_buffered_write_iomap_end(
12271227
unsigned flags,
12281228
struct iomap *iomap)
12291229
{
1230-
iomap_file_buffered_write_punch_delalloc(inode, offset, length, written,
1231-
flags, iomap, &xfs_buffered_write_delalloc_punch);
1230+
loff_t start_byte, end_byte;
1231+
1232+
/* If we didn't reserve the blocks, we're not allowed to punch them. */
1233+
if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
1234+
return 0;
1235+
1236+
/* Nothing to do if we've written the entire delalloc extent */
1237+
start_byte = iomap_last_written_block(inode, offset, written);
1238+
end_byte = round_up(offset + length, i_blocksize(inode));
1239+
if (start_byte >= end_byte)
1240+
return 0;
1241+
1242+
iomap_write_delalloc_release(inode, start_byte, end_byte, flags, iomap,
1243+
xfs_buffered_write_delalloc_punch);
12321244
return 0;
12331245
}
12341246

include/linux/iomap.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,9 +290,9 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
290290

291291
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
292292
struct iomap *iomap);
293-
void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos,
294-
loff_t length, ssize_t written, unsigned flag,
295-
struct iomap *iomap, iomap_punch_t punch);
293+
void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
294+
loff_t end_byte, unsigned flags, struct iomap *iomap,
295+
iomap_punch_t punch);
296296

297297
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
298298
u64 start, u64 len, const struct iomap_ops *ops);

0 commit comments

Comments
 (0)