Commit fac69ec

Christoph Hellwig authored and cmaiolino committed
xfs: simplify buffer I/O submission
The code in _xfs_buf_ioapply is unnecessarily complicated because it
doesn't take advantage of modern bio features. Simplify it by making use
of bio splitting and chaining: build a single bio for the pages in the
buffer using a simple loop, then split that bio on the map boundaries
for discontiguous multi-FSB buffers and chain the split bios to the main
one, so that there is only a single I/O completion.

This not only simplifies the code that builds the buffer, but also
removes the need for the b_io_remaining field, as buffer ownership is
granted to the bio when the final bio is submitted, with no chance of a
completion before that. It likewise removes the b_io_error field, which
is now superfluous because there is always exactly one completion.

Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Darrick J. Wong <[email protected]>
Acked-by: Dave Chinner <[email protected]>
Signed-off-by: Carlos Maiolino <[email protected]>
1 parent 8db65d3 commit fac69ec
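To make the new submission path easier to follow, here is a minimal,
self-contained sketch (not taken from the patch) of the split-and-chain
pattern the commit message describes. struct demo_extent and
demo_submit_chained() are invented names for illustration; the caller is
assumed to have already built bio to cover all extents back to back and
set its bi_end_io:

/*
 * Hypothetical sketch of the split-and-chain pattern described above.
 * All "demo_" names are invented; only bio_split(), bio_chain(),
 * submit_bio() and fs_bio_set are real kernel interfaces.
 */
#include <linux/bio.h>

struct demo_extent {
	sector_t	sector;		/* on-disk start of this extent */
	unsigned int	sectors;	/* extent length in 512-byte sectors */
};

static void
demo_submit_chained(
	struct bio		*bio,	/* covers all extents back to back */
	struct demo_extent	*ext,
	int			nr_ext)
{
	int			i;

	/*
	 * For every extent but the last, split a child bio off the front
	 * of the parent and chain it back: the parent's bi_end_io will not
	 * run until all chained children have completed, giving exactly
	 * one I/O completion for the whole set.
	 */
	for (i = 0; i < nr_ext - 1; i++) {
		struct bio	*split = bio_split(bio, ext[i].sectors,
						GFP_NOFS, &fs_bio_set);

		split->bi_iter.bi_sector = ext[i].sector;
		bio_chain(split, bio);
		submit_bio(split);
	}

	/* The remainder of the parent bio covers the final extent. */
	bio->bi_iter.bi_sector = ext[i].sector;
	submit_bio(bio);
}

This mirrors what xfs_buf_submit_bio() in the diff below does with the
bp->b_maps[] array.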

File tree: 2 files changed, +60 −146 lines changed

fs/xfs/xfs_buf.c

Lines changed: 60 additions & 144 deletions
@@ -1362,13 +1362,6 @@ xfs_buf_ioend(
 {
 	trace_xfs_buf_iodone(bp, _RET_IP_);
 
-	/*
-	 * Pull in IO completion errors now. We are guaranteed to be running
-	 * single threaded, so we don't need the lock to read b_io_error.
-	 */
-	if (!bp->b_error && bp->b_io_error)
-		xfs_buf_ioerror(bp, bp->b_io_error);
-
 	if (bp->b_flags & XBF_READ) {
 		if (!bp->b_error && bp->b_ops)
 			bp->b_ops->verify_read(bp);
@@ -1491,118 +1484,26 @@ static void
 xfs_buf_bio_end_io(
 	struct bio		*bio)
 {
-	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;
+	struct xfs_buf		*bp = bio->bi_private;
 
-	if (!bio->bi_status &&
-	    (bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
-	    XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
-		bio->bi_status = BLK_STS_IOERR;
-
-	/*
-	 * don't overwrite existing errors - otherwise we can lose errors on
-	 * buffers that require multiple bios to complete.
-	 */
-	if (bio->bi_status) {
-		int error = blk_status_to_errno(bio->bi_status);
-
-		cmpxchg(&bp->b_io_error, 0, error);
-	}
+	if (bio->bi_status)
+		xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status));
+	else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) &&
+		 XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
+		xfs_buf_ioerror(bp, -EIO);
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
 
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1)
-		xfs_buf_ioend_async(bp);
+	xfs_buf_ioend_async(bp);
 	bio_put(bio);
 }
 
-static void
-xfs_buf_ioapply_map(
-	struct xfs_buf	*bp,
-	int		map,
-	int		*buf_offset,
-	int		*count,
-	blk_opf_t	op)
-{
-	int		page_index;
-	unsigned int	total_nr_pages = bp->b_page_count;
-	int		nr_pages;
-	struct bio	*bio;
-	sector_t	sector = bp->b_maps[map].bm_bn;
-	int		size;
-	int		offset;
-
-	/* skip the pages in the buffer before the start offset */
-	page_index = 0;
-	offset = *buf_offset;
-	while (offset >= PAGE_SIZE) {
-		page_index++;
-		offset -= PAGE_SIZE;
-	}
-
-	/*
-	 * Limit the IO size to the length of the current vector, and update the
-	 * remaining IO count for the next time around.
-	 */
-	size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
-	*count -= size;
-	*buf_offset += size;
-
-next_chunk:
-	atomic_inc(&bp->b_io_remaining);
-	nr_pages = bio_max_segs(total_nr_pages);
-
-	bio = bio_alloc(bp->b_target->bt_bdev, nr_pages, op, GFP_NOIO);
-	bio->bi_iter.bi_sector = sector;
-	bio->bi_end_io = xfs_buf_bio_end_io;
-	bio->bi_private = bp;
-
-	for (; size && nr_pages; nr_pages--, page_index++) {
-		int	rbytes, nbytes = PAGE_SIZE - offset;
-
-		if (nbytes > size)
-			nbytes = size;
-
-		rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
-				      offset);
-		if (rbytes < nbytes)
-			break;
-
-		offset = 0;
-		sector += BTOBB(nbytes);
-		size -= nbytes;
-		total_nr_pages--;
-	}
-
-	if (likely(bio->bi_iter.bi_size)) {
-		if (xfs_buf_is_vmapped(bp)) {
-			flush_kernel_vmap_range(bp->b_addr,
-						xfs_buf_vmap_len(bp));
-		}
-		submit_bio(bio);
-		if (size)
-			goto next_chunk;
-	} else {
-		/*
-		 * This is guaranteed not to be the last io reference count
-		 * because the caller (xfs_buf_submit) holds a count itself.
-		 */
-		atomic_dec(&bp->b_io_remaining);
-		xfs_buf_ioerror(bp, -EIO);
-		bio_put(bio);
-	}
-
-}
-
-STATIC void
-_xfs_buf_ioapply(
-	struct xfs_buf	*bp)
+static inline blk_opf_t
+xfs_buf_bio_op(
+	struct xfs_buf	*bp)
 {
-	struct blk_plug	plug;
-	blk_opf_t	op;
-	int		offset;
-	int		size;
-	int		i;
+	blk_opf_t	op;
 
 	if (bp->b_flags & XBF_WRITE) {
 		op = REQ_OP_WRITE;
@@ -1612,25 +1513,53 @@ _xfs_buf_ioapply(
 		op |= REQ_RAHEAD;
 	}
 
-	/* we only use the buffer cache for meta-data */
-	op |= REQ_META;
+	return op | REQ_META;
+}
+
+static void
+xfs_buf_submit_bio(
+	struct xfs_buf	*bp)
+{
+	unsigned int	size = BBTOB(bp->b_length);
+	unsigned int	map = 0, p;
+	struct blk_plug	plug;
+	struct bio	*bio;
+
+	bio = bio_alloc(bp->b_target->bt_bdev, bp->b_page_count,
+			xfs_buf_bio_op(bp), GFP_NOIO);
+	bio->bi_private = bp;
+	bio->bi_end_io = xfs_buf_bio_end_io;
+
+	if (bp->b_flags & _XBF_KMEM) {
+		__bio_add_page(bio, virt_to_page(bp->b_addr), size,
+				bp->b_offset);
+	} else {
+		for (p = 0; p < bp->b_page_count; p++)
+			__bio_add_page(bio, bp->b_pages[p], PAGE_SIZE, 0);
+		bio->bi_iter.bi_size = size; /* limit to the actual size used */
+
+		if (xfs_buf_is_vmapped(bp))
+			flush_kernel_vmap_range(bp->b_addr,
+					xfs_buf_vmap_len(bp));
+	}
 
 	/*
-	 * Walk all the vectors issuing IO on them. Set up the initial offset
-	 * into the buffer and the desired IO size before we start -
-	 * _xfs_buf_ioapply_vec() will modify them appropriately for each
-	 * subsequent call.
+	 * If there is more than one map segment, split out a new bio for each
+	 * map except of the last one. The last map is handled by the
+	 * remainder of the original bio outside the loop.
 	 */
-	offset = bp->b_offset;
-	size = BBTOB(bp->b_length);
 	blk_start_plug(&plug);
-	for (i = 0; i < bp->b_map_count; i++) {
-		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
-		if (bp->b_error)
-			break;
-		if (size <= 0)
-			break;	/* all done */
+	for (map = 0; map < bp->b_map_count - 1; map++) {
+		struct bio *split;
+
+		split = bio_split(bio, bp->b_maps[map].bm_len, GFP_NOFS,
+				&fs_bio_set);
+		split->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
+		bio_chain(split, bio);
+		submit_bio(split);
 	}
+	bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn;
+	submit_bio(bio);
 	blk_finish_plug(&plug);
 }
 
@@ -1729,14 +1658,7 @@ xfs_buf_submit(
 	 * left over from previous use of the buffer (e.g. failed readahead).
 	 */
 	bp->b_error = 0;
-	bp->b_io_error = 0;
 
-	/*
-	 * Set the count to 1 initially, this will stop an I/O completion
-	 * callout which happens before we have started all the I/O from calling
-	 * xfs_buf_ioend too early.
-	 */
-	atomic_set(&bp->b_io_remaining, 1);
 	if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_ioacct_inc(bp);
 
@@ -1749,21 +1671,15 @@ xfs_buf_submit(
 	if (xfs_buftarg_is_mem(bp->b_target))
 		goto done;
 
-	_xfs_buf_ioapply(bp);
+	xfs_buf_submit_bio(bp);
+	goto rele;
 
 done:
-	/*
-	 * If _xfs_buf_ioapply failed, we can get back here with only the IO
-	 * reference we took above. If we drop it to zero, run completion so
-	 * that we don't return to the caller with completion still pending.
-	 */
-	if (atomic_dec_and_test(&bp->b_io_remaining) == 1) {
-		if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
-			xfs_buf_ioend(bp);
-		else
-			xfs_buf_ioend_async(bp);
-	}
-
+	if (bp->b_error || !(bp->b_flags & XBF_ASYNC))
+		xfs_buf_ioend(bp);
+	else
+		xfs_buf_ioend_async(bp);
+rele:
 	/*
 	 * Release the hold that keeps the buffer referenced for the entire
 	 * I/O. Note that if the buffer is async, it is not safe to reference

fs/xfs/xfs_buf.h

Lines changed: 0 additions & 2 deletions
@@ -184,7 +184,6 @@ struct xfs_buf {
 	struct list_head	b_lru;		/* lru list */
 	spinlock_t		b_lock;		/* internal state lock */
 	unsigned int		b_state;	/* internal state flags */
-	int			b_io_error;	/* internal IO error state */
 	wait_queue_head_t	b_waiters;	/* unpin waiters */
 	struct list_head	b_list;
 	struct xfs_perag	*b_pag;
@@ -202,7 +201,6 @@ struct xfs_buf {
 	struct xfs_buf_map	__b_map;	/* inline compound buffer map */
 	int			b_map_count;
 	atomic_t		b_pin_count;	/* pin count */
-	atomic_t		b_io_remaining;	/* #outstanding I/O requests */
 	unsigned int		b_page_count;	/* size of page array */
 	unsigned int		b_offset;	/* page offset of b_addr,
 						   only for _XBF_KMEM buffers */
