Skip to content

Commit afd384f

Browse files
committed
Revert "virtio-blk: support completion batching for the IRQ path"
This reverts commit 07b679f. This change appears to have broken things... We now see applications hanging during disk accesses. e.g. multi-port virtio-blk device running in h/w (FPGA) Host running a simple 'fio' test. [global] thread=1 direct=1 ioengine=libaio norandommap=1 group_reporting=1 bs=4K rw=read iodepth=128 runtime=1 numjobs=4 time_based [job0] filename=/dev/vda [job1] filename=/dev/vdb [job2] filename=/dev/vdc ... [job15] filename=/dev/vdp i.e. 16 disks; 4 queues per disk; simple burst of 4KB reads This is repeatedly run in a loop. After a few, normally <10 seconds, fio hangs. With 64 queues (16 disks), failure occurs within a few seconds; with 8 queues (2 disks) it may take ~hour before hanging. Last message: fio-3.19 Starting 8 threads Jobs: 1 (f=1): [_(7),R(1)][68.3%][eta 03h:11m:06s] I think this means at the end of the run 1 queue was left incomplete. 'diskstats' (run while fio is hung) shows no outstanding transactions. e.g. $ cat /proc/diskstats ... 252 0 vda 1843140071 0 14745120568 712568645 0 0 0 0 0 3117947 712568645 0 0 0 0 0 0 252 16 vdb 1816291511 0 14530332088 704905623 0 0 0 0 0 3117711 704905623 0 0 0 0 0 0 ... Other stats (in the h/w, and added to the virtio-blk driver ([a]virtio_queue_rq(), [b]virtblk_handle_req(), [c]virtblk_request_done()) all agree, and show every request had a completion, and that virtblk_request_done() never gets called. e.g. PF= 0 vq=0 1 2 3 [a]request_count - 839416590 813148916 105586179 84988123 [b]completion1_count - 839416590 813148916 105586179 84988123 [c]completion2_count - 0 0 0 0 PF= 1 vq=0 1 2 3 [a]request_count - 823335887 812516140 104582672 75856549 [b]completion1_count - 823335887 812516140 104582672 75856549 [c]completion2_count - 0 0 0 0 i.e. the issue is after the virtio-blk driver. This change was introduced in kernel 6.3.0. I am seeing this using 6.3.3. If I run with an earlier kernel (5.15), it does not occur. 
If I make a simple patch to the 6.3.3 virtio-blk driver to skip the blk_mq_add_to_batch() call, it does not fail. e.g. kernel 5.15 - this is OK virtio_blk.c,virtblk_done() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { blk_mq_complete_request(req); } kernel 6.3.3 - this fails virtio_blk.c,virtblk_handle_req() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { if (!blk_mq_complete_request_remote(req)) { if (!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) { virtblk_request_done(req); //this never gets called... so blk_mq_add_to_batch() must always succeed } } } If I do, kernel 6.3.3 - this is OK virtio_blk.c,virtblk_handle_req() [irq handler] if (likely(!blk_should_fake_timeout(req->q))) { if (!blk_mq_complete_request_remote(req)) { virtblk_request_done(req); //force this here... if (!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr), virtblk_complete_batch)) { virtblk_request_done(req); //this never gets called... so blk_mq_add_to_batch() must always succeed } } } Perhaps you might like to fix/test/revert this change... Martin Reported-by: kernel test robot <[email protected]> Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/ Cc: Suwan Kim <[email protected]> Tested-by: [email protected] Reported-by: "Roberts, Martin" <[email protected]> Message-Id: <336455b4f630f329380a8f53ee8cad3868764d5c.1686295549.git.mst@redhat.com> Signed-off-by: Michael S. Tsirkin <[email protected]>
1 parent 45a3e24 commit afd384f

File tree

1 file changed

+37
-45
lines changed

1 file changed

+37
-45
lines changed

drivers/block/virtio_blk.c

Lines changed: 37 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -348,63 +348,33 @@ static inline void virtblk_request_done(struct request *req)
348348
blk_mq_end_request(req, status);
349349
}
350350

351-
static void virtblk_complete_batch(struct io_comp_batch *iob)
352-
{
353-
struct request *req;
354-
355-
rq_list_for_each(&iob->req_list, req) {
356-
virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
357-
virtblk_cleanup_cmd(req);
358-
}
359-
blk_mq_end_request_batch(iob);
360-
}
361-
362-
static int virtblk_handle_req(struct virtio_blk_vq *vq,
363-
struct io_comp_batch *iob)
364-
{
365-
struct virtblk_req *vbr;
366-
int req_done = 0;
367-
unsigned int len;
368-
369-
while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
370-
struct request *req = blk_mq_rq_from_pdu(vbr);
371-
372-
if (likely(!blk_should_fake_timeout(req->q)) &&
373-
!blk_mq_complete_request_remote(req) &&
374-
!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
375-
virtblk_complete_batch))
376-
virtblk_request_done(req);
377-
req_done++;
378-
}
379-
380-
return req_done;
381-
}
382-
383351
static void virtblk_done(struct virtqueue *vq)
384352
{
385353
struct virtio_blk *vblk = vq->vdev->priv;
386-
struct virtio_blk_vq *vblk_vq = &vblk->vqs[vq->index];
387-
int req_done = 0;
354+
bool req_done = false;
355+
int qid = vq->index;
356+
struct virtblk_req *vbr;
388357
unsigned long flags;
389-
DEFINE_IO_COMP_BATCH(iob);
358+
unsigned int len;
390359

391-
spin_lock_irqsave(&vblk_vq->lock, flags);
360+
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
392361
do {
393362
virtqueue_disable_cb(vq);
394-
req_done += virtblk_handle_req(vblk_vq, &iob);
363+
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
364+
struct request *req = blk_mq_rq_from_pdu(vbr);
395365

366+
if (likely(!blk_should_fake_timeout(req->q)))
367+
blk_mq_complete_request(req);
368+
req_done = true;
369+
}
396370
if (unlikely(virtqueue_is_broken(vq)))
397371
break;
398372
} while (!virtqueue_enable_cb(vq));
399373

400-
if (req_done) {
401-
if (!rq_list_empty(iob.req_list))
402-
iob.complete(&iob);
403-
404-
/* In case queue is stopped waiting for more buffers. */
374+
/* In case queue is stopped waiting for more buffers. */
375+
if (req_done)
405376
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
406-
}
407-
spin_unlock_irqrestore(&vblk_vq->lock, flags);
377+
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
408378
}
409379

410380
static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
@@ -1283,15 +1253,37 @@ static void virtblk_map_queues(struct blk_mq_tag_set *set)
12831253
}
12841254
}
12851255

1256+
static void virtblk_complete_batch(struct io_comp_batch *iob)
1257+
{
1258+
struct request *req;
1259+
1260+
rq_list_for_each(&iob->req_list, req) {
1261+
virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
1262+
virtblk_cleanup_cmd(req);
1263+
}
1264+
blk_mq_end_request_batch(iob);
1265+
}
1266+
12861267
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
12871268
{
12881269
struct virtio_blk *vblk = hctx->queue->queuedata;
12891270
struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
1271+
struct virtblk_req *vbr;
12901272
unsigned long flags;
1273+
unsigned int len;
12911274
int found = 0;
12921275

12931276
spin_lock_irqsave(&vq->lock, flags);
1294-
found = virtblk_handle_req(vq, iob);
1277+
1278+
while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
1279+
struct request *req = blk_mq_rq_from_pdu(vbr);
1280+
1281+
found++;
1282+
if (!blk_mq_complete_request_remote(req) &&
1283+
!blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
1284+
virtblk_complete_batch))
1285+
virtblk_request_done(req);
1286+
}
12951287

12961288
if (found)
12971289
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);

0 commit comments

Comments
 (0)