@@ -74,9 +74,11 @@ typedef struct {
     int         cq_phase;
     int         free_req_head;
     NVMeRequest reqs[NVME_NUM_REQS];
-    bool        busy;
     int         need_kick;
     int         inflight;
+
+    /* Thread-safe, no lock necessary */
+    QEMUBH      *completion_bh;
 } NVMeQueuePair;
 
 /* Memory mapped registers */
@@ -140,6 +142,8 @@ struct BDRVNVMeState {
 #define NVME_BLOCK_OPT_DEVICE "device"
 #define NVME_BLOCK_OPT_NAMESPACE "namespace"
 
+static void nvme_process_completion_bh(void *opaque);
+
 static QemuOptsList runtime_opts = {
     .name = "nvme",
     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -181,6 +185,9 @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q,
 
 static void nvme_free_queue_pair(NVMeQueuePair *q)
 {
+    if (q->completion_bh) {
+        qemu_bh_delete(q->completion_bh);
+    }
     qemu_vfree(q->prp_list_pages);
     qemu_vfree(q->sq.queue);
     qemu_vfree(q->cq.queue);
@@ -214,6 +221,8 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
     q->index = idx;
     qemu_co_queue_init(&q->free_req_queue);
     q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
+    q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
+                                  nvme_process_completion_bh, q);
     r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
                           s->page_size * NVME_NUM_REQS,
                           false, &prp_list_iova);
@@ -352,11 +361,21 @@ static bool nvme_process_completion(NVMeQueuePair *q)
     NvmeCqe *c;
 
     trace_nvme_process_completion(s, q->index, q->inflight);
-    if (q->busy || s->plugged) {
-        trace_nvme_process_completion_queue_busy(s, q->index);
+    if (s->plugged) {
+        trace_nvme_process_completion_queue_plugged(s, q->index);
         return false;
     }
-    q->busy = true;
+
+    /*
+     * Support re-entrancy when a request cb() function invokes aio_poll().
+     * Pending completions must be visible to aio_poll() so that a cb()
+     * function can wait for the completion of another request.
+     *
+     * The aio_poll() loop will execute our BH and we'll resume completion
+     * processing there.
+     */
+    qemu_bh_schedule(q->completion_bh);
+
     assert(q->inflight >= 0);
     while (q->inflight) {
         int ret;
@@ -384,10 +403,10 @@ static bool nvme_process_completion(NVMeQueuePair *q)
         assert(req.cb);
         nvme_put_free_req_locked(q, preq);
         preq->cb = preq->opaque = NULL;
+        q->inflight--;
         qemu_mutex_unlock(&q->lock);
         req.cb(req.opaque, ret);
         qemu_mutex_lock(&q->lock);
-        q->inflight--;
         progress = true;
     }
     if (progress) {
@@ -396,10 +415,28 @@ static bool nvme_process_completion(NVMeQueuePair *q)
         *q->cq.doorbell = cpu_to_le32(q->cq.head);
         nvme_wake_free_req_locked(q);
     }
-    q->busy = false;
+
+    qemu_bh_cancel(q->completion_bh);
+
     return progress;
 }
 
+static void nvme_process_completion_bh(void *opaque)
+{
+    NVMeQueuePair *q = opaque;
+
+    /*
+     * We're being invoked because a nvme_process_completion() cb() function
+     * called aio_poll(). The callback may be waiting for further completions
+     * so notify the device that it has space to fill in more completions now.
+     */
+    smp_mb_release();
+    *q->cq.doorbell = cpu_to_le32(q->cq.head);
+    nvme_wake_free_req_locked(q);
+
+    nvme_process_completion(q);
+}
+
 static void nvme_trace_command(const NvmeCmd *cmd)
 {
     int i;
@@ -1309,6 +1346,13 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
 {
     BDRVNVMeState *s = bs->opaque;
 
+    for (int i = 0; i < s->nr_queues; i++) {
+        NVMeQueuePair *q = s->queues[i];
+
+        qemu_bh_delete(q->completion_bh);
+        q->completion_bh = NULL;
+    }
+
     aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier,
                            false, NULL, NULL);
 }
@@ -1321,6 +1365,13 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
     s->aio_context = new_context;
     aio_set_event_notifier(new_context, &s->irq_notifier,
                            false, nvme_handle_event, nvme_poll_cb);
+
+    for (int i = 0; i < s->nr_queues; i++) {
+        NVMeQueuePair *q = s->queues[i];
+
+        q->completion_bh =
+            aio_bh_new(new_context, nvme_process_completion_bh, q);
+    }
 }
 
 static void nvme_aio_plug(BlockDriverState *bs)
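
The comments added in nvme_process_completion() describe the core idea of this patch: a bottom half is scheduled before request callbacks run, so that a callback which spins the event loop can still drain further completions, and the bottom half is cancelled once processing finishes without re-entry. The following stand-alone C sketch illustrates that pattern in miniature; it is not QEMU code, and every name in it (fake_bh_poll, drain_completions, pending, ...) is hypothetical.

/*
 * Minimal sketch of the re-entrancy pattern used by the patch above.
 * Not QEMU code; all names are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static int  pending = 3;        /* completions waiting in a fake CQ        */
static bool bh_scheduled;       /* plays the role of the QEMUBH            */

static void drain_completions(void);

/* Stands in for aio_poll(): runs the scheduled bottom half, if any. */
static void fake_bh_poll(void)
{
    if (bh_scheduled) {
        bh_scheduled = false;
        drain_completions();
    }
}

/* A request callback that waits for another request by polling. */
static void request_cb(int id)
{
    printf("completed request %d\n", id);
    if (id == 2) {
        fake_bh_poll();         /* re-enters completion processing         */
    }
}

static void drain_completions(void)
{
    bh_scheduled = true;        /* like qemu_bh_schedule() in the patch    */
    while (pending > 0) {
        int id = pending--;
        request_cb(id);         /* may recurse through fake_bh_poll()      */
    }
    bh_scheduled = false;       /* like qemu_bh_cancel() in the patch      */
}

int main(void)
{
    drain_completions();        /* prints requests 3, 2, 1                 */
    return 0;
}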