@@ -33,6 +33,12 @@
 #define NVME_QUEUE_SIZE 128
 #define NVME_BAR_SIZE 8192
 
+/*
+ * We have to leave one slot empty as that is the full queue case where
+ * head == tail + 1.
+ */
+#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1)
+
 typedef struct {
     int32_t head, tail;
     uint8_t *queue;
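
The NVME_NUM_REQS definition encodes the usual ring-buffer rule: when head == tail means "empty", the ring must never be filled completely, or "full" would be indistinguishable from "empty". The standalone sketch below (plain C with hypothetical names, not part of the patch) shows why a queue of NVME_QUEUE_SIZE slots can carry at most NVME_QUEUE_SIZE - 1 requests in flight.

/*
 * Standalone sketch of the "one slot empty" ring rule behind NVME_NUM_REQS.
 */
#include <assert.h>
#include <stdbool.h>

#define QUEUE_SIZE 128                  /* mirrors NVME_QUEUE_SIZE */
#define NUM_REQS   (QUEUE_SIZE - 1)     /* mirrors NVME_NUM_REQS   */

static bool ring_empty(int head, int tail)
{
    return head == tail;
}

static bool ring_full(int head, int tail)
{
    /* Full one entry early: advancing tail onto head would alias "empty". */
    return (tail + 1) % QUEUE_SIZE == head;
}

int main(void)
{
    int head = 0, tail = 0, used = 0;

    while (!ring_full(head, tail)) {    /* enqueue until the ring reports full */
        tail = (tail + 1) % QUEUE_SIZE;
        used++;
    }
    assert(used == NUM_REQS);           /* at most QUEUE_SIZE - 1 entries fit */
    assert(!ring_empty(head, tail));
    return 0;
}
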
@@ -47,7 +53,7 @@ typedef struct {
     int cid;
     void *prp_list_page;
     uint64_t prp_list_iova;
-    bool busy;
+    int free_req_next; /* q->reqs[] index of next free req */
 } NVMeRequest;
 
 typedef struct {
@@ -61,7 +67,8 @@ typedef struct {
     /* Fields protected by @lock */
     NVMeQueue   sq, cq;
     int         cq_phase;
-    NVMeRequest reqs[NVME_QUEUE_SIZE];
+    int         free_req_head;
+    NVMeRequest reqs[NVME_NUM_REQS];
     bool        busy;
     int         need_kick;
     int         inflight;
@@ -200,19 +207,23 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
     qemu_mutex_init(&q->lock);
     q->index = idx;
     qemu_co_queue_init(&q->free_req_queue);
-    q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_QUEUE_SIZE);
+    q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS);
     r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
-                          s->page_size * NVME_QUEUE_SIZE,
+                          s->page_size * NVME_NUM_REQS,
                           false, &prp_list_iova);
     if (r) {
         goto fail;
     }
-    for (i = 0; i < NVME_QUEUE_SIZE; i++) {
+    q->free_req_head = -1;
+    for (i = 0; i < NVME_NUM_REQS; i++) {
         NVMeRequest *req = &q->reqs[i];
         req->cid = i + 1;
+        req->free_req_next = q->free_req_head;
+        q->free_req_head = i;
         req->prp_list_page = q->prp_list_pages + i * s->page_size;
         req->prp_list_iova = prp_list_iova + i * s->page_size;
     }
+
     nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
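
The loop above now threads every request onto a LIFO freelist kept as array indices: free_req_head is -1 while the list is empty, and each pushed entry records the previous head in free_req_next. A minimal standalone sketch of that initialization (hypothetical Request/QueuePair types, not QEMU code) looks like this:

#include <assert.h>

#define NUM_REQS 127

typedef struct {
    int cid;
    int free_req_next;   /* index of next free entry, -1 terminates the list */
} Request;

typedef struct {
    int free_req_head;   /* -1 means no free entry */
    Request reqs[NUM_REQS];
} QueuePair;

static void queue_pair_init(QueuePair *q)
{
    q->free_req_head = -1;
    for (int i = 0; i < NUM_REQS; i++) {
        q->reqs[i].cid = i + 1;
        /* Push entry i on the front of the freelist. */
        q->reqs[i].free_req_next = q->free_req_head;
        q->free_req_head = i;
    }
}

int main(void)
{
    static QueuePair q;

    queue_pair_init(&q);
    assert(q.free_req_head == NUM_REQS - 1);   /* last pushed entry is handed out first */
    assert(q.reqs[0].free_req_next == -1);     /* first pushed entry ends the list */
    return 0;
}
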
@@ -254,13 +265,11 @@ static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q)
  */
 static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
 {
-    int i;
-    NVMeRequest *req = NULL;
+    NVMeRequest *req;
 
     qemu_mutex_lock(&q->lock);
-    while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) {
-        /* We have to leave one slot empty as that is the full queue case (head
-         * == tail + 1). */
+
+    while (q->free_req_head == -1) {
         if (qemu_in_coroutine()) {
             trace_nvme_free_req_queue_wait(q);
             qemu_co_queue_wait(&q->free_req_queue, &q->lock);
@@ -269,20 +278,41 @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q)
             return NULL;
         }
     }
-    for (i = 0; i < NVME_QUEUE_SIZE; i++) {
-        if (!q->reqs[i].busy) {
-            q->reqs[i].busy = true;
-            req = &q->reqs[i];
-            break;
-        }
-    }
-    /* We have checked inflight and need_kick while holding q->lock, so one
-     * free req must be available. */
-    assert(req);
+
+    req = &q->reqs[q->free_req_head];
+    q->free_req_head = req->free_req_next;
+    req->free_req_next = -1;
+
     qemu_mutex_unlock(&q->lock);
     return req;
 }
 
+/* With q->lock */
+static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req)
+{
+    req->free_req_next = q->free_req_head;
+    q->free_req_head = req - q->reqs;
+}
+
+/* With q->lock */
+static void nvme_wake_free_req_locked(BDRVNVMeState *s, NVMeQueuePair *q)
+{
+    if (!qemu_co_queue_empty(&q->free_req_queue)) {
+        replay_bh_schedule_oneshot_event(s->aio_context,
+                                         nvme_free_req_queue_cb, q);
+    }
+}
+
+/* Insert a request in the freelist and wake waiters */
+static void nvme_put_free_req_and_wake(BDRVNVMeState *s, NVMeQueuePair *q,
+                                       NVMeRequest *req)
+{
+    qemu_mutex_lock(&q->lock);
+    nvme_put_free_req_locked(q, req);
+    nvme_wake_free_req_locked(s, q);
+    qemu_mutex_unlock(&q->lock);
+}
+
 static inline int nvme_translate_error(const NvmeCqe *c)
 {
     uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF;
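
nvme_get_free_req now pops the head index under q->lock, blocking on free_req_queue while the list is empty, and the new helpers push a request back and wake any waiters via a one-shot bottom half. As a rough analogue only (QEMU uses a CoQueue and replay_bh_schedule_oneshot_event, not pthreads), the same pop/push discipline can be sketched with a mutex and condition variable:

#include <assert.h>
#include <pthread.h>

#define NUM_REQS 127

typedef struct {
    int free_req_next;
} Request;

typedef struct {
    pthread_mutex_t lock;
    pthread_cond_t free_req_cond;
    int free_req_head;
    Request reqs[NUM_REQS];
} QueuePair;

static Request *get_free_req(QueuePair *q)
{
    Request *req;

    pthread_mutex_lock(&q->lock);
    while (q->free_req_head == -1) {
        /* No free entry: sleep until a completion returns one. */
        pthread_cond_wait(&q->free_req_cond, &q->lock);
    }
    req = &q->reqs[q->free_req_head];
    q->free_req_head = req->free_req_next;
    req->free_req_next = -1;
    pthread_mutex_unlock(&q->lock);
    return req;
}

static void put_free_req_and_wake(QueuePair *q, Request *req)
{
    pthread_mutex_lock(&q->lock);
    req->free_req_next = q->free_req_head;
    q->free_req_head = (int)(req - q->reqs);   /* pointer back to array index */
    pthread_cond_signal(&q->free_req_cond);    /* wake one blocked get_free_req() */
    pthread_mutex_unlock(&q->lock);
}

int main(void)
{
    static QueuePair q = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .free_req_cond = PTHREAD_COND_INITIALIZER,
        .free_req_head = -1,
    };
    Request *req;

    for (int i = 0; i < NUM_REQS; i++) {       /* build the freelist */
        q.reqs[i].free_req_next = q.free_req_head;
        q.free_req_head = i;
    }

    req = get_free_req(&q);                    /* pops index NUM_REQS - 1 */
    assert(req == &q.reqs[NUM_REQS - 1]);
    put_free_req_and_wake(&q, req);            /* pushes it back */
    assert(q.free_req_head == NUM_REQS - 1);
    return 0;
}
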
@@ -344,7 +374,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
         req = *preq;
         assert(req.cid == cid);
         assert(req.cb);
-        preq->busy = false;
+        nvme_put_free_req_locked(q, preq);
         preq->cb = preq->opaque = NULL;
         qemu_mutex_unlock(&q->lock);
         req.cb(req.opaque, ret);
@@ -356,10 +386,7 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q)
         /* Notify the device so it can post more completions. */
         smp_mb_release();
         *q->cq.doorbell = cpu_to_le32(q->cq.head);
-        if (!qemu_co_queue_empty(&q->free_req_queue)) {
-            replay_bh_schedule_oneshot_event(s->aio_context,
-                                             nvme_free_req_queue_cb, q);
-        }
+        nvme_wake_free_req_locked(s, q);
     }
     q->busy = false;
     return progress;
@@ -1001,7 +1028,7 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
     r = nvme_cmd_map_qiov(bs, &cmd, req, qiov);
     qemu_co_mutex_unlock(&s->dma_map_lock);
     if (r) {
-        req->busy = false;
+        nvme_put_free_req_and_wake(s, ioq, req);
         return r;
     }
     nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data);
@@ -1218,7 +1245,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
     qemu_co_mutex_unlock(&s->dma_map_lock);
 
     if (ret) {
-        req->busy = false;
+        nvme_put_free_req_and_wake(s, ioq, req);
         goto out;
     }
 