@@ -361,13 +361,19 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 				       bool has_sched,
 				       struct request *rq)
 {
-	/* dispatch flush rq directly */
-	if (rq->rq_flags & RQF_FLUSH_SEQ) {
-		spin_lock(&hctx->lock);
-		list_add(&rq->queuelist, &hctx->dispatch);
-		spin_unlock(&hctx->lock);
+	/*
+	 * dispatch flush and passthrough rq directly
+	 *
+	 * passthrough request has to be added to hctx->dispatch directly.
+	 * For some reason, device may be in one situation which can't
+	 * handle FS request, so STS_RESOURCE is always returned and the
+	 * FS request will be added to hctx->dispatch. However passthrough
+	 * request may be required at that time for fixing the problem. If
+	 * passthrough request is added to scheduler queue, there isn't any
+	 * chance to dispatch it given we prioritize requests in hctx->dispatch.
+	 */
+	if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
 		return true;
-	}
 
 	if (has_sched)
 		rq->rq_flags |= RQF_SORTED;
@@ -391,8 +397,32 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 
 	WARN_ON(e && (rq->tag != -1));
 
-	if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
+	if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+		/*
+		 * Firstly normal IO request is inserted to scheduler queue or
+		 * sw queue, meantime we add flush request to dispatch queue(
+		 * hctx->dispatch) directly and there is at most one in-flight
+		 * flush request for each hw queue, so it doesn't matter to add
+		 * flush request to tail or front of the dispatch queue.
+		 *
+		 * Secondly in case of NCQ, flush request belongs to non-NCQ
+		 * command, and queueing it will fail when there is any
+		 * in-flight normal IO request(NCQ command). When adding flush
+		 * rq to the front of hctx->dispatch, it is easier to introduce
+		 * extra time to flush rq's latency because of S_SCHED_RESTART
+		 * compared with adding to the tail of dispatch queue, then
+		 * chance of flush merge is increased, and less flush requests
+		 * will be issued to controller. It is observed that ~10% time
+		 * is saved in blktests block/004 on disk attached to AHCI/NCQ
+		 * drive when adding flush rq to the front of hctx->dispatch.
+		 *
+		 * Simply queue flush rq to the front of hctx->dispatch so that
+		 * intensive flush workloads can benefit in case of NCQ HW.
+		 */
+		at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head;
+		blk_mq_request_bypass_insert(rq, at_head, false);
 		goto run;
+	}
 
 	if (e && e->type->ops.insert_requests) {
 		LIST_HEAD(list);
0 commit comments