Skip to content

Commit 172fb49

Browse files
committed
nvme-pci: enhance timeout kernel log
Kernel configs don't necessarily have opcode decoding, and some opcodes are not even decodable. It is still interesting for debugging SSD issues to know what opcode is timing out, what request type it came from, and the data size (if applicable). Also print the command_id along side blk-mq's tag to help match commands with protocol wire traces and firmware logs, Reviewed-by: Max Gurtovoy <[email protected]> Signed-off-by: Keith Busch <[email protected]>
1 parent a7de1de commit 172fb49

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

drivers/nvme/host/pci.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
12841284
struct request *abort_req;
12851285
struct nvme_command cmd = { };
12861286
u32 csts = readl(dev->bar + NVME_REG_CSTS);
1287+
u8 opcode;
12871288

12881289
/* If PCI error recovery process is happening, we cannot reset or
12891290
* the recovery mechanism will surely fail.
@@ -1310,8 +1311,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13101311

13111312
if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
13121313
dev_warn(dev->ctrl.device,
1313-
"I/O %d QID %d timeout, completion polled\n",
1314-
req->tag, nvmeq->qid);
1314+
"I/O tag %d (%04x) QID %d timeout, completion polled\n",
1315+
req->tag, nvme_cid(req), nvmeq->qid);
13151316
return BLK_EH_DONE;
13161317
}
13171318

@@ -1327,8 +1328,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13271328
fallthrough;
13281329
case NVME_CTRL_DELETING:
13291330
dev_warn_ratelimited(dev->ctrl.device,
1330-
"I/O %d QID %d timeout, disable controller\n",
1331-
req->tag, nvmeq->qid);
1331+
"I/O tag %d (%04x) QID %d timeout, disable controller\n",
1332+
req->tag, nvme_cid(req), nvmeq->qid);
13321333
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
13331334
nvme_dev_disable(dev, true);
13341335
return BLK_EH_DONE;
@@ -1343,10 +1344,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13431344
* command was already aborted once before and still hasn't been
13441345
* returned to the driver, or if this is the admin queue.
13451346
*/
1347+
opcode = nvme_req(req)->cmd->common.opcode;
13461348
if (!nvmeq->qid || iod->aborted) {
13471349
dev_warn(dev->ctrl.device,
1348-
"I/O %d QID %d timeout, reset controller\n",
1349-
req->tag, nvmeq->qid);
1350+
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
1351+
req->tag, nvme_cid(req), opcode,
1352+
nvme_opcode_str(nvmeq->qid, opcode, 0), nvmeq->qid);
13501353
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
13511354
goto disable;
13521355
}
@@ -1362,10 +1365,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13621365
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
13631366

13641367
dev_warn(nvmeq->dev->ctrl.device,
1365-
"I/O %d (%s) QID %d timeout, aborting\n",
1366-
req->tag,
1367-
nvme_get_opcode_str(nvme_req(req)->cmd->common.opcode),
1368-
nvmeq->qid);
1368+
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, aborting req_op:%s(%u) size:%u\n",
1369+
req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
1370+
nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
1371+
blk_rq_bytes(req));
13691372

13701373
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
13711374
BLK_MQ_REQ_NOWAIT);

0 commit comments

Comments
 (0)