Skip to content

Commit 210b1f6

Browse files
committed
nvme-pci: do not directly handle subsys reset fallout
Scheduling reset_work after an nvme subsystem reset is expected to fail on pcie, but this also prevents potential handling that the platform's pcie services may provide, which might successfully recover the link without re-enumeration. Such examples include AER, DPC, and power's EEH. Provide a pci specific operation that safely initiates a subsystem reset, and instead of scheduling reset work, read back the status register to trigger a pcie read error. Since this only affects pci, the other fabrics drivers subscribe to a generic nvmf subsystem reset that is exactly the same as before. The loop fabric doesn't use it because nvmet doesn't support setting that property anyway. And since we're using the magic NSSR value in two places now, provide a symbolic define for it. Reported-by: Nilay Shroff <[email protected]> Reviewed-by: Christoph Hellwig <[email protected]> Signed-off-by: Keith Busch <[email protected]>
1 parent bf86e7d commit 210b1f6

File tree

8 files changed

+61
-11
lines changed

8 files changed

+61
-11
lines changed

drivers/nvme/host/fabrics.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,21 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
280280
}
281281
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
282282

283+
/*
 * Generic subsystem reset for fabrics transports.
 *
 * Returns -EBUSY when nvme_wait_reset() reports failure. Otherwise writes
 * NVME_SUBSYS_RESET to the NSSR register through the transport's
 * reg_write32 op, returning that op's error if it is non-zero, and on
 * success returns the result of nvme_try_sched_reset().
 */
int nvmf_subsystem_reset(struct nvme_ctrl *ctrl)
284+
{
285+
int ret;
286+
287+
if (!nvme_wait_reset(ctrl))
288+
return -EBUSY;
289+
290+
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET);
291+
if (ret)
292+
return ret;
293+
294+
return nvme_try_sched_reset(ctrl);
295+
}
296+
EXPORT_SYMBOL_GPL(nvmf_subsystem_reset);
297+
283298
/**
284299
* nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
285300
* connect() errors.

drivers/nvme/host/fabrics.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ static inline unsigned int nvmf_nr_io_queues(struct nvmf_ctrl_options *opts)
217217
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
218218
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
219219
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
220+
int nvmf_subsystem_reset(struct nvme_ctrl *ctrl);
220221
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
221222
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
222223
int nvmf_register_transport(struct nvmf_transport_ops *ops);

drivers/nvme/host/fc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3382,6 +3382,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
33823382
.reg_read32 = nvmf_reg_read32,
33833383
.reg_read64 = nvmf_reg_read64,
33843384
.reg_write32 = nvmf_reg_write32,
3385+
.subsystem_reset = nvmf_subsystem_reset,
33853386
.free_ctrl = nvme_fc_free_ctrl,
33863387
.submit_async_event = nvme_fc_submit_async_event,
33873388
.delete_ctrl = nvme_fc_delete_ctrl,

drivers/nvme/host/nvme.h

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ struct nvme_ctrl_ops {
551551
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
552552
void (*free_ctrl)(struct nvme_ctrl *ctrl);
553553
void (*submit_async_event)(struct nvme_ctrl *ctrl);
554+
int (*subsystem_reset)(struct nvme_ctrl *ctrl);
554555
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
555556
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
556557
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@@ -649,18 +650,9 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
649650

650651
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
651652
{
652-
int ret;
653-
654-
if (!ctrl->subsystem)
653+
if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
655654
return -ENOTTY;
656-
if (!nvme_wait_reset(ctrl))
657-
return -EBUSY;
658-
659-
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
660-
if (ret)
661-
return ret;
662-
663-
return nvme_try_sched_reset(ctrl);
655+
return ctrl->ops->subsystem_reset(ctrl);
664656
}
665657

666658
/*

drivers/nvme/host/pci.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,41 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
11431143
spin_unlock(&nvmeq->sq_lock);
11441144
}
11451145

1146+
/*
 * PCI-specific subsystem reset: writes NVME_SUBSYS_RESET to NSSR directly
 * through the mapped BAR while holding dev->shutdown_lock.
 *
 * Returns 0 on success, -ENODEV when dev->bar_mapped_size is zero, or
 * -EBUSY when the controller state cannot be changed to
 * NVME_CTRL_RESETTING.
 */
static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
1147+
{
1148+
struct nvme_dev *dev = to_nvme_dev(ctrl);
1149+
int ret = 0;
1150+
1151+
/*
1152+
* Taking the shutdown_lock ensures the BAR mapping is not being
1153+
* altered by reset_work. Holding this lock before the RESETTING state
1154+
* change, if successful, also ensures nvme_remove won't be able to
1155+
* proceed to iounmap until we're done.
1156+
*/
1157+
mutex_lock(&dev->shutdown_lock);
1158+
if (!dev->bar_mapped_size) {
1159+
ret = -ENODEV;
1160+
goto unlock;
1161+
}
1162+
1163+
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
1164+
ret = -EBUSY;
1165+
goto unlock;
1166+
}
1167+
1168+
writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR);
/* No reset work is scheduled here; the state is set straight back to LIVE. */
1169+
nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE);
1170+
1171+
/*
1172+
* Read controller status to flush the previous write and trigger a
1173+
* pcie read error.
1174+
*/
1175+
readl(dev->bar + NVME_REG_CSTS);
1176+
unlock:
1177+
mutex_unlock(&dev->shutdown_lock);
1178+
return ret;
1179+
}
1180+
11461181
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
11471182
{
11481183
struct nvme_command c = { };
@@ -2859,6 +2894,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
28592894
.reg_read64 = nvme_pci_reg_read64,
28602895
.free_ctrl = nvme_pci_free_ctrl,
28612896
.submit_async_event = nvme_pci_submit_async_event,
2897+
.subsystem_reset = nvme_pci_subsystem_reset,
28622898
.get_address = nvme_pci_get_address,
28632899
.print_device_info = nvme_pci_print_device_info,
28642900
.supports_pci_p2pdma = nvme_pci_supports_pci_p2pdma,

drivers/nvme/host/rdma.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2201,6 +2201,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
22012201
.reg_read32 = nvmf_reg_read32,
22022202
.reg_read64 = nvmf_reg_read64,
22032203
.reg_write32 = nvmf_reg_write32,
2204+
.subsystem_reset = nvmf_subsystem_reset,
22042205
.free_ctrl = nvme_rdma_free_ctrl,
22052206
.submit_async_event = nvme_rdma_submit_async_event,
22062207
.delete_ctrl = nvme_rdma_delete_ctrl,

drivers/nvme/host/tcp.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2662,6 +2662,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
26622662
.reg_read32 = nvmf_reg_read32,
26632663
.reg_read64 = nvmf_reg_read64,
26642664
.reg_write32 = nvmf_reg_write32,
2665+
.subsystem_reset = nvmf_subsystem_reset,
26652666
.free_ctrl = nvme_tcp_free_ctrl,
26662667
.submit_async_event = nvme_tcp_submit_async_event,
26672668
.delete_ctrl = nvme_tcp_delete_ctrl,

include/linux/nvme.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626
#define NVME_NSID_ALL 0xffffffff
2727

28+
/* Special NSSR value, 'NVMe' */
29+
#define NVME_SUBSYS_RESET 0x4E564D65
30+
2831
enum nvme_subsys_type {
2932
/* Referral to another discovery type target subsystem */
3033
NVME_NQN_DISC = 1,

0 commit comments

Comments
 (0)