Skip to content

Commit 4c2a2d0

Browse files
Quinn Tranmartinkpetersen
authored andcommitted
scsi: qla2xxx: Fix NVME cmd and LS cmd timeout race condition
This patch uses kref to protect access between fcp_abort path and nvme command and LS command completion path. Stack trace below shows the abort path is accessing stale memory (nvme_private->sp). When command kref reaches 0, nvme_private & srb resource will be disconnected from each other. Any subsequence nvme abort request will not be able to reference the original srb. [ 5631.003998] BUG: unable to handle kernel paging request at 00000010000005d8 [ 5631.004016] IP: [<ffffffffc087df92>] qla_nvme_abort_work+0x22/0x100 [qla2xxx] [ 5631.004086] Workqueue: events qla_nvme_abort_work [qla2xxx] [ 5631.004097] RIP: 0010:[<ffffffffc087df92>] [<ffffffffc087df92>] qla_nvme_abort_work+0x22/0x100 [qla2xxx] [ 5631.004109] Call Trace: [ 5631.004115] [<ffffffffaa4b8174>] ? pwq_dec_nr_in_flight+0x64/0xb0 [ 5631.004117] [<ffffffffaa4b9d4f>] process_one_work+0x17f/0x440 [ 5631.004120] [<ffffffffaa4bade6>] worker_thread+0x126/0x3c0 Signed-off-by: Quinn Tran <[email protected]> Signed-off-by: Himanshu Madhani <[email protected]> Signed-off-by: Martin K. Petersen <[email protected]>
1 parent 2eb9238 commit 4c2a2d0

File tree

3 files changed

+117
-50
lines changed

3 files changed

+117
-50
lines changed

drivers/scsi/qla2xxx/qla_def.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,8 @@ typedef struct srb {
532532
uint8_t cmd_type;
533533
uint8_t pad[3];
534534
atomic_t ref_count;
535+
struct kref cmd_kref; /* need to migrate ref_count over to this */
536+
void *priv;
535537
wait_queue_head_t nvme_ls_waitq;
536538
struct fc_port *fcport;
537539
struct scsi_qla_host *vha;
@@ -554,6 +556,7 @@ typedef struct srb {
554556
} u;
555557
void (*done)(void *, int);
556558
void (*free)(void *);
559+
void (*put_fn)(struct kref *kref);
557560
} srb_t;
558561

559562
#define GET_CMD_SP(sp) (sp->u.scmd.cmd)

drivers/scsi/qla2xxx/qla_nvme.c

Lines changed: 113 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -123,53 +123,91 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport,
123123
return 0;
124124
}
125125

126+
static void qla_nvme_release_fcp_cmd_kref(struct kref *kref)
127+
{
128+
struct srb *sp = container_of(kref, struct srb, cmd_kref);
129+
struct nvme_private *priv = (struct nvme_private *)sp->priv;
130+
struct nvmefc_fcp_req *fd;
131+
struct srb_iocb *nvme;
132+
unsigned long flags;
133+
134+
if (!priv)
135+
goto out;
136+
137+
nvme = &sp->u.iocb_cmd;
138+
fd = nvme->u.nvme.desc;
139+
140+
spin_lock_irqsave(&priv->cmd_lock, flags);
141+
priv->sp = NULL;
142+
sp->priv = NULL;
143+
if (priv->comp_status == QLA_SUCCESS) {
144+
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
145+
} else {
146+
fd->rcv_rsplen = 0;
147+
fd->transferred_length = 0;
148+
}
149+
fd->status = 0;
150+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
151+
152+
fd->done(fd);
153+
out:
154+
qla2xxx_rel_qpair_sp(sp->qpair, sp);
155+
}
156+
157+
static void qla_nvme_release_ls_cmd_kref(struct kref *kref)
158+
{
159+
struct srb *sp = container_of(kref, struct srb, cmd_kref);
160+
struct nvme_private *priv = (struct nvme_private *)sp->priv;
161+
struct nvmefc_ls_req *fd;
162+
unsigned long flags;
163+
164+
if (!priv)
165+
goto out;
166+
167+
spin_lock_irqsave(&priv->cmd_lock, flags);
168+
priv->sp = NULL;
169+
sp->priv = NULL;
170+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
171+
172+
fd = priv->fd;
173+
fd->done(fd, priv->comp_status);
174+
out:
175+
qla2x00_rel_sp(sp);
176+
}
177+
178+
static void qla_nvme_ls_complete(struct work_struct *work)
179+
{
180+
struct nvme_private *priv =
181+
container_of(work, struct nvme_private, ls_work);
182+
183+
kref_put(&priv->sp->cmd_kref, qla_nvme_release_ls_cmd_kref);
184+
}
185+
126186
static void qla_nvme_sp_ls_done(void *ptr, int res)
127187
{
128188
srb_t *sp = ptr;
129-
struct srb_iocb *nvme;
130-
struct nvmefc_ls_req *fd;
131189
struct nvme_private *priv;
132190

133-
if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
191+
if (WARN_ON_ONCE(kref_read(&sp->cmd_kref) == 0))
134192
return;
135193

136-
atomic_dec(&sp->ref_count);
137-
138194
if (res)
139195
res = -EINVAL;
140196

141-
nvme = &sp->u.iocb_cmd;
142-
fd = nvme->u.nvme.desc;
143-
priv = fd->private;
197+
priv = (struct nvme_private *)sp->priv;
144198
priv->comp_status = res;
199+
INIT_WORK(&priv->ls_work, qla_nvme_ls_complete);
145200
schedule_work(&priv->ls_work);
146-
/* work schedule doesn't need the sp */
147-
qla2x00_rel_sp(sp);
148201
}
149202

203+
/* it assumed that QPair lock is held. */
150204
static void qla_nvme_sp_done(void *ptr, int res)
151205
{
152206
srb_t *sp = ptr;
153-
struct srb_iocb *nvme;
154-
struct nvmefc_fcp_req *fd;
207+
struct nvme_private *priv = (struct nvme_private *)sp->priv;
155208

156-
nvme = &sp->u.iocb_cmd;
157-
fd = nvme->u.nvme.desc;
158-
159-
if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
160-
return;
161-
162-
atomic_dec(&sp->ref_count);
163-
164-
if (res == QLA_SUCCESS) {
165-
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
166-
} else {
167-
fd->rcv_rsplen = 0;
168-
fd->transferred_length = 0;
169-
}
170-
fd->status = 0;
171-
fd->done(fd);
172-
qla2xxx_rel_qpair_sp(sp->qpair, sp);
209+
priv->comp_status = res;
210+
kref_put(&sp->cmd_kref, qla_nvme_release_fcp_cmd_kref);
173211

174212
return;
175213
}
@@ -188,44 +226,50 @@ static void qla_nvme_abort_work(struct work_struct *work)
188226
__func__, sp, sp->handle, fcport, fcport->deleted);
189227

190228
if (!ha->flags.fw_started && (fcport && fcport->deleted))
191-
return;
229+
goto out;
192230

193231
if (ha->flags.host_shutting_down) {
194232
ql_log(ql_log_info, sp->fcport->vha, 0xffff,
195233
"%s Calling done on sp: %p, type: 0x%x, sp->ref_count: 0x%x\n",
196234
__func__, sp, sp->type, atomic_read(&sp->ref_count));
197235
sp->done(sp, 0);
198-
return;
236+
goto out;
199237
}
200238

201-
if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
202-
return;
203-
204239
rval = ha->isp_ops->abort_command(sp);
205240

206241
ql_dbg(ql_dbg_io, fcport->vha, 0x212b,
207242
"%s: %s command for sp=%p, handle=%x on fcport=%p rval=%x\n",
208243
__func__, (rval != QLA_SUCCESS) ? "Failed to abort" : "Aborted",
209244
sp, sp->handle, fcport, rval);
245+
246+
out:
247+
/* kref_get was done before work was schedule. */
248+
kref_put(&sp->cmd_kref, sp->put_fn);
210249
}
211250

212251
static void qla_nvme_ls_abort(struct nvme_fc_local_port *lport,
213252
struct nvme_fc_remote_port *rport, struct nvmefc_ls_req *fd)
214253
{
215254
struct nvme_private *priv = fd->private;
255+
unsigned long flags;
256+
257+
spin_lock_irqsave(&priv->cmd_lock, flags);
258+
if (!priv->sp) {
259+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
260+
return;
261+
}
262+
263+
if (!kref_get_unless_zero(&priv->sp->cmd_kref)) {
264+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
265+
return;
266+
}
267+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
216268

217269
INIT_WORK(&priv->abort_work, qla_nvme_abort_work);
218270
schedule_work(&priv->abort_work);
219271
}
220272

221-
static void qla_nvme_ls_complete(struct work_struct *work)
222-
{
223-
struct nvme_private *priv =
224-
container_of(work, struct nvme_private, ls_work);
225-
struct nvmefc_ls_req *fd = priv->fd;
226-
227-
fd->done(fd, priv->comp_status);
228-
}
229273

230274
static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
231275
struct nvme_fc_remote_port *rport, struct nvmefc_ls_req *fd)
@@ -257,11 +301,13 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
257301
sp->type = SRB_NVME_LS;
258302
sp->name = "nvme_ls";
259303
sp->done = qla_nvme_sp_ls_done;
260-
atomic_set(&sp->ref_count, 1);
261-
nvme = &sp->u.iocb_cmd;
304+
sp->put_fn = qla_nvme_release_ls_cmd_kref;
305+
sp->priv = (void *)priv;
262306
priv->sp = sp;
307+
kref_init(&sp->cmd_kref);
308+
spin_lock_init(&priv->cmd_lock);
309+
nvme = &sp->u.iocb_cmd;
263310
priv->fd = fd;
264-
INIT_WORK(&priv->ls_work, qla_nvme_ls_complete);
265311
nvme->u.nvme.desc = fd;
266312
nvme->u.nvme.dir = 0;
267313
nvme->u.nvme.dl = 0;
@@ -278,9 +324,10 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport,
278324
if (rval != QLA_SUCCESS) {
279325
ql_log(ql_log_warn, vha, 0x700e,
280326
"qla2x00_start_sp failed = %d\n", rval);
281-
atomic_dec(&sp->ref_count);
282327
wake_up(&sp->nvme_ls_waitq);
283-
sp->free(sp);
328+
sp->priv = NULL;
329+
priv->sp = NULL;
330+
qla2x00_rel_sp(sp);
284331
return rval;
285332
}
286333

@@ -292,6 +339,18 @@ static void qla_nvme_fcp_abort(struct nvme_fc_local_port *lport,
292339
struct nvmefc_fcp_req *fd)
293340
{
294341
struct nvme_private *priv = fd->private;
342+
unsigned long flags;
343+
344+
spin_lock_irqsave(&priv->cmd_lock, flags);
345+
if (!priv->sp) {
346+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
347+
return;
348+
}
349+
if (!kref_get_unless_zero(&priv->sp->cmd_kref)) {
350+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
351+
return;
352+
}
353+
spin_unlock_irqrestore(&priv->cmd_lock, flags);
295354

296355
INIT_WORK(&priv->abort_work, qla_nvme_abort_work);
297356
schedule_work(&priv->abort_work);
@@ -515,12 +574,15 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
515574
if (!sp)
516575
return -EBUSY;
517576

518-
atomic_set(&sp->ref_count, 1);
519577
init_waitqueue_head(&sp->nvme_ls_waitq);
578+
kref_init(&sp->cmd_kref);
579+
spin_lock_init(&priv->cmd_lock);
580+
sp->priv = (void *)priv;
520581
priv->sp = sp;
521582
sp->type = SRB_NVME_CMD;
522583
sp->name = "nvme_cmd";
523584
sp->done = qla_nvme_sp_done;
585+
sp->put_fn = qla_nvme_release_fcp_cmd_kref;
524586
sp->qpair = qpair;
525587
sp->vha = vha;
526588
nvme = &sp->u.iocb_cmd;
@@ -530,9 +592,10 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport,
530592
if (rval != QLA_SUCCESS) {
531593
ql_log(ql_log_warn, vha, 0x212d,
532594
"qla2x00_start_nvme_mq failed = %d\n", rval);
533-
atomic_dec(&sp->ref_count);
534595
wake_up(&sp->nvme_ls_waitq);
535-
sp->free(sp);
596+
sp->priv = NULL;
597+
priv->sp = NULL;
598+
qla2xxx_rel_qpair_sp(sp->qpair, sp);
536599
}
537600

538601
return rval;

drivers/scsi/qla2xxx/qla_nvme.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct nvme_private {
3434
struct work_struct ls_work;
3535
struct work_struct abort_work;
3636
int comp_status;
37+
spinlock_t cmd_lock;
3738
};
3839

3940
struct qla_nvme_rport {

0 commit comments

Comments
 (0)