Skip to content

Commit 4049dc9

Browse files
igaw authored and keithbusch committed
nvmet-fc: defer cleanup using RCU properly
When the target executes a disconnect and the host triggers a reconnect immediately, the reconnect command still finds an existing association. The reconnect crashes later on because nvmet_fc_delete_target_assoc blindly removes resources while the reconnect code wants to use them. To address this, nvmet_fc_find_target_assoc should not be able to look up an association which is being removed. The association list is already under RCU lifetime management, so let's properly use it: remove the association from the list and wait for a grace period before cleaning up all resources. This means we can also drop the RCU management on the queues, because this is now handled via the association itself. A second step splits the execution context so that the initial disconnect command can complete without running the reconnect code in the same context. As usual, this is done by deferring the ->done call to a workqueue. Reviewed-by: Christoph Hellwig <[email protected]> Reviewed-by: Hannes Reinecke <[email protected]> Signed-off-by: Daniel Wagner <[email protected]> Signed-off-by: Keith Busch <[email protected]>
1 parent c691e6d commit 4049dc9

File tree

1 file changed

+37
-46
lines changed
  • drivers/nvme/target

1 file changed

+37
-46
lines changed

drivers/nvme/target/fc.c

Lines changed: 37 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ struct nvmet_fc_tgt_assoc {
166166
struct nvmet_fc_hostport *hostport;
167167
struct nvmet_fc_ls_iod *rcv_disconn;
168168
struct list_head a_list;
169-
struct nvmet_fc_tgt_queue __rcu *queues[NVMET_NR_QUEUES + 1];
169+
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
170170
struct kref ref;
171171
struct work_struct del_work;
172172
struct rcu_head rcu;
@@ -803,14 +803,11 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
803803
if (!queue)
804804
return NULL;
805805

806-
if (!nvmet_fc_tgt_a_get(assoc))
807-
goto out_free_queue;
808-
809806
queue->work_q = alloc_workqueue("ntfc%d.%d.%d", 0, 0,
810807
assoc->tgtport->fc_target_port.port_num,
811808
assoc->a_id, qid);
812809
if (!queue->work_q)
813-
goto out_a_put;
810+
goto out_free_queue;
814811

815812
queue->qid = qid;
816813
queue->sqsize = sqsize;
@@ -832,15 +829,13 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
832829
goto out_fail_iodlist;
833830

834831
WARN_ON(assoc->queues[qid]);
835-
rcu_assign_pointer(assoc->queues[qid], queue);
832+
assoc->queues[qid] = queue;
836833

837834
return queue;
838835

839836
out_fail_iodlist:
840837
nvmet_fc_destroy_fcp_iodlist(assoc->tgtport, queue);
841838
destroy_workqueue(queue->work_q);
842-
out_a_put:
843-
nvmet_fc_tgt_a_put(assoc);
844839
out_free_queue:
845840
kfree(queue);
846841
return NULL;
@@ -853,12 +848,8 @@ nvmet_fc_tgt_queue_free(struct kref *ref)
853848
struct nvmet_fc_tgt_queue *queue =
854849
container_of(ref, struct nvmet_fc_tgt_queue, ref);
855850

856-
rcu_assign_pointer(queue->assoc->queues[queue->qid], NULL);
857-
858851
nvmet_fc_destroy_fcp_iodlist(queue->assoc->tgtport, queue);
859852

860-
nvmet_fc_tgt_a_put(queue->assoc);
861-
862853
destroy_workqueue(queue->work_q);
863854

864855
kfree_rcu(queue, rcu);
@@ -970,7 +961,7 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
970961
rcu_read_lock();
971962
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
972963
if (association_id == assoc->association_id) {
973-
queue = rcu_dereference(assoc->queues[qid]);
964+
queue = assoc->queues[qid];
974965
if (queue &&
975966
(!atomic_read(&queue->connected) ||
976967
!nvmet_fc_tgt_q_get(queue)))
@@ -1173,13 +1164,18 @@ nvmet_fc_target_assoc_free(struct kref *ref)
11731164
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
11741165
struct nvmet_fc_ls_iod *oldls;
11751166
unsigned long flags;
1167+
int i;
1168+
1169+
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
1170+
if (assoc->queues[i])
1171+
nvmet_fc_delete_target_queue(assoc->queues[i]);
1172+
}
11761173

11771174
/* Send Disconnect now that all i/o has completed */
11781175
nvmet_fc_xmt_disconnect_assoc(assoc);
11791176

11801177
nvmet_fc_free_hostport(assoc->hostport);
11811178
spin_lock_irqsave(&tgtport->lock, flags);
1182-
list_del_rcu(&assoc->a_list);
11831179
oldls = assoc->rcv_disconn;
11841180
spin_unlock_irqrestore(&tgtport->lock, flags);
11851181
/* if pending Rcv Disconnect Association LS, send rsp now */
@@ -1209,7 +1205,7 @@ static void
12091205
nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
12101206
{
12111207
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
1212-
struct nvmet_fc_tgt_queue *queue;
1208+
unsigned long flags;
12131209
int i, terminating;
12141210

12151211
terminating = atomic_xchg(&assoc->terminating, 1);
@@ -1218,29 +1214,21 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
12181214
if (terminating)
12191215
return;
12201216

1217+
spin_lock_irqsave(&tgtport->lock, flags);
1218+
list_del_rcu(&assoc->a_list);
1219+
spin_unlock_irqrestore(&tgtport->lock, flags);
12211220

1222-
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
1223-
rcu_read_lock();
1224-
queue = rcu_dereference(assoc->queues[i]);
1225-
if (!queue) {
1226-
rcu_read_unlock();
1227-
continue;
1228-
}
1221+
synchronize_rcu();
12291222

1230-
if (!nvmet_fc_tgt_q_get(queue)) {
1231-
rcu_read_unlock();
1232-
continue;
1233-
}
1234-
rcu_read_unlock();
1235-
nvmet_fc_delete_target_queue(queue);
1236-
nvmet_fc_tgt_q_put(queue);
1223+
/* ensure all in-flight I/Os have been processed */
1224+
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
1225+
if (assoc->queues[i])
1226+
flush_workqueue(assoc->queues[i]->work_q);
12371227
}
12381228

12391229
dev_info(tgtport->dev,
12401230
"{%d:%d} Association deleted\n",
12411231
tgtport->fc_target_port.port_num, assoc->a_id);
1242-
1243-
nvmet_fc_tgt_a_put(assoc);
12441232
}
12451233

12461234
static struct nvmet_fc_tgt_assoc *
@@ -1493,9 +1481,8 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
14931481
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
14941482
if (!nvmet_fc_tgt_a_get(assoc))
14951483
continue;
1496-
if (!queue_work(nvmet_wq, &assoc->del_work))
1497-
/* already deleting - release local reference */
1498-
nvmet_fc_tgt_a_put(assoc);
1484+
queue_work(nvmet_wq, &assoc->del_work);
1485+
nvmet_fc_tgt_a_put(assoc);
14991486
}
15001487
rcu_read_unlock();
15011488
}
@@ -1548,9 +1535,8 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
15481535
continue;
15491536
assoc->hostport->invalid = 1;
15501537
noassoc = false;
1551-
if (!queue_work(nvmet_wq, &assoc->del_work))
1552-
/* already deleting - release local reference */
1553-
nvmet_fc_tgt_a_put(assoc);
1538+
queue_work(nvmet_wq, &assoc->del_work);
1539+
nvmet_fc_tgt_a_put(assoc);
15541540
}
15551541
spin_unlock_irqrestore(&tgtport->lock, flags);
15561542

@@ -1582,7 +1568,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
15821568

15831569
rcu_read_lock();
15841570
list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) {
1585-
queue = rcu_dereference(assoc->queues[0]);
1571+
queue = assoc->queues[0];
15861572
if (queue && queue->nvme_sq.ctrl == ctrl) {
15871573
if (nvmet_fc_tgt_a_get(assoc))
15881574
found_ctrl = true;
@@ -1594,9 +1580,8 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
15941580
nvmet_fc_tgtport_put(tgtport);
15951581

15961582
if (found_ctrl) {
1597-
if (!queue_work(nvmet_wq, &assoc->del_work))
1598-
/* already deleting - release local reference */
1599-
nvmet_fc_tgt_a_put(assoc);
1583+
queue_work(nvmet_wq, &assoc->del_work);
1584+
nvmet_fc_tgt_a_put(assoc);
16001585
return;
16011586
}
16021587

@@ -1626,6 +1611,8 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
16261611
/* terminate any outstanding associations */
16271612
__nvmet_fc_free_assocs(tgtport);
16281613

1614+
flush_workqueue(nvmet_wq);
1615+
16291616
/*
16301617
* should terminate LS's as well. However, LS's will be generated
16311618
* at the tail end of association termination, so they likely don't
@@ -1871,9 +1858,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
18711858
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
18721859
FCNVME_LS_DISCONNECT_ASSOC);
18731860

1874-
/* release get taken in nvmet_fc_find_target_assoc */
1875-
nvmet_fc_tgt_a_put(assoc);
1876-
18771861
/*
18781862
* The rules for LS response says the response cannot
18791863
* go back until ABTS's have been sent for all outstanding
@@ -1888,8 +1872,6 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
18881872
assoc->rcv_disconn = iod;
18891873
spin_unlock_irqrestore(&tgtport->lock, flags);
18901874

1891-
nvmet_fc_delete_target_assoc(assoc);
1892-
18931875
if (oldls) {
18941876
dev_info(tgtport->dev,
18951877
"{%d:%d} Multiple Disconnect Association LS's "
@@ -1905,6 +1887,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
19051887
nvmet_fc_xmt_ls_rsp(tgtport, oldls);
19061888
}
19071889

1890+
queue_work(nvmet_wq, &assoc->del_work);
1891+
nvmet_fc_tgt_a_put(assoc);
1892+
19081893
return false;
19091894
}
19101895

@@ -2903,6 +2888,9 @@ nvmet_fc_remove_port(struct nvmet_port *port)
29032888

29042889
nvmet_fc_portentry_unbind(pe);
29052890

2891+
/* terminate any outstanding associations */
2892+
__nvmet_fc_free_assocs(pe->tgtport);
2893+
29062894
kfree(pe);
29072895
}
29082896

@@ -2934,6 +2922,9 @@ static int __init nvmet_fc_init_module(void)
29342922

29352923
static void __exit nvmet_fc_exit_module(void)
29362924
{
2925+
/* ensure any shutdown operation, e.g. delete ctrls have finished */
2926+
flush_workqueue(nvmet_wq);
2927+
29372928
/* sanity check - all lports should be removed */
29382929
if (!list_empty(&nvmet_fc_target_list))
29392930
pr_warn("%s: targetport list not empty\n", __func__);

0 commit comments

Comments
 (0)