Skip to content

Commit 6778a6b

Browse files
karstengrdavem330
authored andcommitted
net/smc: separate LLC wait queues for flow and messages
There might be races in scenarios where both SMC link groups are on the same system. Prevent that by creating separate wait queues for LLC flows and messages. Switch to non-interruptable versions of wait_event() and wake_up() for the llc flow waiter to make sure the waiters get control sequentially. Fine tune the llc_flow_lock to include the assignment of the message. Write to system log when an unexpected message was dropped. And remove an extra indirection and use the existing local variable lgr in smc_llc_enqueue(). Fixes: 555da9a ("net/smc: add event-based llc_flow framework") Reviewed-by: Ursula Braun <[email protected]> Signed-off-by: Karsten Graul <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent a42e6ae commit 6778a6b

File tree

3 files changed

+77
-48
lines changed

3 files changed

+77
-48
lines changed

net/smc/smc_core.c

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
247247
if (smc_link_usable(lnk))
248248
lnk->state = SMC_LNK_INACTIVE;
249249
}
250-
wake_up_interruptible_all(&lgr->llc_waiter);
250+
wake_up_all(&lgr->llc_msg_waiter);
251+
wake_up_all(&lgr->llc_flow_waiter);
251252
}
252253

253254
static void smc_lgr_free(struct smc_link_group *lgr);
@@ -1130,18 +1131,19 @@ static void smcr_link_up(struct smc_link_group *lgr,
11301131
return;
11311132
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
11321133
/* some other llc task is ongoing */
1133-
wait_event_interruptible_timeout(lgr->llc_waiter,
1134-
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1134+
wait_event_timeout(lgr->llc_flow_waiter,
1135+
(list_empty(&lgr->list) ||
1136+
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
11351137
SMC_LLC_WAIT_TIME);
11361138
}
1137-
if (list_empty(&lgr->list) ||
1138-
!smc_ib_port_active(smcibdev, ibport))
1139-
return; /* lgr or device no longer active */
1140-
link = smc_llc_usable_link(lgr);
1141-
if (!link)
1142-
return;
1143-
smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
1144-
NULL, SMC_LLC_REQ);
1139+
/* lgr or device no longer active? */
1140+
if (!list_empty(&lgr->list) &&
1141+
smc_ib_port_active(smcibdev, ibport))
1142+
link = smc_llc_usable_link(lgr);
1143+
if (link)
1144+
smc_llc_send_add_link(link, smcibdev->mac[ibport - 1],
1145+
gid, NULL, SMC_LLC_REQ);
1146+
wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
11451147
}
11461148
}
11471149

@@ -1195,13 +1197,17 @@ static void smcr_link_down(struct smc_link *lnk)
11951197
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
11961198
/* another llc task is ongoing */
11971199
mutex_unlock(&lgr->llc_conf_mutex);
1198-
wait_event_interruptible_timeout(lgr->llc_waiter,
1199-
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1200+
wait_event_timeout(lgr->llc_flow_waiter,
1201+
(list_empty(&lgr->list) ||
1202+
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
12001203
SMC_LLC_WAIT_TIME);
12011204
mutex_lock(&lgr->llc_conf_mutex);
12021205
}
1203-
smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
1204-
SMC_LLC_DEL_LOST_PATH);
1206+
if (!list_empty(&lgr->list))
1207+
smc_llc_send_delete_link(to_lnk, del_link_id,
1208+
SMC_LLC_REQ, true,
1209+
SMC_LLC_DEL_LOST_PATH);
1210+
wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */
12051211
}
12061212
}
12071213

@@ -1262,7 +1268,7 @@ static void smc_link_down_work(struct work_struct *work)
12621268

12631269
if (list_empty(&lgr->list))
12641270
return;
1265-
wake_up_interruptible_all(&lgr->llc_waiter);
1271+
wake_up_all(&lgr->llc_msg_waiter);
12661272
mutex_lock(&lgr->llc_conf_mutex);
12671273
smcr_link_down(link);
12681274
mutex_unlock(&lgr->llc_conf_mutex);

net/smc/smc_core.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,10 @@ struct smc_link_group {
262262
struct work_struct llc_del_link_work;
263263
struct work_struct llc_event_work;
264264
/* llc event worker */
265-
wait_queue_head_t llc_waiter;
265+
wait_queue_head_t llc_flow_waiter;
266266
/* w4 next llc event */
267+
wait_queue_head_t llc_msg_waiter;
268+
/* w4 next llc msg */
267269
struct smc_llc_flow llc_flow_lcl;
268270
/* llc local control field */
269271
struct smc_llc_flow llc_flow_rmt;

net/smc/smc_llc.c

Lines changed: 52 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,26 @@ static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
186186
flow->qentry = qentry;
187187
}
188188

189+
static void smc_llc_flow_parallel(struct smc_link_group *lgr, u8 flow_type,
190+
struct smc_llc_qentry *qentry)
191+
{
192+
u8 msg_type = qentry->msg.raw.hdr.common.type;
193+
194+
if ((msg_type == SMC_LLC_ADD_LINK || msg_type == SMC_LLC_DELETE_LINK) &&
195+
flow_type != msg_type && !lgr->delayed_event) {
196+
lgr->delayed_event = qentry;
197+
return;
198+
}
199+
/* drop parallel or already-in-progress llc requests */
200+
if (flow_type != msg_type)
201+
pr_warn_once("smc: SMC-R lg %*phN dropped parallel "
202+
"LLC msg: msg %d flow %d role %d\n",
203+
SMC_LGR_ID_SIZE, &lgr->id,
204+
qentry->msg.raw.hdr.common.type,
205+
flow_type, lgr->role);
206+
kfree(qentry);
207+
}
208+
189209
/* try to start a new llc flow, initiated by an incoming llc msg */
190210
static bool smc_llc_flow_start(struct smc_llc_flow *flow,
191211
struct smc_llc_qentry *qentry)
@@ -195,14 +215,7 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
195215
spin_lock_bh(&lgr->llc_flow_lock);
196216
if (flow->type) {
197217
/* a flow is already active */
198-
if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
199-
qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
200-
!lgr->delayed_event) {
201-
lgr->delayed_event = qentry;
202-
} else {
203-
/* forget this llc request */
204-
kfree(qentry);
205-
}
218+
smc_llc_flow_parallel(lgr, flow->type, qentry);
206219
spin_unlock_bh(&lgr->llc_flow_lock);
207220
return false;
208221
}
@@ -222,8 +235,8 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
222235
}
223236
if (qentry == lgr->delayed_event)
224237
lgr->delayed_event = NULL;
225-
spin_unlock_bh(&lgr->llc_flow_lock);
226238
smc_llc_flow_qentry_set(flow, qentry);
239+
spin_unlock_bh(&lgr->llc_flow_lock);
227240
return true;
228241
}
229242

@@ -251,11 +264,11 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr,
251264
return 0;
252265
}
253266
spin_unlock_bh(&lgr->llc_flow_lock);
254-
rc = wait_event_interruptible_timeout(lgr->llc_waiter,
255-
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
256-
(lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
257-
lgr->llc_flow_rmt.type == allowed_remote)),
258-
SMC_LLC_WAIT_TIME);
267+
rc = wait_event_timeout(lgr->llc_flow_waiter, (list_empty(&lgr->list) ||
268+
(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
269+
(lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
270+
lgr->llc_flow_rmt.type == allowed_remote))),
271+
SMC_LLC_WAIT_TIME * 10);
259272
if (!rc)
260273
return -ETIMEDOUT;
261274
goto again;
@@ -272,7 +285,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
272285
flow == &lgr->llc_flow_lcl)
273286
schedule_work(&lgr->llc_event_work);
274287
else
275-
wake_up_interruptible(&lgr->llc_waiter);
288+
wake_up(&lgr->llc_flow_waiter);
276289
}
277290

278291
/* lnk is optional and used for early wakeup when link goes down, useful in
@@ -283,26 +296,32 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
283296
int time_out, u8 exp_msg)
284297
{
285298
struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
299+
u8 rcv_msg;
286300

287-
wait_event_interruptible_timeout(lgr->llc_waiter,
288-
(flow->qentry ||
289-
(lnk && !smc_link_usable(lnk)) ||
290-
list_empty(&lgr->list)),
291-
time_out);
301+
wait_event_timeout(lgr->llc_msg_waiter,
302+
(flow->qentry ||
303+
(lnk && !smc_link_usable(lnk)) ||
304+
list_empty(&lgr->list)),
305+
time_out);
292306
if (!flow->qentry ||
293307
(lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
294308
smc_llc_flow_qentry_del(flow);
295309
goto out;
296310
}
297-
if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
311+
rcv_msg = flow->qentry->msg.raw.hdr.common.type;
312+
if (exp_msg && rcv_msg != exp_msg) {
298313
if (exp_msg == SMC_LLC_ADD_LINK &&
299-
flow->qentry->msg.raw.hdr.common.type ==
300-
SMC_LLC_DELETE_LINK) {
314+
rcv_msg == SMC_LLC_DELETE_LINK) {
301315
/* flow_start will delay the unexpected msg */
302316
smc_llc_flow_start(&lgr->llc_flow_lcl,
303317
smc_llc_flow_qentry_clr(flow));
304318
return NULL;
305319
}
320+
pr_warn_once("smc: SMC-R lg %*phN dropped unexpected LLC msg: "
321+
"msg %d exp %d flow %d role %d flags %x\n",
322+
SMC_LGR_ID_SIZE, &lgr->id, rcv_msg, exp_msg,
323+
flow->type, lgr->role,
324+
flow->qentry->msg.raw.hdr.flags);
306325
smc_llc_flow_qentry_del(flow);
307326
}
308327
out:
@@ -1459,7 +1478,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
14591478
/* a flow is waiting for this message */
14601479
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
14611480
qentry);
1462-
wake_up_interruptible(&lgr->llc_waiter);
1481+
wake_up(&lgr->llc_msg_waiter);
14631482
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
14641483
qentry)) {
14651484
schedule_work(&lgr->llc_add_link_work);
@@ -1474,7 +1493,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
14741493
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
14751494
/* a flow is waiting for this message */
14761495
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
1477-
wake_up_interruptible(&lgr->llc_waiter);
1496+
wake_up(&lgr->llc_msg_waiter);
14781497
return;
14791498
}
14801499
break;
@@ -1485,7 +1504,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
14851504
/* DEL LINK REQ during ADD LINK SEQ */
14861505
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
14871506
qentry);
1488-
wake_up_interruptible(&lgr->llc_waiter);
1507+
wake_up(&lgr->llc_msg_waiter);
14891508
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
14901509
qentry)) {
14911510
schedule_work(&lgr->llc_del_link_work);
@@ -1496,7 +1515,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
14961515
/* DEL LINK REQ during ADD LINK SEQ */
14971516
smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
14981517
qentry);
1499-
wake_up_interruptible(&lgr->llc_waiter);
1518+
wake_up(&lgr->llc_msg_waiter);
15001519
} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
15011520
qentry)) {
15021521
schedule_work(&lgr->llc_del_link_work);
@@ -1581,7 +1600,7 @@ static void smc_llc_rx_response(struct smc_link *link,
15811600
case SMC_LLC_DELETE_RKEY:
15821601
/* assign responses to the local flow, we requested them */
15831602
smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
1584-
wake_up_interruptible(&link->lgr->llc_waiter);
1603+
wake_up(&link->lgr->llc_msg_waiter);
15851604
return;
15861605
case SMC_LLC_CONFIRM_RKEY_CONT:
15871606
/* not used because max links is 3 */
@@ -1616,7 +1635,7 @@ static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
16161635
spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
16171636
list_add_tail(&qentry->list, &lgr->llc_event_q);
16181637
spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
1619-
schedule_work(&link->lgr->llc_event_work);
1638+
schedule_work(&lgr->llc_event_work);
16201639
}
16211640

16221641
/* copy received msg and add it to the event queue */
@@ -1677,7 +1696,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
16771696
INIT_LIST_HEAD(&lgr->llc_event_q);
16781697
spin_lock_init(&lgr->llc_event_q_lock);
16791698
spin_lock_init(&lgr->llc_flow_lock);
1680-
init_waitqueue_head(&lgr->llc_waiter);
1699+
init_waitqueue_head(&lgr->llc_flow_waiter);
1700+
init_waitqueue_head(&lgr->llc_msg_waiter);
16811701
mutex_init(&lgr->llc_conf_mutex);
16821702
lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
16831703
}
@@ -1686,7 +1706,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
16861706
void smc_llc_lgr_clear(struct smc_link_group *lgr)
16871707
{
16881708
smc_llc_event_flush(lgr);
1689-
wake_up_interruptible_all(&lgr->llc_waiter);
1709+
wake_up_all(&lgr->llc_flow_waiter);
1710+
wake_up_all(&lgr->llc_msg_waiter);
16901711
cancel_work_sync(&lgr->llc_event_work);
16911712
cancel_work_sync(&lgr->llc_add_link_work);
16921713
cancel_work_sync(&lgr->llc_del_link_work);

0 commit comments

Comments
 (0)