Skip to content

Commit 92f3cb0

Browse files
Ursula Braun authored and davem330 committed
net/smc: fix sleep bug in smc_pnet_find_roce_resource()
Tests showed this BUG:

[572555.252867] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
[572555.252876] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 131031, name: smcapp
[572555.252879] INFO: lockdep is turned off.
[572555.252883] CPU: 1 PID: 131031 Comm: smcapp Tainted: G O 5.7.0-rc3uschi+ #356
[572555.252885] Hardware name: IBM 3906 M03 703 (LPAR)
[572555.252887] Call Trace:
[572555.252896] [<00000000ac364554>] show_stack+0x94/0xe8
[572555.252901] [<00000000aca1f400>] dump_stack+0xa0/0xe0
[572555.252906] [<00000000ac3c8c10>] ___might_sleep+0x260/0x280
[572555.252910] [<00000000acdc0c98>] __mutex_lock+0x48/0x940
[572555.252912] [<00000000acdc15c2>] mutex_lock_nested+0x32/0x40
[572555.252975] [<000003ff801762d0>] mlx5_lag_get_roce_netdev+0x30/0xc0 [mlx5_core]
[572555.252996] [<000003ff801fb3aa>] mlx5_ib_get_netdev+0x3a/0xe0 [mlx5_ib]
[572555.253007] [<000003ff80063848>] smc_pnet_find_roce_resource+0x1d8/0x310 [smc]
[572555.253011] [<000003ff800602f0>] __smc_connect+0x1f0/0x3e0 [smc]
[572555.253015] [<000003ff80060634>] smc_connect+0x154/0x190 [smc]
[572555.253022] [<00000000acbed8d4>] __sys_connect+0x94/0xd0
[572555.253025] [<00000000acbef620>] __s390x_sys_socketcall+0x170/0x360
[572555.253028] [<00000000acdc6800>] system_call+0x298/0x2b8
[572555.253030] INFO: lockdep is turned off.

Function smc_pnet_find_rdma_dev() might be called from smc_pnet_find_roce_resource(). It holds the smc_ib_devices list spinlock while calling infiniband op get_netdev(). At least for mlx5 the get_netdev operation wants mutex serialization, which conflicts with the smc_ib_devices spinlock.

This patch switches the smc_ib_devices spinlock into a mutex to allow sleeping when calling get_netdev().

Fixes: a4cf044 ("smc: introduce SMC as an IB-client")
Signed-off-by: Ursula Braun <[email protected]>
Signed-off-by: Karsten Graul <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent b7eede7 commit 92f3cb0

File tree

4 files changed

+22
-18
lines changed

4 files changed

+22
-18
lines changed

net/smc/smc_core.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/workqueue.h>
1616
#include <linux/wait.h>
1717
#include <linux/reboot.h>
18+
#include <linux/mutex.h>
1819
#include <net/tcp.h>
1920
#include <net/sock.h>
2021
#include <rdma/ib_verbs.h>
@@ -1961,14 +1962,14 @@ static void smc_core_going_away(void)
19611962
struct smc_ib_device *smcibdev;
19621963
struct smcd_dev *smcd;
19631964

1964-
spin_lock(&smc_ib_devices.lock);
1965+
mutex_lock(&smc_ib_devices.mutex);
19651966
list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
19661967
int i;
19671968

19681969
for (i = 0; i < SMC_MAX_PORTS; i++)
19691970
set_bit(i, smcibdev->ports_going_away);
19701971
}
1971-
spin_unlock(&smc_ib_devices.lock);
1972+
mutex_unlock(&smc_ib_devices.mutex);
19721973

19731974
spin_lock(&smcd_dev_list.lock);
19741975
list_for_each_entry(smcd, &smcd_dev_list.list, list) {

net/smc/smc_ib.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <linux/workqueue.h>
1717
#include <linux/scatterlist.h>
1818
#include <linux/wait.h>
19+
#include <linux/mutex.h>
1920
#include <rdma/ib_verbs.h>
2021
#include <rdma/ib_cache.h>
2122

@@ -33,7 +34,7 @@
3334
#define SMC_QP_RNR_RETRY 7 /* 7: infinite */
3435

3536
struct smc_ib_devices smc_ib_devices = { /* smc-registered ib devices */
36-
.lock = __SPIN_LOCK_UNLOCKED(smc_ib_devices.lock),
37+
.mutex = __MUTEX_INITIALIZER(smc_ib_devices.mutex),
3738
.list = LIST_HEAD_INIT(smc_ib_devices.list),
3839
};
3940

@@ -565,9 +566,9 @@ static int smc_ib_add_dev(struct ib_device *ibdev)
565566
INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
566567
atomic_set(&smcibdev->lnk_cnt, 0);
567568
init_waitqueue_head(&smcibdev->lnks_deleted);
568-
spin_lock(&smc_ib_devices.lock);
569+
mutex_lock(&smc_ib_devices.mutex);
569570
list_add_tail(&smcibdev->list, &smc_ib_devices.list);
570-
spin_unlock(&smc_ib_devices.lock);
571+
mutex_unlock(&smc_ib_devices.mutex);
571572
ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
572573
INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
573574
smc_ib_global_event_handler);
@@ -602,9 +603,9 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
602603
{
603604
struct smc_ib_device *smcibdev = client_data;
604605

605-
spin_lock(&smc_ib_devices.lock);
606+
mutex_lock(&smc_ib_devices.mutex);
606607
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
607-
spin_unlock(&smc_ib_devices.lock);
608+
mutex_unlock(&smc_ib_devices.mutex);
608609
pr_warn_ratelimited("smc: removing ib device %s\n",
609610
smcibdev->ibdev->name);
610611
smc_smcr_terminate_all(smcibdev);

net/smc/smc_ib.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <linux/interrupt.h>
1616
#include <linux/if_ether.h>
17+
#include <linux/mutex.h>
1718
#include <linux/wait.h>
1819
#include <rdma/ib_verbs.h>
1920
#include <net/smc.h>
@@ -25,7 +26,7 @@
2526

2627
struct smc_ib_devices { /* list of smc ib devices definition */
2728
struct list_head list;
28-
spinlock_t lock; /* protects list of smc ib devices */
29+
struct mutex mutex; /* protects list of smc ib devices */
2930
};
3031

3132
extern struct smc_ib_devices smc_ib_devices; /* list of smc ib devices */

net/smc/smc_pnet.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/module.h>
1313
#include <linux/list.h>
1414
#include <linux/ctype.h>
15+
#include <linux/mutex.h>
1516
#include <net/netlink.h>
1617
#include <net/genetlink.h>
1718

@@ -129,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
129130
return rc;
130131

131132
/* remove ib devices */
132-
spin_lock(&smc_ib_devices.lock);
133+
mutex_lock(&smc_ib_devices.mutex);
133134
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
134135
for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
135136
if (ibdev->pnetid_by_user[ibport] &&
@@ -149,7 +150,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
149150
}
150151
}
151152
}
152-
spin_unlock(&smc_ib_devices.lock);
153+
mutex_unlock(&smc_ib_devices.mutex);
153154
/* remove smcd devices */
154155
spin_lock(&smcd_dev_list.lock);
155156
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
@@ -240,14 +241,14 @@ static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
240241
u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
241242
bool applied = false;
242243

243-
spin_lock(&smc_ib_devices.lock);
244+
mutex_lock(&smc_ib_devices.mutex);
244245
if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
245246
memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
246247
SMC_MAX_PNETID_LEN);
247248
ib_dev->pnetid_by_user[ib_port - 1] = true;
248249
applied = true;
249250
}
250-
spin_unlock(&smc_ib_devices.lock);
251+
mutex_unlock(&smc_ib_devices.mutex);
251252
return applied;
252253
}
253254

@@ -300,7 +301,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
300301
{
301302
struct smc_ib_device *ibdev;
302303

303-
spin_lock(&smc_ib_devices.lock);
304+
mutex_lock(&smc_ib_devices.mutex);
304305
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
305306
if (!strncmp(ibdev->ibdev->name, ib_name,
306307
sizeof(ibdev->ibdev->name)) ||
@@ -311,7 +312,7 @@ static struct smc_ib_device *smc_pnet_find_ib(char *ib_name)
311312
}
312313
ibdev = NULL;
313314
out:
314-
spin_unlock(&smc_ib_devices.lock);
315+
mutex_unlock(&smc_ib_devices.mutex);
315316
return ibdev;
316317
}
317318

@@ -825,7 +826,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
825826
int i;
826827

827828
ini->ib_dev = NULL;
828-
spin_lock(&smc_ib_devices.lock);
829+
mutex_lock(&smc_ib_devices.mutex);
829830
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
830831
if (ibdev == known_dev)
831832
continue;
@@ -844,7 +845,7 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
844845
}
845846
}
846847
out:
847-
spin_unlock(&smc_ib_devices.lock);
848+
mutex_unlock(&smc_ib_devices.mutex);
848849
}
849850

850851
/* find alternate roce device with same pnet_id and vlan_id */
@@ -863,7 +864,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
863864
{
864865
struct smc_ib_device *ibdev;
865866

866-
spin_lock(&smc_ib_devices.lock);
867+
mutex_lock(&smc_ib_devices.mutex);
867868
list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
868869
struct net_device *ndev;
869870
int i;
@@ -888,7 +889,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev,
888889
}
889890
}
890891
}
891-
spin_unlock(&smc_ib_devices.lock);
892+
mutex_unlock(&smc_ib_devices.mutex);
892893
}
893894

894895
/* Determine the corresponding IB device port based on the hardware PNETID.

0 commit comments

Comments
 (0)