Skip to content

Commit 7a8bccd

Browse files
Shifeng Lijgunthorpe
authored andcommitted
RDMA/device: Fix a race between mad_client and cm_client init
The mad_client will be initialized in enable_device_and_get(), while the devices_rwsem will be downgraded to a read semaphore. There is a window that leads to the failed initialization for cm_client, since it can not get matched mad port from ib_mad_port_list, and the matched mad port will be added to the list after that. mad_client | cm_client ------------------|-------------------------------------------------------- ib_register_device| enable_device_and_get down_write(&devices_rwsem) xa_set_mark(&devices, DEVICE_REGISTERED) downgrade_write(&devices_rwsem) | |ib_cm_init |ib_register_client(&cm_client) |down_read(&devices_rwsem) |xa_for_each_marked (&devices, DEVICE_REGISTERED) |add_client_context |cm_add_one |ib_register_mad_agent |ib_get_mad_port |__ib_get_mad_port |list_for_each_entry(entry, &ib_mad_port_list, port_list) |return NULL |up_read(&devices_rwsem) | add_client_context| ib_mad_init_device| ib_mad_port_open | list_add_tail(&port_priv->port_list, &ib_mad_port_list) up_read(&devices_rwsem) | Fix it by using down_write(&devices_rwsem) in ib_register_client(). Fixes: d089989 ("RDMA/device: Provide APIs from the core code to help unregistration") Link: https://lore.kernel.org/r/[email protected] Suggested-by: Jason Gunthorpe <[email protected]> Signed-off-by: Shifeng Li <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent d20a7cf commit 7a8bccd

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

drivers/infiniband/core/device.c

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,7 +1730,7 @@ static int assign_client_id(struct ib_client *client)
17301730
{
17311731
int ret;
17321732

1733-
down_write(&clients_rwsem);
1733+
lockdep_assert_held(&clients_rwsem);
17341734
/*
17351735
* The add/remove callbacks must be called in FIFO/LIFO order. To
17361736
* achieve this we assign client_ids so they are sorted in
@@ -1739,14 +1739,11 @@ static int assign_client_id(struct ib_client *client)
17391739
client->client_id = highest_client_id;
17401740
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
17411741
if (ret)
1742-
goto out;
1742+
return ret;
17431743

17441744
highest_client_id++;
17451745
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
1746-
1747-
out:
1748-
up_write(&clients_rwsem);
1749-
return ret;
1746+
return 0;
17501747
}
17511748

17521749
static void remove_client_id(struct ib_client *client)
@@ -1776,25 +1773,35 @@ int ib_register_client(struct ib_client *client)
17761773
{
17771774
struct ib_device *device;
17781775
unsigned long index;
1776+
bool need_unreg = false;
17791777
int ret;
17801778

17811779
refcount_set(&client->uses, 1);
17821780
init_completion(&client->uses_zero);
1781+
1782+
/*
1783+
* The devices_rwsem is held in write mode to ensure that a racing
1784+
* ib_register_device() sees a consisent view of clients and devices.
1785+
*/
1786+
down_write(&devices_rwsem);
1787+
down_write(&clients_rwsem);
17831788
ret = assign_client_id(client);
17841789
if (ret)
1785-
return ret;
1790+
goto out;
17861791

1787-
down_read(&devices_rwsem);
1792+
need_unreg = true;
17881793
xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
17891794
ret = add_client_context(device, client);
1790-
if (ret) {
1791-
up_read(&devices_rwsem);
1792-
ib_unregister_client(client);
1793-
return ret;
1794-
}
1795+
if (ret)
1796+
goto out;
17951797
}
1796-
up_read(&devices_rwsem);
1797-
return 0;
1798+
ret = 0;
1799+
out:
1800+
up_write(&clients_rwsem);
1801+
up_write(&devices_rwsem);
1802+
if (need_unreg && ret)
1803+
ib_unregister_client(client);
1804+
return ret;
17981805
}
17991806
EXPORT_SYMBOL(ib_register_client);
18001807

0 commit comments

Comments
 (0)