Skip to content

Commit 0f06228

Browse files
msanalla authored and kuba-moo committed
net/mlx5: Reload only IB representors upon lag disable/enable
On lag disable, the bond IB device along with all of its representors are destroyed, and then the slaves' representors get reloaded. In case the slave IB representor load fails, the eswitch error flow unloads all representors, including ethernet representors, where the netdevs get detached and removed from lag bond. Such flow is inaccurate as the lag driver is not responsible for loading/unloading ethernet representors. Furthermore, the flow described above begins by holding lag lock to prevent bond changes during disable flow. However, when reaching the ethernet representors detachment from lag, the lag lock is required again, triggering the following deadlock: Call trace: __switch_to+0xf4/0x148 __schedule+0x2c8/0x7d0 schedule+0x50/0xe0 schedule_preempt_disabled+0x18/0x28 __mutex_lock.isra.13+0x2b8/0x570 __mutex_lock_slowpath+0x1c/0x28 mutex_lock+0x4c/0x68 mlx5_lag_remove_netdev+0x3c/0x1a0 [mlx5_core] mlx5e_uplink_rep_disable+0x70/0xa0 [mlx5_core] mlx5e_detach_netdev+0x6c/0xb0 [mlx5_core] mlx5e_netdev_change_profile+0x44/0x138 [mlx5_core] mlx5e_netdev_attach_nic_profile+0x28/0x38 [mlx5_core] mlx5e_vport_rep_unload+0x184/0x1b8 [mlx5_core] mlx5_esw_offloads_rep_load+0xd8/0xe0 [mlx5_core] mlx5_eswitch_reload_reps+0x74/0xd0 [mlx5_core] mlx5_disable_lag+0x130/0x138 [mlx5_core] mlx5_lag_disable_change+0x6c/0x70 [mlx5_core] // hold ldev->lock mlx5_devlink_eswitch_mode_set+0xc0/0x410 [mlx5_core] devlink_nl_cmd_eswitch_set_doit+0xdc/0x180 genl_family_rcv_msg_doit.isra.17+0xe8/0x138 genl_rcv_msg+0xe4/0x220 netlink_rcv_skb+0x44/0x108 genl_rcv+0x40/0x58 netlink_unicast+0x198/0x268 netlink_sendmsg+0x1d4/0x418 sock_sendmsg+0x54/0x60 __sys_sendto+0xf4/0x120 __arm64_sys_sendto+0x30/0x40 el0_svc_common+0x8c/0x120 do_el0_svc+0x30/0xa0 el0_svc+0x20/0x30 el0_sync_handler+0x90/0xb8 el0_sync+0x160/0x180 Thus, upon lag enable/disable, load and unload only the IB representors of the slaves preventing the deadlock mentioned above. 
While at it, refactor the mlx5_esw_offloads_rep_load() function to have a static helper method for its internal logic, in symmetry with the representor unload design.

Fixes: 598fe77 ("net/mlx5: Lag, Create shared FDB when in switchdev mode")
Co-developed-by: Mark Bloch <[email protected]>
Signed-off-by: Mark Bloch <[email protected]>
Signed-off-by: Maher Sanalla <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
Reviewed-by: Simon Horman <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 3c453e8 commit 0f06228

File tree

4 files changed

+25
-17
lines changed

4 files changed

+25
-17
lines changed

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,7 @@ int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw,
833833
struct mlx5_eswitch *slave_esw, int max_slaves);
834834
void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
835835
struct mlx5_eswitch *slave_esw);
836-
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw);
836+
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw);
837837

838838
bool mlx5_eswitch_block_encap(struct mlx5_core_dev *dev);
839839
void mlx5_eswitch_unblock_encap(struct mlx5_core_dev *dev);
@@ -925,7 +925,7 @@ mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw,
925925
static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; }
926926

927927
static inline int
928-
mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
928+
mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
929929
{
930930
return 0;
931931
}

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,6 +2502,16 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
25022502
esw_offloads_cleanup_reps(esw);
25032503
}
25042504

2505+
static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
2506+
struct mlx5_eswitch_rep *rep, u8 rep_type)
2507+
{
2508+
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
2509+
REP_REGISTERED, REP_LOADED) == REP_REGISTERED)
2510+
return esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
2511+
2512+
return 0;
2513+
}
2514+
25052515
static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
25062516
struct mlx5_eswitch_rep *rep, u8 rep_type)
25072517
{
@@ -2526,13 +2536,11 @@ static int mlx5_esw_offloads_rep_load(struct mlx5_eswitch *esw, u16 vport_num)
25262536
int err;
25272537

25282538
rep = mlx5_eswitch_get_rep(esw, vport_num);
2529-
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
2530-
if (atomic_cmpxchg(&rep->rep_data[rep_type].state,
2531-
REP_REGISTERED, REP_LOADED) == REP_REGISTERED) {
2532-
err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep);
2533-
if (err)
2534-
goto err_reps;
2535-
}
2539+
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) {
2540+
err = __esw_offloads_load_rep(esw, rep, rep_type);
2541+
if (err)
2542+
goto err_reps;
2543+
}
25362544

25372545
return 0;
25382546

@@ -3277,7 +3285,7 @@ static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
32773285
esw_vport_destroy_offloads_acl_tables(esw, vport);
32783286
}
32793287

3280-
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
3288+
int mlx5_eswitch_reload_ib_reps(struct mlx5_eswitch *esw)
32813289
{
32823290
struct mlx5_eswitch_rep *rep;
32833291
unsigned long i;
@@ -3290,13 +3298,13 @@ int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
32903298
if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED)
32913299
return 0;
32923300

3293-
ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK);
3301+
ret = __esw_offloads_load_rep(esw, rep, REP_IB);
32943302
if (ret)
32953303
return ret;
32963304

32973305
mlx5_esw_for_each_rep(esw, i, rep) {
32983306
if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED)
3299-
mlx5_esw_offloads_rep_load(esw, rep->vport);
3307+
__esw_offloads_load_rep(esw, rep, REP_IB);
33003308
}
33013309

33023310
return 0;

drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev)
814814
if (shared_fdb)
815815
for (i = 0; i < ldev->ports; i++)
816816
if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
817-
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
817+
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
818818
}
819819

820820
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
@@ -922,7 +922,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
922922
mlx5_rescan_drivers_locked(dev0);
923923

924924
for (i = 0; i < ldev->ports; i++) {
925-
err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
925+
err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
926926
if (err)
927927
break;
928928
}
@@ -933,7 +933,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
933933
mlx5_deactivate_lag(ldev);
934934
mlx5_lag_add_devices(ldev);
935935
for (i = 0; i < ldev->ports; i++)
936-
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
936+
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
937937
mlx5_core_err(dev0, "Failed to enable lag\n");
938938
return;
939939
}

drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ static int enable_mpesw(struct mlx5_lag *ldev)
9999
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
100100
mlx5_rescan_drivers_locked(dev0);
101101
for (i = 0; i < ldev->ports; i++) {
102-
err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
102+
err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
103103
if (err)
104104
goto err_rescan_drivers;
105105
}
@@ -113,7 +113,7 @@ static int enable_mpesw(struct mlx5_lag *ldev)
113113
err_add_devices:
114114
mlx5_lag_add_devices(ldev);
115115
for (i = 0; i < ldev->ports; i++)
116-
mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch);
116+
mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
117117
mlx5_mpesw_metadata_cleanup(ldev);
118118
return err;
119119
}

0 commit comments

Comments
 (0)