Skip to content

Commit e80d655

Browse files
Shahar Shitrit authored and kuba-moo committed
net/mlx5e: Fix potential deadlock by deferring RX timeout recovery
mlx5e_reporter_rx_timeout() is currently invoked synchronously in the driver's open error flow. This causes the thread holding priv->state_lock to attempt acquiring the devlink lock, which can result in a circular dependency with other devlink operations. For example: - Devlink health diagnose flow: - __devlink_nl_pre_doit() acquires the devlink lock. - devlink_nl_health_reporter_diagnose_doit() invokes the driver's diagnose callback. - mlx5e_rx_reporter_diagnose() then attempts to acquire priv->state_lock. - Driver open flow: - mlx5e_open() acquires priv->state_lock. - If an error occurs, devlink_health_reporter may be called, attempting to acquire the devlink lock. To prevent this circular locking scenario, defer the RX timeout recovery by scheduling it via a workqueue. This ensures that the recovery work acquires locks in a consistent order: first the devlink lock, then priv->state_lock. Additionally, make the recovery work acquire the netdev instance lock to safely synchronize with the open/close channel flows, similar to mlx5e_tx_timeout_work. Repeatedly attempt to acquire the netdev instance lock until it is taken or the target RQ is no longer active, as indicated by the MLX5E_STATE_CHANNELS_ACTIVE bit. Fixes: 32c57fb ("net/mlx5e: Report and recover from rx timeout") Signed-off-by: Shahar Shitrit <[email protected]> Reviewed-by: Cosmin Ratiu <[email protected]> Reviewed-by: Dragos Tatulea <[email protected]> Signed-off-by: Tariq Toukan <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 6d19c44 commit e80d655

File tree

3 files changed

+33
-1
lines changed

3 files changed

+33
-1
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,7 @@ struct mlx5e_rq {
728728
struct xsk_buff_pool *xsk_pool;
729729

730730
struct work_struct recover_work;
731+
struct work_struct rx_timeout_work;
731732

732733
/* control */
733734
struct mlx5_wq_ctrl wq_ctrl;

drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
170170
/* Recover an RQ whose RX path timed out (devlink health recover callback).
 *
 * ctx is the struct mlx5e_rq that hit the timeout. Per the commit message,
 * this runs from deferred work after the devlink lock is already held, so
 * taking priv->state_lock here follows the devlink-lock -> state_lock order
 * and avoids the ABBA deadlock with mlx5e_open() / the diagnose flow.
 *
 * Returns 0 on successful EQ recovery, or a negative errno from
 * mlx5e_health_channel_eq_recover() on failure.
 */
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
{
	struct mlx5_eq_comp *eq;
	struct mlx5e_priv *priv;
	struct mlx5e_rq *rq;
	int err;

	rq = ctx;
	priv = rq->priv;

	/* Serialize with open/close channel flows that also take state_lock. */
	mutex_lock(&priv->state_lock);

	eq = rq->cq.mcq.eq;

	err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
	/* On failed EQ recovery, disable the companion ICOSQ (when present)
	 * so it stops posting work for this broken channel.
	 */
	if (err && rq->icosq)
		clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);

	mutex_unlock(&priv->state_lock);

	return err;
}
185192

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
707707
mlx5e_reporter_rq_cqe_err(rq);
708708
}
709709

710+
/* Deferred RX timeout recovery (rq->rx_timeout_work handler).
 *
 * Queued from mlx5e_wait_for_min_rx_wqes() instead of calling
 * mlx5e_reporter_rx_timeout() synchronously, so the reporter is not invoked
 * while priv->state_lock is held in the open error flow (see commit message:
 * prevents a devlink-lock vs state_lock circular dependency).
 */
static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
{
	struct mlx5e_rq *rq = container_of(timeout_work,
					   struct mlx5e_rq,
					   rx_timeout_work);

	/* Acquire netdev instance lock to synchronize with channel close and
	 * reopen flows. Either successfully obtain the lock, or detect that
	 * channels are closing for another reason, making this work no longer
	 * necessary.
	 */
	while (!netdev_trylock(rq->netdev)) {
		/* Channels being torn down: the RQ is going away, bail out. */
		if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
			return;
		/* Back off briefly before retrying the trylock. */
		msleep(20);
	}

	mlx5e_reporter_rx_timeout(rq);
	netdev_unlock(rq->netdev);
}
730+
710731
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
711732
{
712733
rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
@@ -830,6 +851,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
830851

831852
rqp->wq.db_numa_node = node;
832853
INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
854+
INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work);
833855

834856
if (params->xdp_prog)
835857
bpf_prog_inc(params->xdp_prog);
@@ -1204,7 +1226,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
12041226
netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
12051227
rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
12061228

1207-
mlx5e_reporter_rx_timeout(rq);
1229+
queue_work(rq->priv->wq, &rq->rx_timeout_work);
1230+
12081231
return -ETIMEDOUT;
12091232
}
12101233

@@ -1375,6 +1398,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq)
13751398
if (rq->dim)
13761399
cancel_work_sync(&rq->dim->work);
13771400
cancel_work_sync(&rq->recover_work);
1401+
cancel_work_sync(&rq->rx_timeout_work);
13781402
mlx5e_destroy_rq(rq);
13791403
mlx5e_free_rx_descs(rq);
13801404
mlx5e_free_rq(rq);

0 commit comments

Comments (0)