Skip to content

Commit 5cfbe7e

Browse files
mosheshemesh2 authored and kuba-moo committed
net/mlx5: fw reset, add reset timeout work
Add a sync reset timeout to stop poll_sync_reset in case no reset done or abort event arrives within the timeout. Otherwise, poll sync reset will just continue, and in case of a fw fatal error no health reporting will be done.

Fixes: 38b9f90 ("net/mlx5: Handle sync reset request event")
Signed-off-by: Moshe Shemesh <[email protected]>
Reviewed-by: Shay Drori <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 79a0e32 commit 5cfbe7e

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct mlx5_fw_reset {
2727
struct work_struct reset_reload_work;
2828
struct work_struct reset_now_work;
2929
struct work_struct reset_abort_work;
30+
struct delayed_work reset_timeout_work;
3031
unsigned long reset_flags;
3132
u8 reset_method;
3233
struct timer_list timer;
@@ -259,6 +260,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool
259260
return -EALREADY;
260261
}
261262

263+
if (current_work() != &fw_reset->reset_timeout_work.work)
264+
cancel_delayed_work(&fw_reset->reset_timeout_work);
262265
mlx5_stop_sync_reset_poll(dev);
263266
if (poll_health)
264267
mlx5_start_health_poll(dev);
@@ -330,6 +333,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
330333
}
331334
mlx5_stop_health_poll(dev, true);
332335
mlx5_start_sync_reset_poll(dev);
336+
337+
if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
338+
&fw_reset->reset_flags))
339+
schedule_delayed_work(&fw_reset->reset_timeout_work,
340+
msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)));
333341
return 0;
334342
}
335343

@@ -739,6 +747,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
739747
}
740748
}
741749

750+
static void mlx5_sync_reset_timeout_work(struct work_struct *work)
751+
{
752+
struct delayed_work *dwork = container_of(work, struct delayed_work,
753+
work);
754+
struct mlx5_fw_reset *fw_reset =
755+
container_of(dwork, struct mlx5_fw_reset, reset_timeout_work);
756+
struct mlx5_core_dev *dev = fw_reset->dev;
757+
758+
if (mlx5_sync_reset_clear_reset_requested(dev, true))
759+
return;
760+
mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
761+
}
762+
742763
static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
743764
{
744765
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
@@ -822,6 +843,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
822843
cancel_work_sync(&fw_reset->reset_reload_work);
823844
cancel_work_sync(&fw_reset->reset_now_work);
824845
cancel_work_sync(&fw_reset->reset_abort_work);
846+
cancel_delayed_work(&fw_reset->reset_timeout_work);
825847
}
826848

827849
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
@@ -865,6 +887,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
865887
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
866888
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
867889
INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
890+
INIT_DELAYED_WORK(&fw_reset->reset_timeout_work,
891+
mlx5_sync_reset_timeout_work);
868892

869893
init_completion(&fw_reset->done);
870894
return 0;

0 commit comments

Comments
 (0)