Skip to content

Commit eb11f02

Browse files
committed
Merge branch 'mlx5-misc-fixes-2025-09-28'
Tariq Toukan says:

====================
mlx5 misc fixes 2025-09-28

misc bug fixes from the team to the mlx5 core driver.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 8169a60 + 5cfbe7e commit eb11f02

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

drivers/net/ethernet/mellanox/mlx5/core/cmd.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -294,6 +294,10 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
294294
return;
295295
}
296296
cond_resched();
297+
if (mlx5_cmd_is_down(dev)) {
298+
ent->ret = -ENXIO;
299+
return;
300+
}
297301
} while (time_before(jiffies, poll_end));
298302

299303
ent->ret = -ETIMEDOUT;
@@ -1070,7 +1074,7 @@ static void cmd_work_handler(struct work_struct *work)
10701074
poll_timeout(ent);
10711075
/* make sure we read the descriptor after ownership is SW */
10721076
rmb();
1073-
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT));
1077+
mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, !!ent->ret);
10741078
}
10751079
}
10761080

drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ struct mlx5_fw_reset {
2727
struct work_struct reset_reload_work;
2828
struct work_struct reset_now_work;
2929
struct work_struct reset_abort_work;
30+
struct delayed_work reset_timeout_work;
3031
unsigned long reset_flags;
3132
u8 reset_method;
3233
struct timer_list timer;
@@ -259,6 +260,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool
259260
return -EALREADY;
260261
}
261262

263+
if (current_work() != &fw_reset->reset_timeout_work.work)
264+
cancel_delayed_work(&fw_reset->reset_timeout_work);
262265
mlx5_stop_sync_reset_poll(dev);
263266
if (poll_health)
264267
mlx5_start_health_poll(dev);
@@ -330,6 +333,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
330333
}
331334
mlx5_stop_health_poll(dev, true);
332335
mlx5_start_sync_reset_poll(dev);
336+
337+
if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
338+
&fw_reset->reset_flags))
339+
schedule_delayed_work(&fw_reset->reset_timeout_work,
340+
msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE)));
333341
return 0;
334342
}
335343

@@ -739,6 +747,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct
739747
}
740748
}
741749

750+
static void mlx5_sync_reset_timeout_work(struct work_struct *work)
751+
{
752+
struct delayed_work *dwork = container_of(work, struct delayed_work,
753+
work);
754+
struct mlx5_fw_reset *fw_reset =
755+
container_of(dwork, struct mlx5_fw_reset, reset_timeout_work);
756+
struct mlx5_core_dev *dev = fw_reset->dev;
757+
758+
if (mlx5_sync_reset_clear_reset_requested(dev, true))
759+
return;
760+
mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n");
761+
}
762+
742763
static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data)
743764
{
744765
struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb);
@@ -822,6 +843,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev)
822843
cancel_work_sync(&fw_reset->reset_reload_work);
823844
cancel_work_sync(&fw_reset->reset_now_work);
824845
cancel_work_sync(&fw_reset->reset_abort_work);
846+
cancel_delayed_work(&fw_reset->reset_timeout_work);
825847
}
826848

827849
static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
@@ -865,6 +887,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev)
865887
INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work);
866888
INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event);
867889
INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event);
890+
INIT_DELAYED_WORK(&fw_reset->reset_timeout_work,
891+
mlx5_sync_reset_timeout_work);
868892

869893
init_completion(&fw_reset->done);
870894
return 0;

drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -489,9 +489,12 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
489489
u32 func_id;
490490
u32 npages;
491491
u32 i = 0;
492+
int err;
492493

493-
if (!mlx5_cmd_is_down(dev))
494-
return mlx5_cmd_do(dev, in, in_size, out, out_size);
494+
err = mlx5_cmd_do(dev, in, in_size, out, out_size);
495+
/* If FW is gone (-ENXIO), proceed to forceful reclaim */
496+
if (err != -ENXIO)
497+
return err;
495498

496499
/* No hard feelings, we want our pages back! */
497500
npages = MLX5_GET(manage_pages_in, in, input_num_entries);

0 commit comments

Comments
 (0)