Skip to content

Commit 9ca05b0

Browse files
msanallarleon
authored and committed
RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile
When the RDMA auxiliary driver probes, it sets its profile based on the devlink driverinit value. The latter might not be in sync with FW yet (in case a devlink reload has not been performed), thus causing a mismatch between the RDMA driver and FW. This results in the following FW syndrome when the RDMA driver tries to adjust the RoCE state, which fails the probe: "0xC1F678 | modify_nic_vport_context: roce_en set on a vport that doesn't support roce" To prevent this, select the PF profile based on the FW RoCE capability instead of relying on the devlink driverinit value. To provide backward compatibility of the RoCE disable feature, on older FWs where roce_rw_supported is not set (the FW RoCE capability is read-only), keep the current behavior, i.e., rely on the devlink driverinit value. Fixes: fbfa97b ("net/mlx5: Disable roce at HCA level") Reviewed-by: Shay Drory <[email protected]> Reviewed-by: Michael Guralnik <[email protected]> Reviewed-by: Saeed Mahameed <[email protected]> Signed-off-by: Maher Sanalla <[email protected]> Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 85eaeb5 commit 9ca05b0

File tree

3 files changed

+32
-12
lines changed

3 files changed

+32
-12
lines changed

drivers/infiniband/hw/mlx5/main.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
43364336
dev->mdev = mdev;
43374337
dev->num_ports = num_ports;
43384338

4339-
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
4339+
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
43404340
profile = &raw_eth_profile;
43414341
else
43424342
profile = &pf_profile;

drivers/net/ethernet/mellanox/mlx5/core/main.c

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -494,6 +494,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
494494
return err;
495495
}
496496

497+
bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
498+
{
499+
struct devlink *devlink = priv_to_devlink(dev);
500+
union devlink_param_value val;
501+
int err;
502+
503+
err = devlink_param_driverinit_value_get(devlink,
504+
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
505+
&val);
506+
507+
if (!err)
508+
return val.vbool;
509+
510+
mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
511+
return MLX5_CAP_GEN(dev, roce);
512+
}
513+
EXPORT_SYMBOL(mlx5_is_roce_on);
514+
497515
static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
498516
{
499517
void *set_hca_cap;
@@ -597,7 +615,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
597615
MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
598616

599617
if (MLX5_CAP_GEN(dev, roce_rw_supported))
600-
MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
618+
MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
619+
mlx5_is_roce_on(dev));
601620

602621
max_uc_list = max_uc_list_get_devlink_param(dev);
603622
if (max_uc_list > 0)
@@ -623,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
623642
*/
624643
static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
625644
{
626-
return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
645+
return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
627646
(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
628647
}
629648

include/linux/mlx5/driver.h

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1279,16 +1279,17 @@ enum {
12791279
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
12801280
};
12811281

1282-
static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
1282+
bool mlx5_is_roce_on(struct mlx5_core_dev *dev);
1283+
1284+
static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
12831285
{
1284-
struct devlink *devlink = priv_to_devlink(dev);
1285-
union devlink_param_value val;
1286-
int err;
1287-
1288-
err = devlink_param_driverinit_value_get(devlink,
1289-
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
1290-
&val);
1291-
return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
1286+
if (MLX5_CAP_GEN(dev, roce_rw_supported))
1287+
return MLX5_CAP_GEN(dev, roce);
1288+
1289+
/* If RoCE cap is read-only in FW, get RoCE state from devlink
1290+
* in order to support RoCE enable/disable feature
1291+
*/
1292+
return mlx5_is_roce_on(dev);
12921293
}
12931294

12941295
#endif /* MLX5_DRIVER_H */

0 commit comments

Comments
 (0)