// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.

#include "en.h"
#include "pcie_cong_event.h"

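/*
 * Value of {in,out}bound_cong_state that the query below treats as the
 * congested state; presumably reported once utilization has crossed
 * the high threshold.
 */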
#define MLX5E_CONG_HIGH_STATE 0x7

enum {
	MLX5E_INBOUND_CONG = BIT(0),
	MLX5E_OUTBOUND_CONG = BIT(1),
};

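/*
 * Each direction uses a high/low threshold pair as a hysteresis: the
 * device raises an event when utilization rises above the high
 * threshold and again when it falls back below the low one, instead of
 * toggling around a single cut-off.
 */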
struct mlx5e_pcie_cong_thresh {
	u16 inbound_high;
	u16 inbound_low;
	u16 outbound_high;
	u16 outbound_low;
};

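/*
 * Ethtool counters: number of transitions into the congested state
 * (high) and back out of it (low), per direction.
 */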
struct mlx5e_pcie_cong_stats {
	u32 pci_bw_inbound_high;
	u32 pci_bw_inbound_low;
	u32 pci_bw_outbound_high;
	u32 pci_bw_outbound_low;
};

struct mlx5e_pcie_cong_event {
	u64 obj_id;

	struct mlx5e_priv *priv;

	/* For event notifier and workqueue. */
	struct work_struct work;
	struct mlx5_nb nb;

	/* Stores last read state. */
	u8 state;

	/* For ethtool stats group. */
	struct mlx5e_pcie_cong_stats stats;
};

/* Thresholds in units of 0.01%, i.e. 9000 is 90% and 7500 is 75%. */
static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
	.inbound_high = 9000,
	.inbound_low = 7500,
	.outbound_high = 9000,
	.outbound_low = 7500,
};

static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = {
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_inbound_high) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_inbound_low) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_outbound_high) },
	{ MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats,
			     pci_bw_outbound_low) },
};

#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc)

static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong)
{
	return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0;
}

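/* No-op: the counters are updated from the congestion event work item. */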
static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong)
{
	if (!priv->cong_event)
		return;

	for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++)
		ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format);
}

static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong)
{
	if (!priv->cong_event)
		return;

	for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) {
		u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats,
					       mlx5e_pcie_cong_stats_desc,
					       i);

		mlx5e_ethtool_put_stat(data, ctr);
	}
}

MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);

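/*
 * Create a PCIE_CONG_EVENT general object with both inbound and
 * outbound events enabled and the given thresholds. On success the
 * new object ID is returned through @obj_id.
 */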
static int
mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
			     const struct mlx5e_pcie_cong_thresh *config,
			     u64 *obj_id)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	void *cong_obj;
	void *hdr;
	int err;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);

	MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1);
	MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1);

	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 inbound_cong_high_threshold, config->inbound_high);
	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 inbound_cong_low_threshold, config->inbound_low);

	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 outbound_cong_high_threshold, config->outbound_high);
	MLX5_SET(pcie_cong_event_obj, cong_obj,
		 outbound_cong_low_threshold, config->outbound_low);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
		      *obj_id,
		      config->inbound_high, config->inbound_low,
		      config->outbound_high, config->outbound_low);

	return 0;
}

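/* Destroy the object created by mlx5_cmd_pcie_cong_event_set(). */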
static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
					    u64 obj_id)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
	void *hdr;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

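/*
 * Query the object and translate the device state into
 * MLX5E_{IN,OUT}BOUND_CONG bits. Bits in @state are only set, never
 * cleared, so callers must pass a zero-initialized value.
 */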
static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
					  u64 obj_id,
					  u32 *state)
{
	u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
	u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
	void *obj;
	void *hdr;
	u8 cong;
	int err;

	hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
		 MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
		 MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);

	if (state) {
		cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
		if (cong == MLX5E_CONG_HIGH_STATE)
			*state |= MLX5E_INBOUND_CONG;

		cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state);
		if (cong == MLX5E_CONG_HIGH_STATE)
			*state |= MLX5E_OUTBOUND_CONG;
	}

	return 0;
}

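/*
 * Work item: re-query the current state and count each low->high and
 * high->low transition per direction. The counters are written only
 * here; the ethtool fill op reads them locklessly.
 */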
static void mlx5e_pcie_cong_event_work(struct work_struct *work)
{
	struct mlx5e_pcie_cong_event *cong_event;
	struct mlx5_core_dev *dev;
	struct mlx5e_priv *priv;
	u32 new_cong_state = 0;
	u32 changes;
	int err;

	cong_event = container_of(work, struct mlx5e_pcie_cong_event, work);
	priv = cong_event->priv;
	dev = priv->mdev;

	err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
					     &new_cong_state);
	if (err) {
		mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
			       err, cong_event->obj_id);
		return;
	}

	changes = cong_event->state ^ new_cong_state;
	if (!changes)
		return;

	cong_event->state = new_cong_state;

	if (changes & MLX5E_INBOUND_CONG) {
		if (new_cong_state & MLX5E_INBOUND_CONG)
			cong_event->stats.pci_bw_inbound_high++;
		else
			cong_event->stats.pci_bw_inbound_low++;
	}

	if (changes & MLX5E_OUTBOUND_CONG) {
		if (new_cong_state & MLX5E_OUTBOUND_CONG)
			cong_event->stats.pci_bw_outbound_high++;
		else
			cong_event->stats.pci_bw_outbound_low++;
	}
}

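/* Notifier callback, called from EQ (atomic) context: defer to the workqueue. */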
static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
					 unsigned long event, void *eqe)
{
	struct mlx5e_pcie_cong_event *cong_event;

	cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb);
	queue_work(cong_event->priv->wq, &cong_event->work);

	return NOTIFY_OK;
}

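/*
 * Create the event object and register for OBJECT_CHANGE events.
 * Returns 0 when the device lacks support; in that case
 * priv->cong_event stays NULL and the stats group exposes no counters.
 */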
int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
{
	struct mlx5e_pcie_cong_event *cong_event;
	struct mlx5_core_dev *mdev = priv->mdev;
	int err;

	if (!mlx5_pcie_cong_event_supported(mdev))
		return 0;

	cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL,
				   mdev->priv.numa_node);
	if (!cong_event)
		return -ENOMEM;

	INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work);
	MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler,
		     OBJECT_CHANGE);

	cong_event->priv = priv;

	err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
					   &cong_event->obj_id);
	if (err) {
		mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
		goto err_free;
	}

	err = mlx5_eq_notifier_register(mdev, &cong_event->nb);
	if (err) {
		mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n");
		goto err_obj_destroy;
	}

	priv->cong_event = cong_event;

	return 0;

err_obj_destroy:
	mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
err_free:
	kvfree(cong_event);

	return err;
}

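/*
 * Teardown order matters: clear priv->cong_event first so the ethtool
 * ops stop reporting, unregister the notifier, then cancel the work
 * before destroying the object it queries.
 */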
void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_pcie_cong_event *cong_event = priv->cong_event;
	struct mlx5_core_dev *mdev = priv->mdev;

	if (!cong_event)
		return;

	priv->cong_event = NULL;

	mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
	cancel_work_sync(&cong_event->work);

	if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id))
		mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n",
			       cong_event->obj_id);

	kvfree(cong_event);
}