Skip to content

Commit 93fd8eb

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe:
 "This is an unusually large bunch of bug fixes for the later rc cycle,
  rxe and mlx5 both dumped a lot of things at once. rxe continues to fix
  itself, and mlx5 is fixing a bunch of "queue counters" related bugs.

  There is one highly notable bug fix regarding the qkey. This small
  security check was missed in the original 2005 implementation and it
  allows some significant issues.

  Summary:

   - Two rtrs bug fixes for error unwind bugs

   - Several rxe bug fixes:
      * Incorrect Rx packet validation
      * Using memory without a refcount
      * Syzkaller found use before initialization
      * Regression fix for missing locking with the tasklet conversion
        from this merge window

   - Have bnxt report the correct link properties to userspace, this was
     a regression in v6.3

   - Several mlx5 bug fixes:
      * Kernel crash triggerable by userspace for the RAW ethernet
        profile
      * Defend against steering refcounting issues created by userspace
      * Incorrect change of QP port affinity parameters in some LAG
        configurations

   - Fix mlx5 Q counters:
      * Do not over allocate Q counters to allow userspace to use the
        full port capacity
      * Kernel crash triggered by eswitch due to mis-use of Q counters
      * Incorrect mlx5_device for Q counters in some LAG configurations

   - Properly implement the IBA spec restricting privileged qkeys to
     root

   - Always an error when reading from a disassociated device's event
     queue

   - isert bug fixes:
      * Avoid a deadlock with the CM handler and CM ID destruction
      * Correct list corruption due to incorrect locking
      * Fix a use after free around connection tear down"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Fix rxe_cq_post
  IB/isert: Fix incorrect release of isert connection
  IB/isert: Fix possible list corruption in CMA handler
  IB/isert: Fix dead lock in ib_isert
  RDMA/mlx5: Fix affinity assignment
  IB/uverbs: Fix to consider event queue closing also upon non-blocking mode
  RDMA/uverbs: Restrict usage of privileged QKEYs
  RDMA/cma: Always set static rate to 0 for RoCE
  RDMA/mlx5: Fix Q-counters query in LAG mode
  RDMA/mlx5: Remove vport Q-counters dependency on normal Q-counters
  RDMA/mlx5: Fix Q-counters per vport allocation
  RDMA/mlx5: Create an indirect flow table for steering anchor
  RDMA/mlx5: Initiate dropless RQ for RAW Ethernet functions
  RDMA/rxe: Fix the use-before-initialization error of resp_pkts
  RDMA/bnxt_re: Fix reporting active_{speed,width} attributes
  RDMA/rxe: Fix ref count error in check_rkey()
  RDMA/rxe: Fix packet length checks
  RDMA/rtrs: Fix rxe_dealloc_pd warning
  RDMA/rtrs: Fix the last iu->buf leak in err path
2 parents b7feaa4 + 0c7e314 commit 93fd8eb

File tree

22 files changed

+447
-130
lines changed

22 files changed

+447
-130
lines changed

drivers/infiniband/core/cma.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3295,7 +3295,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	route->path_rec->traffic_class = tos;
 	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
 	route->path_rec->rate_selector = IB_SA_EQ;
-	route->path_rec->rate = iboe_get_rate(ndev);
+	route->path_rec->rate = IB_RATE_PORT_CURRENT;
 	dev_put(ndev);
 	route->path_rec->packet_life_time_selector = IB_SA_EQ;
 	/* In case ACK timeout is set, use this value to calculate
@@ -4964,7 +4964,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	if (!ndev)
 		return -ENODEV;
 
-	ib.rec.rate = iboe_get_rate(ndev);
+	ib.rec.rate = IB_RATE_PORT_CURRENT;
 	ib.rec.hop_limit = 1;
 	ib.rec.mtu = iboe_get_mtu(ndev->mtu);

drivers/infiniband/core/uverbs_cmd.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1850,8 +1850,13 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
 		attr->path_mtu = cmd->base.path_mtu;
 	if (cmd->base.attr_mask & IB_QP_PATH_MIG_STATE)
 		attr->path_mig_state = cmd->base.path_mig_state;
-	if (cmd->base.attr_mask & IB_QP_QKEY)
+	if (cmd->base.attr_mask & IB_QP_QKEY) {
+		if (cmd->base.qkey & IB_QP_SET_QKEY && !capable(CAP_NET_RAW)) {
+			ret = -EPERM;
+			goto release_qp;
+		}
 		attr->qkey = cmd->base.qkey;
+	}
 	if (cmd->base.attr_mask & IB_QP_RQ_PSN)
 		attr->rq_psn = cmd->base.rq_psn;
 	if (cmd->base.attr_mask & IB_QP_SQ_PSN)

drivers/infiniband/core/uverbs_main.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,12 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
 	spin_lock_irq(&ev_queue->lock);
 
 	while (list_empty(&ev_queue->event_list)) {
-		spin_unlock_irq(&ev_queue->lock);
+		if (ev_queue->is_closed) {
+			spin_unlock_irq(&ev_queue->lock);
+			return -EIO;
+		}
 
+		spin_unlock_irq(&ev_queue->lock);
 		if (filp->f_flags & O_NONBLOCK)
 			return -EAGAIN;
 
@@ -233,12 +237,6 @@ static ssize_t ib_uverbs_event_read(struct ib_uverbs_event_queue *ev_queue,
 			return -ERESTARTSYS;
 
 		spin_lock_irq(&ev_queue->lock);
-
-		/* If device was disassociated and no event exists set an error */
-		if (list_empty(&ev_queue->event_list) && ev_queue->is_closed) {
-			spin_unlock_irq(&ev_queue->lock);
-			return -EIO;
-		}
 	}
 
 	event = list_entry(ev_queue->event_list.next, struct ib_uverbs_event, list);

drivers/infiniband/hw/bnxt_re/bnxt_re.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ struct bnxt_re_dev {
 
 	struct delayed_work worker;
 	u8 cur_prio_map;
-	u16 active_speed;
-	u8 active_width;
 
 	/* FP Notification Queue (CQ & SRQ) */
 	struct tasklet_struct nq_task;

drivers/infiniband/hw/bnxt_re/ib_verbs.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
 {
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	int rc;
 
 	memset(port_attr, 0, sizeof(*port_attr));
 
@@ -228,10 +229,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num,
 	port_attr->sm_sl = 0;
 	port_attr->subnet_timeout = 0;
 	port_attr->init_type_reply = 0;
-	port_attr->active_speed = rdev->active_speed;
-	port_attr->active_width = rdev->active_width;
+	rc = ib_get_eth_speed(&rdev->ibdev, port_num, &port_attr->active_speed,
+			      &port_attr->active_width);
 
-	return 0;
+	return rc;
 }
 
 int bnxt_re_get_port_immutable(struct ib_device *ibdev, u32 port_num,

drivers/infiniband/hw/bnxt_re/main.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,8 +1077,6 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
 		return rc;
 	}
 	dev_info(rdev_to_dev(rdev), "Device registered with IB successfully");
-	ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
-			 &rdev->active_width);
 	set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
 
 	event = netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev) ?

drivers/infiniband/hw/mlx5/counters.c

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,8 @@ static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
 	    !vport_qcounters_supported(dev)) || !port_num)
 		return &dev->port[0].cnts;
 
-	return &dev->port[port_num - 1].cnts;
+	return is_mdev_switchdev_mode(dev->mdev) ?
+	       &dev->port[1].cnts : &dev->port[port_num - 1].cnts;
 }
 
 /**
@@ -262,7 +263,7 @@ static struct rdma_hw_stats *
 mlx5_ib_alloc_hw_port_stats(struct ib_device *ibdev, u32 port_num)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	const struct mlx5_ib_counters *cnts = &dev->port[port_num - 1].cnts;
+	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
 
 	return do_alloc_stats(cnts);
 }
@@ -329,19 +330,24 @@ static int mlx5_ib_query_q_counters_vport(struct mlx5_ib_dev *dev,
 {
 	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+	struct mlx5_core_dev *mdev;
 	__be32 val;
 	int ret, i;
 
 	if (!dev->port[port_num].rep ||
 	    dev->port[port_num].rep->vport == MLX5_VPORT_UPLINK)
 		return 0;
 
+	mdev = mlx5_eswitch_get_core_dev(dev->port[port_num].rep->esw);
+	if (!mdev)
+		return -EOPNOTSUPP;
+
 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 	MLX5_SET(query_q_counter_in, in, other_vport, 1);
 	MLX5_SET(query_q_counter_in, in, vport_number,
 		 dev->port[port_num].rep->vport);
 	MLX5_SET(query_q_counter_in, in, aggregate, 1);
-	ret = mlx5_cmd_exec_inout(dev->mdev, query_q_counter, in, out);
+	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 	if (ret)
 		return ret;
 
@@ -575,43 +581,53 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
 			port_num != MLX5_VPORT_PF;
 	const struct mlx5_ib_counter *names;
-	int j = 0, i;
+	int j = 0, i, size;
 
 	names = is_vport ? vport_basic_q_cnts : basic_q_cnts;
-	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+	       ARRAY_SIZE(basic_q_cnts);
+	for (i = 0; i < size; i++, j++) {
 		descs[j].name = names[i].name;
-		offsets[j] = basic_q_cnts[i].offset;
+		offsets[j] = names[i].offset;
 	}
 
 	names = is_vport ? vport_out_of_seq_q_cnts : out_of_seq_q_cnts;
+	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+	       ARRAY_SIZE(out_of_seq_q_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
-		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+		for (i = 0; i < size; i++, j++) {
 			descs[j].name = names[i].name;
-			offsets[j] = out_of_seq_q_cnts[i].offset;
+			offsets[j] = names[i].offset;
 		}
 	}
 
 	names = is_vport ? vport_retrans_q_cnts : retrans_q_cnts;
+	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+	       ARRAY_SIZE(retrans_q_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
-		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+		for (i = 0; i < size; i++, j++) {
 			descs[j].name = names[i].name;
-			offsets[j] = retrans_q_cnts[i].offset;
+			offsets[j] = names[i].offset;
 		}
 	}
 
 	names = is_vport ? vport_extended_err_cnts : extended_err_cnts;
+	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+	       ARRAY_SIZE(extended_err_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
-		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+		for (i = 0; i < size; i++, j++) {
 			descs[j].name = names[i].name;
-			offsets[j] = extended_err_cnts[i].offset;
+			offsets[j] = names[i].offset;
 		}
 	}
 
 	names = is_vport ? vport_roce_accl_cnts : roce_accl_cnts;
+	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+	       ARRAY_SIZE(roce_accl_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
-		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
+		for (i = 0; i < size; i++, j++) {
 			descs[j].name = names[i].name;
-			offsets[j] = roce_accl_cnts[i].offset;
+			offsets[j] = names[i].offset;
 		}
 	}
 
@@ -661,25 +677,37 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_counters *cnts, u32 port_num)
 {
-	u32 num_counters, num_op_counters = 0;
+	bool is_vport = is_mdev_switchdev_mode(dev->mdev) &&
+			port_num != MLX5_VPORT_PF;
+	u32 num_counters, num_op_counters = 0, size;
 
-	num_counters = ARRAY_SIZE(basic_q_cnts);
+	size = is_vport ? ARRAY_SIZE(vport_basic_q_cnts) :
+	       ARRAY_SIZE(basic_q_cnts);
+	num_counters = size;
 
+	size = is_vport ? ARRAY_SIZE(vport_out_of_seq_q_cnts) :
+	       ARRAY_SIZE(out_of_seq_q_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
-		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+		num_counters += size;
 
+	size = is_vport ? ARRAY_SIZE(vport_retrans_q_cnts) :
+	       ARRAY_SIZE(retrans_q_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
-		num_counters += ARRAY_SIZE(retrans_q_cnts);
+		num_counters += size;
 
+	size = is_vport ? ARRAY_SIZE(vport_extended_err_cnts) :
+	       ARRAY_SIZE(extended_err_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
-		num_counters += ARRAY_SIZE(extended_err_cnts);
+		num_counters += size;
 
+	size = is_vport ? ARRAY_SIZE(vport_roce_accl_cnts) :
+	       ARRAY_SIZE(roce_accl_cnts);
 	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
-		num_counters += ARRAY_SIZE(roce_accl_cnts);
+		num_counters += size;
 
 	cnts->num_q_counters = num_counters;
 
-	if (is_mdev_switchdev_mode(dev->mdev) && port_num != MLX5_VPORT_PF)
+	if (is_vport)
 		goto skip_non_qcounters;
 
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -725,11 +753,11 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-	int num_cnt_ports;
+	int num_cnt_ports = dev->num_ports;
 	int i, j;
 
-	num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
-			 vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+	if (is_mdev_switchdev_mode(dev->mdev))
+		num_cnt_ports = min(2, num_cnt_ports);
 
 	MLX5_SET(dealloc_q_counter_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
@@ -761,15 +789,22 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
 	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-	int num_cnt_ports;
+	int num_cnt_ports = dev->num_ports;
 	int err = 0;
 	int i;
 	bool is_shared;
 
 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
-	num_cnt_ports = (!is_mdev_switchdev_mode(dev->mdev) ||
-			 vport_qcounters_supported(dev)) ? dev->num_ports : 1;
+
+	/*
+	 * In switchdev we need to allocate two ports, one that is used for
+	 * the device Q_counters and it is essentially the real Q_counters of
+	 * this device, while the other is used as a helper for PF to be able to
+	 * query all other vports.
+	 */
+	if (is_mdev_switchdev_mode(dev->mdev))
+		num_cnt_ports = min(2, num_cnt_ports);
 
 	for (i = 0; i < num_cnt_ports; i++) {
 		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts, i);

0 commit comments

Comments
 (0)