Skip to content

Commit 7ce4de1

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: - Various minor code cleanups and fixes for hns, iser, cxgb4, hfi1, rxe, erdma, mana_ib - Prefetch support for rxe ODP - Remove memory window support from hns as new device FW no longer supports it - Remove qib, it is very old and obsolete now, Cornelis wishes to restructure the hfi1/qib shared layer - Fix a race in destroying CQs where we can still end up with work running because the work is canceled before the driver stops triggering it - Improve interaction with namespaces: * Follow the devlink namespace for newly spawned RDMA devices * Create ipoib net devices in the parent IB device's namespace * Allow CAP_NET_RAW checks to pass in user namespaces - A new flow control scheme for IB MADs to try and avoid queue overflows in the network - Fix 2G message sizes in bnxt_re - Optimize mkey layout for mlx5 DMABUF - New "DMA Handle" concept to allow controlling PCI TPH and steering tags * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits) RDMA/siw: Change maintainer email address RDMA/mana_ib: add support of multiple ports RDMA/mlx5: Refactor optional counters steering code RDMA/mlx5: Add DMAH support for reg_user_mr/reg_user_dmabuf_mr IB: Extend UVERBS_METHOD_REG_MR to get DMAH RDMA/mlx5: Add DMAH object support RDMA/core: Introduce a DMAH object and its alloc/free APIs IB/core: Add UVERBS_METHOD_REG_MR on the MR object net/mlx5: Add support for device steering tag net/mlx5: Expose IFC bits for TPH PCI/TPH: Expose pcie_tph_get_st_table_size() RDMA/mlx5: Fix incorrect MKEY masking RDMA/mlx5: Fix returned type from _mlx5r_umr_zap_mkey() RDMA/mlx5: remove redundant check on err on return expression RDMA/mana_ib: add additional port counters RDMA/mana_ib: Fix DSCP value in modify QP RDMA/efa: Add CQ with external memory support RDMA/core: Add umem "is_contiguous" and "start_dma_addr" helpers RDMA/uverbs: Add a common way to create CQ with umem RDMA/mlx5: Optimize DMABUF mkey page size ...
2 parents 2c8c9aa + ee23592 commit 7ce4de1

File tree

153 files changed

+2869
-49167
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

153 files changed

+2869
-49167
lines changed

.mailmap

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ Benjamin Poirier <[email protected]> <[email protected]>
138138
Benjamin Tissoires <[email protected]> <[email protected]>
139139
Benjamin Tissoires <[email protected]> <[email protected]>
140140
141+
141142
142143
143144

MAINTAINERS

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20370,12 +20370,6 @@ S: Maintained
2037020370
F: drivers/firmware/qemu_fw_cfg.c
2037120371
F: include/uapi/linux/qemu_fw_cfg.h
2037220372

20373-
QIB DRIVER
20374-
M: Dennis Dalessandro <[email protected]>
20375-
20376-
S: Supported
20377-
F: drivers/infiniband/hw/qib/
20378-
2037920373
QLOGIC QL41xxx FCOE DRIVER
2038020374
M: Saurav Kashyap <[email protected]>
2038120375
M: Javed Hasan <[email protected]>
@@ -23222,7 +23216,7 @@ S: Maintained
2322223216
F: drivers/leds/leds-net48xx.c
2322323217

2322423218
SOFT-IWARP DRIVER (siw)
23225-
M: Bernard Metzler <[email protected]>
23219+
M: Bernard Metzler <[email protected]>
2322623220
2322723221
S: Supported
2322823222
F: drivers/infiniband/sw/siw/

drivers/infiniband/Kconfig

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ source "drivers/infiniband/hw/mlx5/Kconfig"
9292
source "drivers/infiniband/hw/mthca/Kconfig"
9393
source "drivers/infiniband/hw/ocrdma/Kconfig"
9494
source "drivers/infiniband/hw/qedr/Kconfig"
95-
source "drivers/infiniband/hw/qib/Kconfig"
9695
source "drivers/infiniband/hw/usnic/Kconfig"
9796
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
9897
source "drivers/infiniband/sw/rdmavt/Kconfig"

drivers/infiniband/core/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ ib_umad-y := user_mad.o
3333
ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
3434
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
3535
uverbs_std_types_cq.o \
36+
uverbs_std_types_dmah.o \
3637
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
3738
uverbs_std_types_mr.o uverbs_std_types_counters.o \
3839
uverbs_uapi.o uverbs_std_types_device.o \

drivers/infiniband/core/cm.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct cm_counter_attribute {
161161
struct cm_port {
162162
struct cm_device *cm_dev;
163163
struct ib_mad_agent *mad_agent;
164+
struct ib_mad_agent *rep_agent;
164165
u32 port_num;
165166
atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
166167
};
@@ -274,7 +275,8 @@ static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
274275
complete(&cm_id_priv->comp);
275276
}
276277

277-
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
278+
static struct ib_mad_send_buf *
279+
cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
278280
{
279281
struct ib_mad_agent *mad_agent;
280282
struct ib_mad_send_buf *m;
@@ -286,7 +288,8 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
286288
return ERR_PTR(-EINVAL);
287289

288290
read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
289-
mad_agent = cm_id_priv->av.port->mad_agent;
291+
mad_agent = rep_agent ? cm_id_priv->av.port->rep_agent :
292+
cm_id_priv->av.port->mad_agent;
290293
if (!mad_agent) {
291294
m = ERR_PTR(-EINVAL);
292295
goto out;
@@ -315,6 +318,11 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
315318
return m;
316319
}
317320

321+
static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
322+
{
323+
return cm_alloc_msg_agent(cm_id_priv, false);
324+
}
325+
318326
static void cm_free_msg(struct ib_mad_send_buf *msg)
319327
{
320328
if (msg->ah)
@@ -323,13 +331,14 @@ static void cm_free_msg(struct ib_mad_send_buf *msg)
323331
}
324332

325333
static struct ib_mad_send_buf *
326-
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
334+
cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
335+
bool rep_agent)
327336
{
328337
struct ib_mad_send_buf *msg;
329338

330339
lockdep_assert_held(&cm_id_priv->lock);
331340

332-
msg = cm_alloc_msg(cm_id_priv);
341+
msg = cm_alloc_msg_agent(cm_id_priv, rep_agent);
333342
if (IS_ERR(msg))
334343
return msg;
335344

@@ -344,6 +353,12 @@ cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
344353
return msg;
345354
}
346355

356+
static struct ib_mad_send_buf *
357+
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
358+
{
359+
return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
360+
}
361+
347362
static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
348363
{
349364
struct cm_id_private *cm_id_priv = msg->context[0];
@@ -2295,7 +2310,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
22952310
goto out;
22962311
}
22972312

2298-
msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REP_SENT);
2313+
msg = cm_alloc_priv_msg_rep(cm_id_priv, IB_CM_REP_SENT, true);
22992314
if (IS_ERR(msg)) {
23002315
ret = PTR_ERR(msg);
23012316
goto out;
@@ -4380,9 +4395,22 @@ static int cm_add_one(struct ib_device *ib_device)
43804395
goto error2;
43814396
}
43824397

4398+
port->rep_agent = ib_register_mad_agent(ib_device, i,
4399+
IB_QPT_GSI,
4400+
NULL,
4401+
0,
4402+
cm_send_handler,
4403+
NULL,
4404+
port,
4405+
0);
4406+
if (IS_ERR(port->rep_agent)) {
4407+
ret = PTR_ERR(port->rep_agent);
4408+
goto error3;
4409+
}
4410+
43834411
ret = ib_modify_port(ib_device, i, 0, &port_modify);
43844412
if (ret)
4385-
goto error3;
4413+
goto error4;
43864414

43874415
count++;
43884416
}
@@ -4397,6 +4425,8 @@ static int cm_add_one(struct ib_device *ib_device)
43974425
write_unlock_irqrestore(&cm.device_lock, flags);
43984426
return 0;
43994427

4428+
error4:
4429+
ib_unregister_mad_agent(port->rep_agent);
44004430
error3:
44014431
ib_unregister_mad_agent(port->mad_agent);
44024432
error2:
@@ -4410,6 +4440,7 @@ static int cm_add_one(struct ib_device *ib_device)
44104440

44114441
port = cm_dev->port[i-1];
44124442
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
4443+
ib_unregister_mad_agent(port->rep_agent);
44134444
ib_unregister_mad_agent(port->mad_agent);
44144445
ib_port_unregister_client_groups(ib_device, i,
44154446
cm_counter_groups);
@@ -4439,12 +4470,14 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
44394470

44404471
rdma_for_each_port (ib_device, i) {
44414472
struct ib_mad_agent *mad_agent;
4473+
struct ib_mad_agent *rep_agent;
44424474

44434475
if (!rdma_cap_ib_cm(ib_device, i))
44444476
continue;
44454477

44464478
port = cm_dev->port[i-1];
44474479
mad_agent = port->mad_agent;
4480+
rep_agent = port->rep_agent;
44484481
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
44494482
/*
44504483
* We flush the queue here after the going_down set, this
@@ -4458,8 +4491,10 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
44584491
*/
44594492
write_lock(&cm_dev->mad_agent_lock);
44604493
port->mad_agent = NULL;
4494+
port->rep_agent = NULL;
44614495
write_unlock(&cm_dev->mad_agent_lock);
44624496
ib_unregister_mad_agent(mad_agent);
4497+
ib_unregister_mad_agent(rep_agent);
44634498
ib_port_unregister_client_groups(ib_device, i,
44644499
cm_counter_groups);
44654500
}

drivers/infiniband/core/counters.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ static struct ib_qp *rdma_counter_get_qp(struct ib_device *dev, u32 qp_num)
461461
return NULL;
462462

463463
qp = container_of(res, struct ib_qp, res);
464-
if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
464+
if (qp->qp_type == IB_QPT_RAW_PACKET && !rdma_dev_has_raw_cap(dev))
465465
goto err;
466466

467467
return qp;

drivers/infiniband/core/cq.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,13 +317,18 @@ EXPORT_SYMBOL(__ib_alloc_cq_any);
317317
*/
318318
void ib_free_cq(struct ib_cq *cq)
319319
{
320-
int ret;
320+
int ret = 0;
321321

322322
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
323323
return;
324324
if (WARN_ON_ONCE(cq->cqe_used))
325325
return;
326326

327+
if (cq->device->ops.pre_destroy_cq) {
328+
ret = cq->device->ops.pre_destroy_cq(cq);
329+
WARN_ONCE(ret, "Disable of kernel CQ shouldn't fail");
330+
}
331+
327332
switch (cq->poll_ctx) {
328333
case IB_POLL_DIRECT:
329334
break;
@@ -340,7 +345,10 @@ void ib_free_cq(struct ib_cq *cq)
340345

341346
rdma_dim_destroy(cq);
342347
trace_cq_free(cq);
343-
ret = cq->device->ops.destroy_cq(cq, NULL);
348+
if (cq->device->ops.post_destroy_cq)
349+
cq->device->ops.post_destroy_cq(cq);
350+
else
351+
ret = cq->device->ops.destroy_cq(cq, NULL);
344352
WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail");
345353
rdma_restrack_del(&cq->res);
346354
kfree(cq->wc);

drivers/infiniband/core/device.c

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,33 @@ bool rdma_dev_access_netns(const struct ib_device *dev, const struct net *net)
145145
}
146146
EXPORT_SYMBOL(rdma_dev_access_netns);
147147

148+
/**
149+
* rdma_dev_has_raw_cap() - Returns whether a specified rdma device has
150+
* CAP_NET_RAW capability or not.
151+
*
152+
* @dev: Pointer to rdma device whose capability is to be checked
153+
*
154+
* Returns true if a rdma device's owning user namespace has CAP_NET_RAW
155+
* capability, otherwise false. When rdma subsystem is in legacy shared network
156+
* namespace mode, the default net namespace is considered.
157+
*/
158+
bool rdma_dev_has_raw_cap(const struct ib_device *dev)
159+
{
160+
const struct net *net;
161+
162+
/* Network namespace is the resource whose user namespace
163+
* to be considered. When in shared mode, there is no reliable
164+
* network namespace resource, so consider the default net namespace.
165+
*/
166+
if (ib_devices_shared_netns)
167+
net = &init_net;
168+
else
169+
net = read_pnet(&dev->coredev.rdma_net);
170+
171+
return ns_capable(net->user_ns, CAP_NET_RAW);
172+
}
173+
EXPORT_SYMBOL(rdma_dev_has_raw_cap);
174+
148175
/*
149176
* xarray has this behavior where it won't iterate over NULL values stored in
150177
* allocated arrays. So we need our own iterator to see all values stored in
@@ -557,14 +584,16 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
557584
/**
558585
* _ib_alloc_device - allocate an IB device struct
559586
* @size:size of structure to allocate
587+
* @net: network namespace device should be located in, namespace
588+
* must stay valid until ib_register_device() is completed.
560589
*
561590
* Low-level drivers should use ib_alloc_device() to allocate &struct
562591
* ib_device. @size is the size of the structure to be allocated,
563592
* including any private data used by the low-level driver.
564593
* ib_dealloc_device() must be used to free structures allocated with
565594
* ib_alloc_device().
566595
*/
567-
struct ib_device *_ib_alloc_device(size_t size)
596+
struct ib_device *_ib_alloc_device(size_t size, struct net *net)
568597
{
569598
struct ib_device *device;
570599
unsigned int i;
@@ -581,7 +610,15 @@ struct ib_device *_ib_alloc_device(size_t size)
581610
return NULL;
582611
}
583612

584-
rdma_init_coredev(&device->coredev, device, &init_net);
613+
/* ib_devices_shared_netns can't change while we have active namespaces
614+
* in the system which means either init_net is passed or the user has
615+
* no idea what they are doing.
616+
*
617+
* To avoid breaking backward compatibility, when in shared mode,
618+
* force to init the device in the init_net.
619+
*/
620+
net = ib_devices_shared_netns ? &init_net : net;
621+
rdma_init_coredev(&device->coredev, device, net);
585622

586623
INIT_LIST_HEAD(&device->event_handler_list);
587624
spin_lock_init(&device->qp_open_list_lock);
@@ -2671,6 +2708,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
26712708
SET_DEVICE_OP(dev_ops, add_sub_dev);
26722709
SET_DEVICE_OP(dev_ops, advise_mr);
26732710
SET_DEVICE_OP(dev_ops, alloc_dm);
2711+
SET_DEVICE_OP(dev_ops, alloc_dmah);
26742712
SET_DEVICE_OP(dev_ops, alloc_hw_device_stats);
26752713
SET_DEVICE_OP(dev_ops, alloc_hw_port_stats);
26762714
SET_DEVICE_OP(dev_ops, alloc_mr);
@@ -2691,13 +2729,15 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
26912729
SET_DEVICE_OP(dev_ops, create_ah);
26922730
SET_DEVICE_OP(dev_ops, create_counters);
26932731
SET_DEVICE_OP(dev_ops, create_cq);
2732+
SET_DEVICE_OP(dev_ops, create_cq_umem);
26942733
SET_DEVICE_OP(dev_ops, create_flow);
26952734
SET_DEVICE_OP(dev_ops, create_qp);
26962735
SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
26972736
SET_DEVICE_OP(dev_ops, create_srq);
26982737
SET_DEVICE_OP(dev_ops, create_user_ah);
26992738
SET_DEVICE_OP(dev_ops, create_wq);
27002739
SET_DEVICE_OP(dev_ops, dealloc_dm);
2740+
SET_DEVICE_OP(dev_ops, dealloc_dmah);
27012741
SET_DEVICE_OP(dev_ops, dealloc_driver);
27022742
SET_DEVICE_OP(dev_ops, dealloc_mw);
27032743
SET_DEVICE_OP(dev_ops, dealloc_pd);
@@ -2763,8 +2803,10 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
27632803
SET_DEVICE_OP(dev_ops, modify_srq);
27642804
SET_DEVICE_OP(dev_ops, modify_wq);
27652805
SET_DEVICE_OP(dev_ops, peek_cq);
2806+
SET_DEVICE_OP(dev_ops, pre_destroy_cq);
27662807
SET_DEVICE_OP(dev_ops, poll_cq);
27672808
SET_DEVICE_OP(dev_ops, port_groups);
2809+
SET_DEVICE_OP(dev_ops, post_destroy_cq);
27682810
SET_DEVICE_OP(dev_ops, post_recv);
27692811
SET_DEVICE_OP(dev_ops, post_send);
27702812
SET_DEVICE_OP(dev_ops, post_srq_recv);
@@ -2793,6 +2835,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
27932835
SET_OBJ_SIZE(dev_ops, ib_ah);
27942836
SET_OBJ_SIZE(dev_ops, ib_counters);
27952837
SET_OBJ_SIZE(dev_ops, ib_cq);
2838+
SET_OBJ_SIZE(dev_ops, ib_dmah);
27962839
SET_OBJ_SIZE(dev_ops, ib_mw);
27972840
SET_OBJ_SIZE(dev_ops, ib_pd);
27982841
SET_OBJ_SIZE(dev_ops, ib_qp);

0 commit comments

Comments
 (0)