Skip to content

Commit c741e49

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma fixes from Jason Gunthorpe: "Quite a few small bug fixes old and new, also Doug Ledford is retiring now, we thank him for his work. Details: - Use after free in rxe - mlx5 DM regression - hns bugs triggered by device reset - Two fixes for CONFIG_DEBUG_PREEMPT - Several longstanding corner case bugs in hfi1 - Two irdma data path bugs in rare cases and some memory issues" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: RDMA/irdma: Don't arm the CQ more than two times if no CE for this CQ RDMA/irdma: Report correct WC errors RDMA/irdma: Fix a potential memory allocation issue in 'irdma_prm_add_pble_mem()' RDMA/irdma: Fix a user-after-free in add_pble_prm IB/hfi1: Fix leak of rcvhdrtail_dummy_kvaddr IB/hfi1: Fix early init panic IB/hfi1: Insure use of smp_processor_id() is preempt disabled IB/hfi1: Correct guard on eager buffer deallocation RDMA/rtrs: Call {get,put}_cpu_ptr to silence a debug kernel warning RDMA/hns: Do not destroy QP resources in the hw resetting phase RDMA/hns: Do not halt commands during reset until later Remove Doug Ledford from MAINTAINERS RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow RDMA: Fix use-after-free in rxe_queue_cleanup
2 parents ded746b + 10467ce commit c741e49

File tree

17 files changed

+102
-67
lines changed

17 files changed

+102
-67
lines changed

MAINTAINERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9329,7 +9329,6 @@ S: Maintained
93299329
F: drivers/iio/pressure/dps310.c
93309330

93319331
INFINIBAND SUBSYSTEM
9332-
M: Doug Ledford <[email protected]>
93339332
M: Jason Gunthorpe <[email protected]>
93349333
93359334
S: Supported

drivers/infiniband/hw/hfi1/chip.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8415,6 +8415,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
84158415
*/
84168416
static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
84178417
{
8418+
if (!rcd->rcvhdrq)
8419+
return;
84188420
clear_recv_intr(rcd);
84198421
if (check_packet_present(rcd))
84208422
force_recv_intr(rcd);

drivers/infiniband/hw/hfi1/driver.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
10121012
struct hfi1_packet packet;
10131013
int skip_pkt = 0;
10141014

1015+
if (!rcd->rcvhdrq)
1016+
return RCV_PKT_OK;
10151017
/* Control context will always use the slow path interrupt handler */
10161018
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
10171019

drivers/infiniband/hw/hfi1/init.c

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
113113
rcd->fast_handler = get_dma_rtail_setting(rcd) ?
114114
handle_receive_interrupt_dma_rtail :
115115
handle_receive_interrupt_nodma_rtail;
116-
rcd->slow_handler = handle_receive_interrupt;
117116

118117
hfi1_set_seq_cnt(rcd, 1);
119118

@@ -334,6 +333,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
334333
rcd->numa_id = numa;
335334
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
336335
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
336+
rcd->slow_handler = handle_receive_interrupt;
337+
rcd->do_interrupt = rcd->slow_handler;
337338
rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
338339

339340
mutex_init(&rcd->exp_mutex);
@@ -874,18 +875,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
874875
if (ret)
875876
goto done;
876877

877-
/* allocate dummy tail memory for all receive contexts */
878-
dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
879-
sizeof(u64),
880-
&dd->rcvhdrtail_dummy_dma,
881-
GFP_KERNEL);
882-
883-
if (!dd->rcvhdrtail_dummy_kvaddr) {
884-
dd_dev_err(dd, "cannot allocate dummy tail memory\n");
885-
ret = -ENOMEM;
886-
goto done;
887-
}
888-
889878
/* dd->rcd can be NULL if early initialization failed */
890879
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
891880
/*
@@ -898,8 +887,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
898887
if (!rcd)
899888
continue;
900889

901-
rcd->do_interrupt = &handle_receive_interrupt;
902-
903890
lastfail = hfi1_create_rcvhdrq(dd, rcd);
904891
if (!lastfail)
905892
lastfail = hfi1_setup_eagerbufs(rcd);
@@ -1120,7 +1107,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
11201107
rcd->egrbufs.rcvtids = NULL;
11211108

11221109
for (e = 0; e < rcd->egrbufs.alloced; e++) {
1123-
if (rcd->egrbufs.buffers[e].dma)
1110+
if (rcd->egrbufs.buffers[e].addr)
11241111
dma_free_coherent(&dd->pcidev->dev,
11251112
rcd->egrbufs.buffers[e].len,
11261113
rcd->egrbufs.buffers[e].addr,
@@ -1201,6 +1188,11 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
12011188
dd->tx_opstats = NULL;
12021189
kfree(dd->comp_vect);
12031190
dd->comp_vect = NULL;
1191+
if (dd->rcvhdrtail_dummy_kvaddr)
1192+
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
1193+
(void *)dd->rcvhdrtail_dummy_kvaddr,
1194+
dd->rcvhdrtail_dummy_dma);
1195+
dd->rcvhdrtail_dummy_kvaddr = NULL;
12041196
sdma_clean(dd, dd->num_sdma);
12051197
rvt_dealloc_device(&dd->verbs_dev.rdi);
12061198
}
@@ -1298,6 +1290,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
12981290
goto bail;
12991291
}
13001292

1293+
/* allocate dummy tail memory for all receive contexts */
1294+
dd->rcvhdrtail_dummy_kvaddr =
1295+
dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
1296+
&dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
1297+
if (!dd->rcvhdrtail_dummy_kvaddr) {
1298+
ret = -ENOMEM;
1299+
goto bail;
1300+
}
1301+
13011302
atomic_set(&dd->ipoib_rsm_usr_num, 0);
13021303
return dd;
13031304

@@ -1505,13 +1506,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
15051506

15061507
free_credit_return(dd);
15071508

1508-
if (dd->rcvhdrtail_dummy_kvaddr) {
1509-
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
1510-
(void *)dd->rcvhdrtail_dummy_kvaddr,
1511-
dd->rcvhdrtail_dummy_dma);
1512-
dd->rcvhdrtail_dummy_kvaddr = NULL;
1513-
}
1514-
15151509
/*
15161510
* Free any resources still in use (usually just kernel contexts)
15171511
* at unload; we do for ctxtcnt, because that's what we allocate.

drivers/infiniband/hw/hfi1/sdma.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -838,8 +838,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
838838
if (current->nr_cpus_allowed != 1)
839839
goto out;
840840

841-
cpu_id = smp_processor_id();
842841
rcu_read_lock();
842+
cpu_id = smp_processor_id();
843843
rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
844844
sdma_rht_params);
845845

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <linux/acpi.h>
3434
#include <linux/etherdevice.h>
3535
#include <linux/interrupt.h>
36+
#include <linux/iopoll.h>
3637
#include <linux/kernel.h>
3738
#include <linux/types.h>
3839
#include <net/addrconf.h>
@@ -1050,9 +1051,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
10501051
unsigned long instance_stage,
10511052
unsigned long reset_stage)
10521053
{
1054+
#define HW_RESET_TIMEOUT_US 1000000
1055+
#define HW_RESET_SLEEP_US 1000
1056+
10531057
struct hns_roce_v2_priv *priv = hr_dev->priv;
10541058
struct hnae3_handle *handle = priv->handle;
10551059
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
1060+
unsigned long val;
1061+
int ret;
10561062

10571063
/* When hardware reset is detected, we should stop sending mailbox&cmq&
10581064
* doorbell to hardware. If now in .init_instance() function, we should
@@ -1064,7 +1070,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
10641070
* again.
10651071
*/
10661072
hr_dev->dis_db = true;
1067-
if (!ops->get_hw_reset_stat(handle))
1073+
1074+
ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
1075+
val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
1076+
HW_RESET_TIMEOUT_US, false, handle);
1077+
if (!ret)
10681078
hr_dev->is_reset = true;
10691079

10701080
if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
@@ -6387,10 +6397,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
63876397
if (!hr_dev)
63886398
return 0;
63896399

6390-
hr_dev->is_reset = true;
63916400
hr_dev->active = false;
63926401
hr_dev->dis_db = true;
6393-
63946402
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
63956403

63966404
return 0;

drivers/infiniband/hw/irdma/hw.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
6060
{
6161
struct irdma_cq *cq = iwcq->back_cq;
6262

63+
if (!cq->user_mode)
64+
cq->armed = false;
6365
if (cq->ibcq.comp_handler)
6466
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
6567
}
@@ -146,6 +148,7 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
146148
qp->flush_code = FLUSH_PROT_ERR;
147149
break;
148150
case IRDMA_AE_AMP_BAD_QP:
151+
case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
149152
qp->flush_code = FLUSH_LOC_QP_OP_ERR;
150153
break;
151154
case IRDMA_AE_AMP_BAD_STAG_KEY:
@@ -156,7 +159,6 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
156159
case IRDMA_AE_PRIV_OPERATION_DENIED:
157160
case IRDMA_AE_IB_INVALID_REQUEST:
158161
case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
159-
case IRDMA_AE_IB_REMOTE_OP_ERROR:
160162
qp->flush_code = FLUSH_REM_ACCESS_ERR;
161163
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
162164
break;
@@ -184,6 +186,9 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
184186
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
185187
qp->flush_code = FLUSH_MW_BIND_ERR;
186188
break;
189+
case IRDMA_AE_IB_REMOTE_OP_ERROR:
190+
qp->flush_code = FLUSH_REM_OP_ERR;
191+
break;
187192
default:
188193
qp->flush_code = FLUSH_FATAL_ERR;
189194
break;

drivers/infiniband/hw/irdma/main.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
542542
void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
543543
void *cb_param);
544544
void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
545+
bool irdma_cq_empty(struct irdma_cq *iwcq);
545546
int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
546547
void *ptr);
547548
int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,

drivers/infiniband/hw/irdma/pble.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
2525
list_del(&chunk->list);
2626
if (chunk->type == PBLE_SD_PAGED)
2727
irdma_pble_free_paged_mem(chunk);
28-
if (chunk->bitmapbuf)
29-
kfree(chunk->bitmapmem.va);
28+
bitmap_free(chunk->bitmapbuf);
3029
kfree(chunk->chunkmem.va);
3130
}
3231
}
@@ -283,7 +282,6 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
283282
"PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
284283
pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
285284
pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
286-
list_add(&chunk->list, &pble_rsrc->pinfo.clist);
287285
sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
288286
sd_entry->u.pd_table.pd_page_addr.pa :
289287
sd_entry->u.bp.addr.pa;
@@ -295,12 +293,12 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
295293
goto error;
296294
}
297295

296+
list_add(&chunk->list, &pble_rsrc->pinfo.clist);
298297
sd_entry->valid = true;
299298
return 0;
300299

301300
error:
302-
if (chunk->bitmapbuf)
303-
kfree(chunk->bitmapmem.va);
301+
bitmap_free(chunk->bitmapbuf);
304302
kfree(chunk->chunkmem.va);
305303

306304
return ret_code;

drivers/infiniband/hw/irdma/pble.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ struct irdma_chunk {
7878
u32 pg_cnt;
7979
enum irdma_alloc_type type;
8080
struct irdma_sc_dev *dev;
81-
struct irdma_virt_mem bitmapmem;
8281
struct irdma_virt_mem chunkmem;
8382
};
8483

0 commit comments

Comments
 (0)