Skip to content

Commit be86383

Browse files
mmarcini authored and jgunthorpe committed
IB/hfi1: Close window for pq and request colliding
Cleaning up a pq can result in the following warning and panic: WARNING: CPU: 52 PID: 77418 at lib/list_debug.c:53 __list_del_entry+0x63/0xd0 list_del corruption, ffff88cb2c6ac068->next is LIST_POISON1 (dead000000000100) Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 Call Trace: [<ffffffff90365ac0>] dump_stack+0x19/0x1b [<ffffffff8fc98b78>] __warn+0xd8/0x100 [<ffffffff8fc98bff>] warn_slowpath_fmt+0x5f/0x80 [<ffffffff8ff970c3>] __list_del_entry+0x63/0xd0 [<ffffffff8ff9713d>] list_del+0xd/0x30 [<ffffffff8fddda70>] kmem_cache_destroy+0x50/0x110 [<ffffffffc0328130>] hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [<ffffffffc02e2350>] hfi1_file_close+0x70/0x1e0 [hfi1] [<ffffffff8fe4519c>] __fput+0xec/0x260 [<ffffffff8fe453fe>] ____fput+0xe/0x10 [<ffffffff8fcbfd1b>] task_work_run+0xbb/0xe0 [<ffffffff8fc2bc65>] do_notify_resume+0xa5/0xc0 [<ffffffff90379134>] int_signal+0x12/0x17 BUG: unable to handle 
kernel NULL pointer dereference at 0000000000000010 IP: [<ffffffff8fe1f93e>] kmem_cache_close+0x7e/0x300 PGD 2cdab19067 PUD 2f7bfdb067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: mmfs26(OE) mmfslinux(OE) tracedev(OE) 8021q garp mrp ib_isert iscsi_target_mod target_core_mod crc_t10dif crct10dif_generic opa_vnic rpcrdma ib_iser libiscsi scsi_transport_iscsi ib_ipoib(OE) bridge stp llc iTCO_wdt iTCO_vendor_support intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crct10dif_pclmul crct10dif_common crc32_pclmul ghash_clmulni_intel ast aesni_intel ttm lrw gf128mul glue_helper ablk_helper drm_kms_helper cryptd syscopyarea sysfillrect sysimgblt fb_sys_fops drm pcspkr joydev lpc_ich mei_me drm_panel_orientation_quirks i2c_i801 mei wmi ipmi_si ipmi_devintf ipmi_msghandler nfit libnvdimm acpi_power_meter acpi_pad hfi1(OE) rdmavt(OE) rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_core binfmt_misc numatools(OE) xpmem(OE) ip_tables nfsv3 nfs_acl nfs lockd grace sunrpc fscache igb ahci i2c_algo_bit libahci dca ptp libata pps_core crc32c_intel [last unloaded: i2c_algo_bit] CPU: 52 PID: 77418 Comm: pvbatch Kdump: loaded Tainted: G W OE ------------ 3.10.0-957.38.3.el7.x86_64 #1 Hardware name: HPE.COM HPE SGI 8600-XA730i Gen10/X11DPT-SB-SG007, BIOS SBED1229 01/22/2019 task: ffff88cc26db9040 ti: ffff88b5393a8000 task.ti: ffff88b5393a8000 RIP: 0010:[<ffffffff8fe1f93e>] [<ffffffff8fe1f93e>] kmem_cache_close+0x7e/0x300 RSP: 0018:ffff88b5393abd60 EFLAGS: 00010287 RAX: 0000000000000000 RBX: ffff88cb2c6ac000 RCX: 0000000000000003 RDX: 0000000000000400 RSI: 0000000000000400 RDI: ffffffff9095b800 RBP: ffff88b5393abdb0 R08: ffffffff9095b808 R09: ffffffff8ff77c19 R10: ffff88b73ce1f160 R11: ffffddecddde9800 R12: ffff88cb2c6ac000 R13: 000000000000000c R14: ffff88cf3fdca780 R15: 0000000000000000 FS: 00002aaaaab52500(0000) GS:ffff88b73ce00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000002d27664000 CR4: 
00000000007607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: [<ffffffff8fe20d44>] __kmem_cache_shutdown+0x14/0x80 [<ffffffff8fddda78>] kmem_cache_destroy+0x58/0x110 [<ffffffffc0328130>] hfi1_user_sdma_free_queues+0xf0/0x200 [hfi1] [<ffffffffc02e2350>] hfi1_file_close+0x70/0x1e0 [hfi1] [<ffffffff8fe4519c>] __fput+0xec/0x260 [<ffffffff8fe453fe>] ____fput+0xe/0x10 [<ffffffff8fcbfd1b>] task_work_run+0xbb/0xe0 [<ffffffff8fc2bc65>] do_notify_resume+0xa5/0xc0 [<ffffffff90379134>] int_signal+0x12/0x17 Code: 00 00 ba 00 04 00 00 0f 4f c2 3d 00 04 00 00 89 45 bc 0f 84 e7 01 00 00 48 63 45 bc 49 8d 04 c4 48 89 45 b0 48 8b 80 c8 00 00 00 <48> 8b 78 10 48 89 45 c0 48 83 c0 10 48 89 45 d0 48 8b 17 48 39 RIP [<ffffffff8fe1f93e>] kmem_cache_close+0x7e/0x300 RSP <ffff88b5393abd60> CR2: 0000000000000010 The panic is the result of slab entries being freed during the destruction of the pq slab. The code attempts to quiesce the pq, but looking for n_req == 0 doesn't account for new requests. Fix the issue by using SRCU to get a pq pointer and adjust the pq free logic to NULL the fd pq pointer prior to the quiesce. Fixes: e87473b ("IB/hfi1: Only set fd pointer when base context is completely initialized") Link: https://lore.kernel.org/r/[email protected] Reviewed-by: Kaike Wan <[email protected]> Signed-off-by: Mike Marciniszyn <[email protected]> Signed-off-by: Dennis Dalessandro <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent a70ed0f commit be86383

File tree

4 files changed

+48
-29
lines changed

4 files changed

+48
-29
lines changed

drivers/infiniband/hw/hfi1/file_ops.c

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -200,23 +200,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
200200

201201
fd = kzalloc(sizeof(*fd), GFP_KERNEL);
202202

203-
if (fd) {
204-
fd->rec_cpu_num = -1; /* no cpu affinity by default */
205-
fd->mm = current->mm;
206-
mmgrab(fd->mm);
207-
fd->dd = dd;
208-
kobject_get(&fd->dd->kobj);
209-
fp->private_data = fd;
210-
} else {
211-
fp->private_data = NULL;
212-
213-
if (atomic_dec_and_test(&dd->user_refcount))
214-
complete(&dd->user_comp);
215-
216-
return -ENOMEM;
217-
}
218-
203+
if (!fd || init_srcu_struct(&fd->pq_srcu))
204+
goto nomem;
205+
spin_lock_init(&fd->pq_rcu_lock);
206+
spin_lock_init(&fd->tid_lock);
207+
spin_lock_init(&fd->invalid_lock);
208+
fd->rec_cpu_num = -1; /* no cpu affinity by default */
209+
fd->mm = current->mm;
210+
mmgrab(fd->mm);
211+
fd->dd = dd;
212+
kobject_get(&fd->dd->kobj);
213+
fp->private_data = fd;
219214
return 0;
215+
nomem:
216+
kfree(fd);
217+
fp->private_data = NULL;
218+
if (atomic_dec_and_test(&dd->user_refcount))
219+
complete(&dd->user_comp);
220+
return -ENOMEM;
220221
}
221222

222223
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -301,21 +302,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
301302
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
302303
{
303304
struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
304-
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
305+
struct hfi1_user_sdma_pkt_q *pq;
305306
struct hfi1_user_sdma_comp_q *cq = fd->cq;
306307
int done = 0, reqs = 0;
307308
unsigned long dim = from->nr_segs;
309+
int idx;
308310

309-
if (!cq || !pq)
311+
idx = srcu_read_lock(&fd->pq_srcu);
312+
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
313+
if (!cq || !pq) {
314+
srcu_read_unlock(&fd->pq_srcu, idx);
310315
return -EIO;
316+
}
311317

312-
if (!iter_is_iovec(from) || !dim)
318+
if (!iter_is_iovec(from) || !dim) {
319+
srcu_read_unlock(&fd->pq_srcu, idx);
313320
return -EINVAL;
321+
}
314322

315323
trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim);
316324

317-
if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
325+
if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) {
326+
srcu_read_unlock(&fd->pq_srcu, idx);
318327
return -ENOSPC;
328+
}
319329

320330
while (dim) {
321331
int ret;
@@ -333,6 +343,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
333343
reqs++;
334344
}
335345

346+
srcu_read_unlock(&fd->pq_srcu, idx);
336347
return reqs;
337348
}
338349

@@ -707,6 +718,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
707718
if (atomic_dec_and_test(&dd->user_refcount))
708719
complete(&dd->user_comp);
709720

721+
cleanup_srcu_struct(&fdata->pq_srcu);
710722
kfree(fdata);
711723
return 0;
712724
}

drivers/infiniband/hw/hfi1/hfi.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1444,10 +1444,13 @@ struct mmu_rb_handler;
14441444

14451445
/* Private data for file operations */
14461446
struct hfi1_filedata {
1447+
struct srcu_struct pq_srcu;
14471448
struct hfi1_devdata *dd;
14481449
struct hfi1_ctxtdata *uctxt;
14491450
struct hfi1_user_sdma_comp_q *cq;
1450-
struct hfi1_user_sdma_pkt_q *pq;
1451+
/* update side lock for SRCU */
1452+
spinlock_t pq_rcu_lock;
1453+
struct hfi1_user_sdma_pkt_q __rcu *pq;
14511454
u16 subctxt;
14521455
/* for cpu affinity; -1 if none */
14531456
int rec_cpu_num;

drivers/infiniband/hw/hfi1/user_exp_rcv.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
8787
{
8888
int ret = 0;
8989

90-
spin_lock_init(&fd->tid_lock);
91-
spin_lock_init(&fd->invalid_lock);
92-
9390
fd->entry_to_rb = kcalloc(uctxt->expected_count,
9491
sizeof(struct rb_node *),
9592
GFP_KERNEL);

drivers/infiniband/hw/hfi1/user_sdma.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
179179
pq = kzalloc(sizeof(*pq), GFP_KERNEL);
180180
if (!pq)
181181
return -ENOMEM;
182-
183182
pq->dd = dd;
184183
pq->ctxt = uctxt->ctxt;
185184
pq->subctxt = fd->subctxt;
@@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
236235
goto pq_mmu_fail;
237236
}
238237

239-
fd->pq = pq;
238+
rcu_assign_pointer(fd->pq, pq);
240239
fd->cq = cq;
241240

242241
return 0;
@@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
264263

265264
trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt);
266265

267-
pq = fd->pq;
266+
spin_lock(&fd->pq_rcu_lock);
267+
pq = srcu_dereference_check(fd->pq, &fd->pq_srcu,
268+
lockdep_is_held(&fd->pq_rcu_lock));
268269
if (pq) {
270+
rcu_assign_pointer(fd->pq, NULL);
271+
spin_unlock(&fd->pq_rcu_lock);
272+
synchronize_srcu(&fd->pq_srcu);
273+
/* at this point there can be no more new requests */
269274
if (pq->handler)
270275
hfi1_mmu_rb_unregister(pq->handler);
271276
iowait_sdma_drain(&pq->busy);
@@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
277282
kfree(pq->req_in_use);
278283
kmem_cache_destroy(pq->txreq_cache);
279284
kfree(pq);
280-
fd->pq = NULL;
285+
} else {
286+
spin_unlock(&fd->pq_rcu_lock);
281287
}
282288
if (fd->cq) {
283289
vfree(fd->cq->comps);
@@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
321327
{
322328
int ret = 0, i;
323329
struct hfi1_ctxtdata *uctxt = fd->uctxt;
324-
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
330+
struct hfi1_user_sdma_pkt_q *pq =
331+
srcu_dereference(fd->pq, &fd->pq_srcu);
325332
struct hfi1_user_sdma_comp_q *cq = fd->cq;
326333
struct hfi1_devdata *dd = pq->dd;
327334
unsigned long idx = 0;

0 commit comments

Comments
 (0)