Skip to content

Commit f57aec4

Browse files
committed
cxl/pmem: Fix nvdimm registration races
A loop of the form:

    while true; do modprobe cxl_pci; modprobe -r cxl_pci; done

...fails with the following crash signature:

    BUG: kernel NULL pointer dereference, address: 0000000000000040
    [..]
    RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core]
    [..]
    Call Trace:
     <TASK>
     cxl_pmem_ctl+0x121/0x240 [cxl_pmem]
     nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm]
     nd_label_data_init+0x135/0x7e0 [libnvdimm]
     nvdimm_probe+0xd6/0x1c0 [libnvdimm]
     nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm]
     really_probe+0xde/0x380
     __driver_probe_device+0x78/0x170
     driver_probe_device+0x1f/0x90
     __device_attach_driver+0x85/0x110
     bus_for_each_drv+0x7d/0xc0
     __device_attach+0xb4/0x1e0
     bus_probe_device+0x9f/0xc0
     device_add+0x445/0x9c0
     nd_async_device_register+0xe/0x40 [libnvdimm]
     async_run_entry_fn+0x30/0x130

...namely that the bottom half of async nvdimm device registration runs after CXL has already torn down the context that cxl_pmem_ctl() needs. Unlike the ACPI NFIT case that benefits from launching multiple nvdimm device registrations in parallel from those listed in the table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a synchronous registration path to preclude this scenario.

Fixes: 21083f5 ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices")
Cc: <[email protected]>
Reported-by: Dave Jiang <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
1 parent b8b9ffc commit f57aec4

File tree

5 files changed

+25
-4
lines changed

5 files changed

+25
-4
lines changed

drivers/cxl/pmem.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev)
7676
return rc;
7777

7878
set_bit(NDD_LABELING, &flags);
79+
set_bit(NDD_REGISTER_SYNC, &flags);
7980
set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
8081
set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
8182
set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);

drivers/nvdimm/bus.c

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
508508
put_device(dev);
509509
}
510510

511-
void nd_device_register(struct device *dev)
511+
static void __nd_device_register(struct device *dev, bool sync)
512512
{
513513
if (!dev)
514514
return;
@@ -531,11 +531,24 @@ void nd_device_register(struct device *dev)
531531
}
532532
get_device(dev);
533533

534-
async_schedule_dev_domain(nd_async_device_register, dev,
535-
&nd_async_domain);
534+
if (sync)
535+
nd_async_device_register(dev, 0);
536+
else
537+
async_schedule_dev_domain(nd_async_device_register, dev,
538+
&nd_async_domain);
539+
}
540+
541+
void nd_device_register(struct device *dev)
542+
{
543+
__nd_device_register(dev, false);
536544
}
537545
EXPORT_SYMBOL(nd_device_register);
538546

547+
void nd_device_register_sync(struct device *dev)
548+
{
549+
__nd_device_register(dev, true);
550+
}
551+
539552
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
540553
{
541554
bool killed;

drivers/nvdimm/dimm_devs.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,10 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
624624
nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
625625
device_initialize(dev);
626626
lockdep_set_class(&dev->mutex, &nvdimm_key);
627-
nd_device_register(dev);
627+
if (test_bit(NDD_REGISTER_SYNC, &flags))
628+
nd_device_register_sync(dev);
629+
else
630+
nd_device_register(dev);
628631

629632
return nvdimm;
630633
}

drivers/nvdimm/nd-core.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
107107
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
108108
void nd_synchronize(void);
109109
void nd_device_register(struct device *dev);
110+
void nd_device_register_sync(struct device *dev);
110111
struct nd_label_id;
111112
char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
112113
u32 flags);

include/linux/libnvdimm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ enum {
4141
*/
4242
NDD_INCOHERENT = 7,
4343

44+
/* dimm provider wants synchronous registration by __nvdimm_create() */
45+
NDD_REGISTER_SYNC = 8,
46+
4447
/* need to set a limit somewhere, but yes, this is likely overkill */
4548
ND_IOCTL_MAX_BUFLEN = SZ_4M,
4649
ND_CMD_MAX_ELEM = 5,

0 commit comments

Comments
 (0)