Skip to content

Commit 1939882

Browse files
committed
cxl/pmem: Fix nvdimm unregistration when cxl_pmem driver is absent
The cxl_pmem.ko module houses the driver for both cxl_nvdimm_bridge objects and cxl_nvdimm objects. When the core creates a cxl_nvdimm it arranges for it to be autoremoved when the bridge goes down. However, if the bridge never initialized because the cxl_pmem.ko module never loaded, it sets up the following crash scenario: BUG: kernel NULL pointer dereference, address: 0000000000000478 [..] RIP: 0010:cxl_nvdimm_probe+0x99/0x140 [cxl_pmem] [..] Call Trace: <TASK> cxl_bus_probe+0x17/0x50 [cxl_core] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __driver_attach+0xd2/0x1c0 bus_for_each_dev+0x79/0xc0 bus_add_driver+0x1b1/0x200 driver_register+0x89/0xe0 cxl_pmem_init+0x50/0xff0 [cxl_pmem] It turns out the recent rework to simplify nvdimm probing obviated the need to unregister cxl_nvdimm objects at cxl_nvdimm_bridge ->remove() time. Leave the cxl_nvdimm device registered until the hosting cxl_memdev departs. The alternative is that the cxl_memdev needs to be reattached whenever the cxl_nvdimm_bridge attach state cycles, which is awkward and unnecessary. The only requirement is to make sure that when the cxl_nvdimm_bridge goes away any dependent cxl_nvdimm objects are shut down. Handle that in unregister_nvdimm_bus(). With these registration entanglements removed there is no longer a need to pre-load the cxl_pmem module in cxl_acpi. Fixes: cb9cfff ("cxl/acpi: Simplify cxl_nvdimm_bridge probing") Reported-by: Gregory Price <[email protected]> Debugged-by: Jonathan Cameron <[email protected]> Tested-by: Jonathan Cameron <[email protected]> Reviewed-by: Jonathan Cameron <[email protected]> Reviewed-by: Dave Jiang <[email protected]> Link: https://lore.kernel.org/r/167426077263.3955046.9695309346988027311.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <[email protected]>
1 parent 2ec1b17 commit 1939882

File tree

3 files changed

+28
-39
lines changed

3 files changed

+28
-39
lines changed

drivers/cxl/acpi.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -736,4 +736,3 @@ module_exit(cxl_acpi_exit);
736736
MODULE_LICENSE("GPL v2");
737737
MODULE_IMPORT_NS(CXL);
738738
MODULE_IMPORT_NS(ACPI);
739-
MODULE_SOFTDEP("pre: cxl_pmem");

drivers/cxl/core/pmem.c

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -227,34 +227,16 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb,
227227
return cxl_nvd;
228228
}
229229

230-
static void cxl_nvd_unregister(void *_cxl_nvd)
230+
static void cxlmd_release_nvdimm(void *_cxlmd)
231231
{
232-
struct cxl_nvdimm *cxl_nvd = _cxl_nvd;
233-
struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
232+
struct cxl_memdev *cxlmd = _cxlmd;
233+
struct cxl_nvdimm *cxl_nvd = cxlmd->cxl_nvd;
234234
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
235235

236-
/*
237-
* Either the bridge is in ->remove() context under the device_lock(),
238-
* or cxlmd_release_nvdimm() is cancelling the bridge's release action
239-
* for @cxl_nvd and doing it itself (while manually holding the bridge
240-
* lock).
241-
*/
242-
device_lock_assert(&cxl_nvb->dev);
243236
cxl_nvd->cxlmd = NULL;
244237
cxlmd->cxl_nvd = NULL;
238+
cxlmd->cxl_nvb = NULL;
245239
device_unregister(&cxl_nvd->dev);
246-
}
247-
248-
static void cxlmd_release_nvdimm(void *_cxlmd)
249-
{
250-
struct cxl_memdev *cxlmd = _cxlmd;
251-
struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb;
252-
253-
device_lock(&cxl_nvb->dev);
254-
if (cxlmd->cxl_nvd)
255-
devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister,
256-
cxlmd->cxl_nvd);
257-
device_unlock(&cxl_nvb->dev);
258240
put_device(&cxl_nvb->dev);
259241
}
260242

@@ -293,22 +275,6 @@ int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
293275

294276
dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev));
295277

296-
/*
297-
* The two actions below arrange for @cxl_nvd to be deleted when either
298-
* the top-level PMEM bridge goes down, or the endpoint device goes
299-
* through ->remove().
300-
*/
301-
device_lock(&cxl_nvb->dev);
302-
if (cxl_nvb->dev.driver)
303-
rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister,
304-
cxl_nvd);
305-
else
306-
rc = -ENXIO;
307-
device_unlock(&cxl_nvb->dev);
308-
309-
if (rc)
310-
goto err_alloc;
311-
312278
/* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */
313279
return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd);
314280

drivers/cxl/pmem.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,11 +225,35 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
225225
return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
226226
}
227227

228+
static int detach_nvdimm(struct device *dev, void *data)
229+
{
230+
struct cxl_nvdimm *cxl_nvd;
231+
bool release = false;
232+
233+
if (!is_cxl_nvdimm(dev))
234+
return 0;
235+
236+
device_lock(dev);
237+
if (!dev->driver)
238+
goto out;
239+
240+
cxl_nvd = to_cxl_nvdimm(dev);
241+
if (cxl_nvd->cxlmd && cxl_nvd->cxlmd->cxl_nvb == data)
242+
release = true;
243+
out:
244+
device_unlock(dev);
245+
if (release)
246+
device_release_driver(dev);
247+
return 0;
248+
}
249+
228250
static void unregister_nvdimm_bus(void *_cxl_nvb)
229251
{
230252
struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
231253
struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus;
232254

255+
bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, detach_nvdimm);
256+
233257
cxl_nvb->nvdimm_bus = NULL;
234258
nvdimm_bus_unregister(nvdimm_bus);
235259
}

0 commit comments

Comments
 (0)