Skip to content

Commit add0d16

Browse files
committed
dax/region: Create resources on sparse DAX regions
DAX regions which map dynamic capacity partitions require that memory be allowed to come and go. Recall that sparse regions were created for this purpose. Now that extents can be realized within DAX regions, the DAX region driver can start tracking sub-resource information. The tight relationship between DAX region operations and extent operations requires memory changes to be controlled synchronously with the user of the region. Synchronize through the dax_region_rwsem and by having the region driver drive both the region device and the extent sub-devices. Recall that requests to remove extents can happen at any time and that a host is not obligated to release the memory until it is no longer being used. If an extent is not in use, allow a release response. When extents are eligible for release, no mappings exist but data may reside in caches not yet written to the device. Call cxl_region_invalidate_memregion() to write back data to the device prior to signaling that the release is complete. Speculative writes after a release may dirty the cache such that a read from a newly surfaced extent may not come from the device. Call cxl_region_invalidate_memregion() prior to bringing a new extent online to ensure the cache is marked invalid. While these invalidate calls are inefficient, they are the best we can do to ensure cache consistency without back invalidate. Furthermore, with sufficiently large extents this should occur infrequently enough that real workloads should not be impacted much. The DAX layer has no need for the details of the CXL memory extent devices. Expose extents to the DAX layer as device children of the DAX region device. A single callback from the driver helps the DAX layer determine if the child device is an extent. The DAX layer also registers a devres function to automatically clean up when the device is removed from the region. There is a race between extents being surfaced and the dax_cxl driver being loaded.
Synchronize the driver during probe by scanning for existing extents while under the device lock. Respond to extent notifications. Manage the DAX region resource tree based on the extents' lifetime. Return the status of remove notifications to lower layers such that they can manage the hardware appropriately. Based on an original patch by Navneet Singh. Reviewed-by: Jonathan Cameron <[email protected]> Signed-off-by: Ira Weiny <[email protected]> --- Changes: [iweiny: convert range prints to %pra]
1 parent a1e16b6 commit add0d16

File tree

11 files changed

+411
-39
lines changed

11 files changed

+411
-39
lines changed

drivers/cxl/core/core.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ cxled_to_mds(struct cxl_endpoint_decoder *cxled)
2222
return container_of(cxlds, struct cxl_memdev_state, cxlds);
2323
}
2424

25+
int cxl_region_invalidate_memregion(struct cxl_region *cxlr);
26+
2527
#ifdef CONFIG_CXL_REGION
2628
extern struct device_attribute dev_attr_create_pmem_region;
2729
extern struct device_attribute dev_attr_create_ram_region;

drivers/cxl/core/extent.c

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,12 @@ static void region_extent_unregister(void *ext)
116116

117117
dev_dbg(&region_extent->dev, "DAX region rm extent HPA %pra\n",
118118
&region_extent->hpa_range);
119+
/*
120+
* Extent is not in use or an error has occurred. No mappings
121+
* exist at this point. Write and invalidate caches to ensure
122+
* the device has all data prior to final release.
123+
*/
124+
cxl_region_invalidate_memregion(region_extent->cxlr_dax->cxlr);
119125
device_unregister(&region_extent->dev);
120126
}
121127

@@ -269,20 +275,65 @@ static void calc_hpa_range(struct cxl_endpoint_decoder *cxled,
269275
hpa_range->end = hpa_range->start + range_len(dpa_range) - 1;
270276
}
271277

278+
static int cxlr_notify_extent(struct cxl_region *cxlr, enum dc_event event,
279+
struct region_extent *region_extent)
280+
{
281+
struct device *dev = &cxlr->cxlr_dax->dev;
282+
struct cxl_notify_data notify_data;
283+
struct cxl_driver *driver;
284+
285+
dev_dbg(dev, "Trying notify: type %d HPA %pra\n", event,
286+
&region_extent->hpa_range);
287+
288+
guard(device)(dev);
289+
290+
/*
291+
* The lack of a driver indicates a notification has failed. No user
292+
* space coordination was possible.
293+
*/
294+
if (!dev->driver)
295+
return 0;
296+
driver = to_cxl_drv(dev->driver);
297+
if (!driver->notify)
298+
return 0;
299+
300+
notify_data = (struct cxl_notify_data) {
301+
.event = event,
302+
.region_extent = region_extent,
303+
};
304+
305+
dev_dbg(dev, "Notify: type %d HPA %pra\n", event,
306+
&region_extent->hpa_range);
307+
return driver->notify(dev, &notify_data);
308+
}
309+
310+
struct rm_data {
311+
struct cxl_region *cxlr;
312+
struct range *range;
313+
};
314+
272315
static int cxlr_rm_extent(struct device *dev, void *data)
273316
{
274317
struct region_extent *region_extent = to_region_extent(dev);
275-
struct range *region_hpa_range = data;
318+
struct rm_data *rm_data = data;
319+
int rc;
276320

277321
if (!region_extent)
278322
return 0;
279323

280324
/*
281-
* Any extent which 'touches' the released range is removed.
325+
* Any extent which 'touches' the released range is attempted to be
326+
* removed.
282327
*/
283-
if (range_overlaps(region_hpa_range, &region_extent->hpa_range)) {
328+
if (range_overlaps(rm_data->range, &region_extent->hpa_range)) {
329+
struct cxl_region *cxlr = rm_data->cxlr;
330+
284331
dev_dbg(dev, "Remove region extent HPA %pra\n",
285332
&region_extent->hpa_range);
333+
rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, region_extent);
334+
if (rc == -EBUSY)
335+
return 0;
336+
286337
region_rm_extent(region_extent);
287338
}
288339
return 0;
@@ -327,8 +378,13 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
327378

328379
calc_hpa_range(cxled, cxlr->cxlr_dax, &dpa_range, &hpa_range);
329380

381+
struct rm_data rm_data = {
382+
.cxlr = cxlr,
383+
.range = &hpa_range,
384+
};
385+
330386
/* Remove region extents which overlap */
331-
return device_for_each_child(&cxlr->cxlr_dax->dev, &hpa_range,
387+
return device_for_each_child(&cxlr->cxlr_dax->dev, &rm_data,
332388
cxlr_rm_extent);
333389
}
334390

@@ -353,8 +409,23 @@ static int cxlr_add_extent(struct cxl_dax_region *cxlr_dax,
353409
return rc;
354410
}
355411

356-
/* device model handles freeing region_extent */
357-
return online_region_extent(region_extent);
412+
/* Ensure caches are clean prior to onlining */
413+
cxl_region_invalidate_memregion(cxlr_dax->cxlr);
414+
415+
rc = online_region_extent(region_extent);
416+
/* device model handled freeing region_extent */
417+
if (rc)
418+
return rc;
419+
420+
rc = cxlr_notify_extent(cxlr_dax->cxlr, DCD_ADD_CAPACITY, region_extent);
421+
/*
422+
* The region device was briefly live but DAX layer ensures it was not
423+
* used
424+
*/
425+
if (rc)
426+
region_rm_extent(region_extent);
427+
428+
return rc;
358429
}
359430

360431
/* Callers are expected to ensure cxled has been attached to a region */

drivers/cxl/core/region.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
223223
return xa_load(&port->regions, (unsigned long)cxlr);
224224
}
225225

226-
static int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
226+
int cxl_region_invalidate_memregion(struct cxl_region *cxlr)
227227
{
228228
if (!cpu_cache_has_invalidate_memregion()) {
229229
if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) {

drivers/cxl/cxl.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -867,10 +867,16 @@ bool is_cxl_region(struct device *dev);
867867

868868
extern struct bus_type cxl_bus_type;
869869

870+
struct cxl_notify_data {
871+
enum dc_event event;
872+
struct region_extent *region_extent;
873+
};
874+
870875
struct cxl_driver {
871876
const char *name;
872877
int (*probe)(struct device *dev);
873878
void (*remove)(struct device *dev);
879+
int (*notify)(struct device *dev, struct cxl_notify_data *notify_data);
874880
struct device_driver drv;
875881
int id;
876882
};

0 commit comments

Comments
 (0)