Skip to content

Commit 51293c5

Browse files
committed
cxl: Fix incorrect region perf data calculation
Current math in cxl_region_perf_data_calculate divides the latency by 1000 every time the function gets called. This causes the region latency to be divided by 1000 per memory device and the math is incorrect. This is user visible as the latency access_coordinate exposed via sysfs will show incorrect latency data. Normalize values from CDAT to nanoseconds. Adjust sub-nanoseconds latency to at least 1. Remove adjustment of perf numbers from the generic target since hmat handling code has already normalized those numbers. Now all computation and stored numbers should be in nanoseconds. cxl_hb_get_perf_coordinates() is removed and HB coords are calculated in the port access_coordinate calculation path since it no longer need to be treated special. Fixes: 3d9f4a1 ("cxl/region: Calculate performance data for a region") Reviewed-by: Jonathan Cameron <[email protected]> Reviewed-by: Dan Williams <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Dave Jiang <[email protected]>
1 parent 592780b commit 51293c5

File tree

4 files changed

+45
-100
lines changed

4 files changed

+45
-100
lines changed

drivers/cxl/acpi.c

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
525525
{
526526
struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
527527
u32 uid;
528-
int rc;
529528

530529
if (kstrtou32(acpi_device_uid(hb), 0, &uid))
531530
return -EINVAL;
532531

533-
rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
534-
if (rc < 0)
535-
return rc;
536-
537-
/* Adjust back to picoseconds from nanoseconds */
538-
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
539-
dport->hb_coord[i].read_latency *= 1000;
540-
dport->hb_coord[i].write_latency *= 1000;
541-
}
542-
543-
return 0;
532+
return acpi_get_genport_coordinates(uid, dport->hb_coord);
544533
}
545534

546535
static int add_host_bridge_dport(struct device *match, void *arg)

drivers/cxl/core/cdat.c

Lines changed: 40 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,36 @@ struct dsmas_entry {
2020
int qos_class;
2121
};
2222

23+
static u32 cdat_normalize(u16 entry, u64 base, u8 type)
24+
{
25+
u32 value;
26+
27+
/*
28+
* Check for invalid and overflow values
29+
*/
30+
if (entry == 0xffff || !entry)
31+
return 0;
32+
else if (base > (UINT_MAX / (entry)))
33+
return 0;
34+
35+
/*
36+
* CDAT fields follow the format of HMAT fields. See table 5 Device
37+
* Scoped Latency and Bandwidth Information Structure in Coherent Device
38+
* Attribute Table (CDAT) Specification v1.01.
39+
*/
40+
value = entry * base;
41+
switch (type) {
42+
case ACPI_HMAT_ACCESS_LATENCY:
43+
case ACPI_HMAT_READ_LATENCY:
44+
case ACPI_HMAT_WRITE_LATENCY:
45+
value = DIV_ROUND_UP(value, 1000);
46+
break;
47+
default:
48+
break;
49+
}
50+
return value;
51+
}
52+
2353
static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
2454
const unsigned long end)
2555
{
@@ -97,7 +127,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
97127
__le16 le_val;
98128
u64 val;
99129
u16 len;
100-
int rc;
101130

102131
len = le16_to_cpu((__force __le16)hdr->length);
103132
if (len != size || (unsigned long)hdr + len > end) {
@@ -124,10 +153,8 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
124153

125154
le_base = (__force __le64)dslbis->entry_base_unit;
126155
le_val = (__force __le16)dslbis->entry[0];
127-
rc = check_mul_overflow(le64_to_cpu(le_base),
128-
le16_to_cpu(le_val), &val);
129-
if (rc)
130-
pr_warn("DSLBIS value overflowed.\n");
156+
val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
157+
dslbis->data_type);
131158

132159
cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
133160

@@ -164,7 +191,6 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
164191
struct xarray *dsmas_xa)
165192
{
166193
struct access_coordinate ep_c;
167-
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
168194
struct dsmas_entry *dent;
169195
int valid_entries = 0;
170196
unsigned long index;
@@ -176,12 +202,6 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
176202
return rc;
177203
}
178204

179-
rc = cxl_hb_get_perf_coordinates(port, coord);
180-
if (rc) {
181-
dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
182-
return rc;
183-
}
184-
185205
struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
186206

187207
if (!cxl_root)
@@ -194,18 +214,9 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
194214
int qos_class;
195215

196216
cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
197-
/*
198-
* Keeping the host bridge coordinates separate from the dsmas
199-
* coordinates in order to allow calculation of access class
200-
* 0 and 1 for region later.
201-
*/
202-
cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
203-
&coord[ACCESS_COORDINATE_CPU],
204-
&dent->coord);
205217
dent->entries = 1;
206-
rc = cxl_root->ops->qos_class(cxl_root,
207-
&coord[ACCESS_COORDINATE_CPU],
208-
1, &qos_class);
218+
rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1,
219+
&qos_class);
209220
if (rc != 1)
210221
continue;
211222

@@ -461,10 +472,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
461472

462473
le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
463474
le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
464-
465-
if (check_mul_overflow(le64_to_cpu(le_base),
466-
le16_to_cpu(le_val), &val))
467-
dev_warn(dev, "SSLBIS value overflowed!\n");
475+
val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
476+
sslbis->data_type);
468477

469478
xa_for_each(&port->dports, index, dport) {
470479
if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
@@ -521,17 +530,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
521530
struct cxl_endpoint_decoder *cxled)
522531
{
523532
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
524-
struct cxl_port *port = cxlmd->endpoint;
525533
struct cxl_dev_state *cxlds = cxlmd->cxlds;
526534
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
527-
struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
528-
struct access_coordinate coord;
529535
struct range dpa = {
530536
.start = cxled->dpa_res->start,
531537
.end = cxled->dpa_res->end,
532538
};
533539
struct cxl_dpa_perf *perf;
534-
int rc;
535540

536541
switch (cxlr->mode) {
537542
case CXL_DECODER_RAM:
@@ -549,35 +554,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
549554
if (!range_contains(&perf->dpa_range, &dpa))
550555
return;
551556

552-
rc = cxl_hb_get_perf_coordinates(port, hb_coord);
553-
if (rc) {
554-
dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
555-
return;
556-
}
557-
558557
for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
559-
/* Pickup the host bridge coords */
560-
cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
561-
562558
/* Get total bandwidth and the worst latency for the cxl region */
563559
cxlr->coord[i].read_latency = max_t(unsigned int,
564560
cxlr->coord[i].read_latency,
565-
coord.read_latency);
561+
perf->coord.read_latency);
566562
cxlr->coord[i].write_latency = max_t(unsigned int,
567563
cxlr->coord[i].write_latency,
568-
coord.write_latency);
569-
cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
570-
cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
571-
572-
/*
573-
* Convert latency to nanosec from picosec to be consistent
574-
* with the resulting latency coordinates computed by the
575-
* HMAT_REPORTING code.
576-
*/
577-
cxlr->coord[i].read_latency =
578-
DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
579-
cxlr->coord[i].write_latency =
580-
DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
564+
perf->coord.write_latency);
565+
cxlr->coord[i].read_bandwidth += perf->coord.read_bandwidth;
566+
cxlr->coord[i].write_bandwidth += perf->coord.write_bandwidth;
581567
}
582568
}
583569

drivers/cxl/core/port.c

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,38 +2133,6 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
21332133
}
21342134
EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
21352135

2136-
/**
2137-
* cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
2138-
* and host bridge
2139-
*
2140-
* @port: endpoint cxl_port
2141-
* @coord: output access coordinates
2142-
*
2143-
* Return: errno on failure, 0 on success.
2144-
*/
2145-
int cxl_hb_get_perf_coordinates(struct cxl_port *port,
2146-
struct access_coordinate *coord)
2147-
{
2148-
struct cxl_port *iter = port;
2149-
struct cxl_dport *dport;
2150-
2151-
if (!is_cxl_endpoint(port))
2152-
return -EINVAL;
2153-
2154-
dport = iter->parent_dport;
2155-
while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
2156-
iter = to_cxl_port(iter->dev.parent);
2157-
dport = iter->parent_dport;
2158-
}
2159-
2160-
coord[ACCESS_COORDINATE_LOCAL] =
2161-
dport->hb_coord[ACCESS_COORDINATE_LOCAL];
2162-
coord[ACCESS_COORDINATE_CPU] =
2163-
dport->hb_coord[ACCESS_COORDINATE_CPU];
2164-
2165-
return 0;
2166-
}
2167-
21682136
static bool parent_port_is_cxl_root(struct cxl_port *port)
21692137
{
21702138
return is_cxl_root(to_cxl_port(port->dev.parent));
@@ -2215,6 +2183,10 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
22152183
c.read_latency += dport->link_latency;
22162184
} while (!is_cxl_root);
22172185

2186+
dport = iter->parent_dport;
2187+
/* Retrieve HB coords */
2188+
cxl_coordinates_combine(&c, &c, dport->hb_coord);
2189+
22182190
/* Get the calculated PCI paths bandwidth */
22192191
pdev = to_pci_dev(port->uport_dev->parent);
22202192
bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL);

drivers/cxl/cxl.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -884,8 +884,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
884884

885885
int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
886886
struct access_coordinate *coord);
887-
int cxl_hb_get_perf_coordinates(struct cxl_port *port,
888-
struct access_coordinate *coord);
889887
void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
890888
struct cxl_endpoint_decoder *cxled);
891889

0 commit comments

Comments
 (0)