
Commit 26fd9f7

Merge tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl
Pull Compute Express Link (CXL) fixes from Dave Jiang:
 "These fixes address a few issues in the CXL subsystem, including
  dealing with some bugs in the CXL EDAC and RAS drivers:

   - Fix return value of cxlctl_validate_set_features()

   - Fix min_scrub_cycle of a region miscalculation and add additional
     documentation

   - Fix potential memory leak issues for CXL EDAC

   - Fix CPER handler device confusion for CXL RAS

   - Fix using wrong repair type to check DRAM event record"

* tag 'cxl-fixes-6.16-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/edac: Fix using wrong repair type to check dram event record
  cxl/ras: Fix CPER handler device confusion
  cxl/edac: Fix potential memory leak issues
  cxl/Documentation: Add more description about min/max scrub cycle
  cxl/edac: Fix the min_scrub_cycle of a region miscalculation
  cxl: fix return value in cxlctl_validate_set_features()
2 parents 5683cd6 + 0a46f60 commit 26fd9f7

File tree

4 files changed: +57, -26 lines

Documentation/ABI/testing/sysfs-edac-scrub

Lines changed: 16 additions & 0 deletions
@@ -49,6 +49,12 @@ Description:
 		(RO) Supported minimum scrub cycle duration in seconds
 		by the memory scrubber.
 
+		Device-based scrub: returns the minimum scrub cycle
+		supported by the memory device.
+
+		Region-based scrub: returns the max of minimum scrub cycles
+		supported by individual memory devices that back the region.
+
 What:		/sys/bus/edac/devices/<dev-name>/scrubX/max_cycle_duration
 Date:		March 2025
 KernelVersion:	6.15
@@ -57,6 +63,16 @@ Description:
 		(RO) Supported maximum scrub cycle duration in seconds
 		by the memory scrubber.
 
+		Device-based scrub: returns the maximum scrub cycle supported
+		by the memory device.
+
+		Region-based scrub: returns the min of maximum scrub cycles
+		supported by individual memory devices that back the region.
+
+		If the memory device does not provide maximum scrub cycle
+		information, return the maximum supported value of the scrub
+		cycle field.
+
 What:		/sys/bus/edac/devices/<dev-name>/scrubX/current_cycle_duration
 Date:		March 2025
 KernelVersion:	6.15
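
The min/max asymmetry documented above follows from the fact that all devices backing a region are scrubbed together: the region cannot cycle faster than its slowest device's lower bound, nor slower than its most constrained device's upper bound. A minimal userspace sketch of that aggregation rule (the struct and function names here are illustrative, not kernel code):

#include <stdint.h>

struct memdev_scrub_caps {
	uint8_t min_cycle;	/* device lower bound, seconds */
	uint8_t max_cycle;	/* device upper bound, seconds */
};

/* Region bounds: max of per-device minimums, min of per-device maximums. */
static void region_scrub_bounds(const struct memdev_scrub_caps *devs, int ndevs,
				uint8_t *min_out, uint8_t *max_out)
{
	uint8_t min_cycle = 0;		/* grows toward the largest minimum */
	uint8_t max_cycle = UINT8_MAX;	/* shrinks toward the smallest maximum */

	for (int i = 0; i < ndevs; i++) {
		if (devs[i].min_cycle > min_cycle)
			min_cycle = devs[i].min_cycle;
		if (devs[i].max_cycle < max_cycle)
			max_cycle = devs[i].max_cycle;
	}
	*min_out = min_cycle;
	*max_out = max_cycle;
}

Initializing min_cycle to 0 rather than U8_MAX is exactly the point of the edac.c fix below: seeded with U8_MAX and combined with min(), the region's minimum would collapse to the smallest device minimum instead of the largest.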

drivers/cxl/core/edac.c

Lines changed: 13 additions & 5 deletions
@@ -103,10 +103,10 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
 				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
 {
 	struct cxl_mailbox *cxl_mbox;
-	u8 min_scrub_cycle = U8_MAX;
 	struct cxl_region_params *p;
 	struct cxl_memdev *cxlmd;
 	struct cxl_region *cxlr;
+	u8 min_scrub_cycle = 0;
 	int i, ret;
 
 	if (!cxl_ps_ctx->cxlr) {
@@ -133,8 +133,12 @@ static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
 		if (ret)
 			return ret;
 
+		/*
+		 * The min_scrub_cycle of a region is the max of minimum scrub
+		 * cycles supported by memdevs that back the region.
+		 */
 		if (min_cycle)
-			min_scrub_cycle = min(*min_cycle, min_scrub_cycle);
+			min_scrub_cycle = max(*min_cycle, min_scrub_cycle);
 	}
 
 	if (min_cycle)
@@ -1099,8 +1103,10 @@ int cxl_store_rec_gen_media(struct cxl_memdev *cxlmd, union cxl_event *evt)
 	old_rec = xa_store(&array_rec->rec_gen_media,
 			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
 			   GFP_KERNEL);
-	if (xa_is_err(old_rec))
+	if (xa_is_err(old_rec)) {
+		kfree(rec);
 		return xa_err(old_rec);
+	}
 
 	kfree(old_rec);
 
@@ -1127,8 +1133,10 @@ int cxl_store_rec_dram(struct cxl_memdev *cxlmd, union cxl_event *evt)
 	old_rec = xa_store(&array_rec->rec_dram,
 			   le64_to_cpu(rec->media_hdr.phys_addr), rec,
 			   GFP_KERNEL);
-	if (xa_is_err(old_rec))
+	if (xa_is_err(old_rec)) {
+		kfree(rec);
 		return xa_err(old_rec);
+	}
 
 	kfree(old_rec);
 
@@ -1315,7 +1323,7 @@ cxl_mem_get_rec_dram(struct cxl_memdev *cxlmd,
 		attrbs.bank = ctx->bank;
 		break;
 	case EDAC_REPAIR_RANK_SPARING:
-		attrbs.repair_type = CXL_BANK_SPARING;
+		attrbs.repair_type = CXL_RANK_SPARING;
 		break;
 	default:
 		return NULL;
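
The two middle hunks fix the same leak: xa_store() does not take ownership of the new entry when it fails, so the caller must free it on the error path, and on success must free the displaced old entry instead. A sketch of that ownership rule as a hypothetical helper (the kernel source open-codes this in cxl_store_rec_gen_media() and cxl_store_rec_dram()):

#include <linux/slab.h>
#include <linux/xarray.h>

static int store_record(struct xarray *xa, unsigned long index, void *rec)
{
	void *old_rec = xa_store(xa, index, rec, GFP_KERNEL);

	if (xa_is_err(old_rec)) {
		kfree(rec);	/* xa_store() failed: rec was never inserted */
		return xa_err(old_rec);
	}

	kfree(old_rec);		/* success: free the entry we displaced, if any */
	return 0;
}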

drivers/cxl/core/features.c

Lines changed: 1 addition & 1 deletion
@@ -544,7 +544,7 @@ static bool cxlctl_validate_set_features(struct cxl_features_state *cxlfs,
 	u32 flags;
 
 	if (rpc_in->op_size < sizeof(uuid_t))
-		return ERR_PTR(-EINVAL);
+		return false;
 
 	feat = cxl_feature_info(cxlfs, &rpc_in->set_feat_in.uuid);
 	if (IS_ERR(feat))
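
This one-liner matters because cxlctl_validate_set_features() returns bool: ERR_PTR(-EINVAL) is a non-NULL pointer, so converting it to bool yields true, and the undersized payload was being reported as valid. A standalone demonstration of the pitfall (err_ptr() is a stand-in for the kernel's ERR_PTR(); both validators are hypothetical):

#include <stdbool.h>
#include <stdio.h>

static void *err_ptr(long error)	/* stand-in for the kernel's ERR_PTR() */
{
	return (void *)error;
}

static bool validate_broken(unsigned long op_size, unsigned long need)
{
	if (op_size < need)
		return err_ptr(-22);	/* -EINVAL encodes as non-NULL: true! */
	return true;
}

static bool validate_fixed(unsigned long op_size, unsigned long need)
{
	if (op_size < need)
		return false;		/* short input is correctly rejected */
	return true;
}

int main(void)
{
	/* prints "broken: 1, fixed: 0" */
	printf("broken: %d, fixed: %d\n",
	       validate_broken(4, 16), validate_fixed(4, 16));
	return 0;
}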

drivers/cxl/core/ras.c

Lines changed: 27 additions & 20 deletions
@@ -31,40 +31,38 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
 					ras_cap.header_log);
 }
 
-static void cxl_cper_trace_corr_prot_err(struct pci_dev *pdev,
-					 struct cxl_ras_capability_regs ras_cap)
+static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
+					 struct cxl_ras_capability_regs ras_cap)
 {
 	u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
-	struct cxl_dev_state *cxlds;
 
-	cxlds = pci_get_drvdata(pdev);
-	if (!cxlds)
-		return;
-
-	trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
+	trace_cxl_aer_correctable_error(cxlmd, status);
 }
 
-static void cxl_cper_trace_uncorr_prot_err(struct pci_dev *pdev,
-					   struct cxl_ras_capability_regs ras_cap)
+static void
+cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
+			       struct cxl_ras_capability_regs ras_cap)
 {
 	u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
-	struct cxl_dev_state *cxlds;
 	u32 fe;
 
-	cxlds = pci_get_drvdata(pdev);
-	if (!cxlds)
-		return;
-
 	if (hweight32(status) > 1)
 		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
 				   ras_cap.cap_control));
 	else
 		fe = status;
 
-	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe,
+	trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
 					  ras_cap.header_log);
 }
 
+static int match_memdev_by_parent(struct device *dev, const void *uport)
+{
+	if (is_cxl_memdev(dev) && dev->parent == uport)
+		return 1;
+	return 0;
+}
+
 static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
 {
 	unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
@@ -73,13 +71,12 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
 		pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
 					    data->prot_err.agent_addr.bus,
 					    devfn);
+	struct cxl_memdev *cxlmd;
 	int port_type;
 
 	if (!pdev)
 		return;
 
-	guard(device)(&pdev->dev);
-
 	port_type = pci_pcie_type(pdev);
 	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
 	    port_type == PCI_EXP_TYPE_DOWNSTREAM ||
@@ -92,10 +89,20 @@ static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
 		return;
 	}
 
+	guard(device)(&pdev->dev);
+	if (!pdev->dev.driver)
+		return;
+
+	struct device *mem_dev __free(put_device) = bus_find_device(
+		&cxl_bus_type, NULL, pdev, match_memdev_by_parent);
+	if (!mem_dev)
+		return;
+
+	cxlmd = to_cxl_memdev(mem_dev);
 	if (data->severity == AER_CORRECTABLE)
-		cxl_cper_trace_corr_prot_err(pdev, data->ras_cap);
+		cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
 	else
-		cxl_cper_trace_uncorr_prot_err(pdev, data->ras_cap);
+		cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
 }
 
 static void cxl_cper_prot_err_work_fn(struct work_struct *work)
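
The ras.c rework resolves the device confusion by not trusting pci_get_drvdata(): the PCI device's drvdata only points at a cxl_dev_state while the cxl_pci driver is bound, and the CPER handler can run against devices bound to other drivers. Instead it walks the CXL bus for the memdev whose parent is the erring PCI device, the standard bus_find_device() matcher pattern, sketched below (find_child_on_bus() is a hypothetical helper; the real callback also checks is_cxl_memdev()):

#include <linux/device.h>

static int match_by_parent(struct device *dev, const void *parent)
{
	return dev->parent == parent;
}

/* bus_find_device() takes a reference; the caller must put_device() it. */
static struct device *find_child_on_bus(const struct bus_type *bus,
					struct device *parent)
{
	return bus_find_device(bus, NULL, parent, match_by_parent);
}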
