Skip to content

Commit d2e1b84

Browse files
rchatre and bp3tk0v
authored and committed
fs/resctrl: Eliminate false positive lockdep warning when reading SNC counters
Running resctrl_tests on an SNC-2 system with lockdep debugging enabled triggers several warnings with the following trace:

  WARNING: CPU: 0 PID: 1914 at kernel/cpu.c:528 lockdep_assert_cpus_held
  ...
  Call Trace:
  __mon_event_count
  ? __lock_acquire
  ? __pfx___mon_event_count
  mon_event_count
  ? __pfx_smp_mon_event_count
  smp_mon_event_count
  smp_call_on_cpu_callback

get_cpu_cacheinfo_level() called from __mon_event_count() requires the CPU hotplug lock to be held. The hotplug lock is indeed held during this time, as confirmed by the lockdep_assert_cpus_held() within mon_event_read() that calls mon_event_count() via IPI, but the lockdep tracking is not able to follow the IPI.

Fresh CPU cache information via get_cpu_cacheinfo_level() from __mon_event_count() was added to support the fix for the issue where resctrl inappropriately maintained links to L3 cache information that will be stale in the case when the associated CPU goes offline.

Keep the cacheinfo ID in struct rdt_mon_domain to ensure that resctrl does not maintain stale cache information while CPUs can go offline. Return to using a pointer to the L3 cache information (struct cacheinfo) in struct rmid_read, rmid_read::ci. Initialize rmid_read::ci before the IPI where it is used. The CPU hotplug lock is held across rmid_read::ci initialization and use to ensure that it points to accurate cache information.

Fixes: 594902c ("x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem")
Signed-off-by: Reinette Chatre <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
1 parent cba4262 commit d2e1b84

File tree

3 files changed

+5
-7
lines changed

3 files changed

+5
-7
lines changed

fs/resctrl/ctrlmondata.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,11 +625,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
625625
*/
626626
list_for_each_entry(d, &r->mon_domains, hdr.list) {
627627
if (d->ci_id == domid) {
628-
rr.ci_id = d->ci_id;
629628
cpu = cpumask_any(&d->hdr.cpu_mask);
630629
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
631630
if (!ci)
632631
continue;
632+
rr.ci = ci;
633633
mon_event_read(&rr, r, NULL, rdtgrp,
634634
&ci->shared_cpu_map, evtid, false);
635635
goto checkresult;

fs/resctrl/internal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ struct mon_data {
9898
* domains in @r sharing L3 @ci.id
9999
* @evtid: Which monitor event to read.
100100
* @first: Initialize MBM counter when true.
101-
* @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
101+
* @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
102102
* @err: Error encountered when reading counter.
103103
* @val: Returned value of event counter. If @rgrp is a parent resource group,
104104
* @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
112112
struct rdt_mon_domain *d;
113113
enum resctrl_event_id evtid;
114114
bool first;
115-
unsigned int ci_id;
115+
struct cacheinfo *ci;
116116
int err;
117117
u64 val;
118118
void *arch_mon_ctx;

fs/resctrl/monitor.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,6 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
361361
{
362362
int cpu = smp_processor_id();
363363
struct rdt_mon_domain *d;
364-
struct cacheinfo *ci;
365364
struct mbm_state *m;
366365
int err, ret;
367366
u64 tval = 0;
@@ -389,8 +388,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
389388
}
390389

391390
/* Summing domains that share a cache, must be on a CPU for that cache. */
392-
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
393-
if (!ci || ci->id != rr->ci_id)
391+
if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
394392
return -EINVAL;
395393

396394
/*
@@ -402,7 +400,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
402400
*/
403401
ret = -EINVAL;
404402
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
405-
if (d->ci_id != rr->ci_id)
403+
if (d->ci_id != rr->ci->id)
406404
continue;
407405
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
408406
rr->evtid, &tval, rr->arch_mon_ctx);

0 commit comments

Comments
 (0)