Skip to content

Commit 594902c

Browse files
Qinyun Tan authored and bp3tk0v committed
x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem
In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain structure representing a NUMA node relies on the cacheinfo interface (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map) for monitoring. The L3 cache information of an SNC NUMA node determines which domains are summed for the "top-level" L3-scoped events.

rdt_mon_domain::ci is initialized using the first online CPU of a NUMA node. When this CPU goes offline, its shared_cpu_map is cleared to contain only the offline CPU itself. Subsequently, attempting to read counters via smp_call_on_cpu(offline_cpu) fails (and the error is ignored), returning zero values for "top-level" events without any error indication.

Replace the cacheinfo references in struct rdt_mon_domain and struct rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).

rdt_domain_hdr::cpu_mask contains the online CPUs associated with that domain. When reading "top-level" events, select a CPU from rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine valid CPUs for reading the RMID counter via the MSR interface.

Considering all CPUs associated with the L3 cache improves the chances of picking a housekeeping CPU on which the counter reading work can be queued, avoiding an unnecessary IPI.

Fixes: 328ea68 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files")
Signed-off-by: Qinyun Tan <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Reinette Chatre <[email protected]>
Tested-by: Tony Luck <[email protected]>
Link: https://lore.kernel.org/[email protected]
1 parent 9afe652 commit 594902c

File tree

6 files changed

+24
-15
lines changed

6 files changed

+24
-15
lines changed

arch/x86/kernel/cpu/resctrl/core.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
498498
struct rdt_hw_mon_domain *hw_dom;
499499
struct rdt_domain_hdr *hdr;
500500
struct rdt_mon_domain *d;
501+
struct cacheinfo *ci;
501502
int err;
502503

503504
lockdep_assert_held(&domain_list_lock);
@@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
525526
d = &hw_dom->d_resctrl;
526527
d->hdr.id = id;
527528
d->hdr.type = RESCTRL_MON_DOMAIN;
528-
d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
529-
if (!d->ci) {
529+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
530+
if (!ci) {
530531
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
531532
mon_domain_free(hw_dom);
532533
return;
533534
}
535+
d->ci_id = ci->id;
534536
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
535537

536538
arch_mon_domain_online(r, d);

fs/resctrl/ctrlmondata.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
594594
struct rmid_read rr = {0};
595595
struct rdt_mon_domain *d;
596596
struct rdtgroup *rdtgrp;
597+
int domid, cpu, ret = 0;
597598
struct rdt_resource *r;
599+
struct cacheinfo *ci;
598600
struct mon_data *md;
599-
int domid, ret = 0;
600601

601602
rdtgrp = rdtgroup_kn_lock_live(of->kn);
602603
if (!rdtgrp) {
@@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
623624
* one that matches this cache id.
624625
*/
625626
list_for_each_entry(d, &r->mon_domains, hdr.list) {
626-
if (d->ci->id == domid) {
627-
rr.ci = d->ci;
627+
if (d->ci_id == domid) {
628+
rr.ci_id = d->ci_id;
629+
cpu = cpumask_any(&d->hdr.cpu_mask);
630+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
631+
if (!ci)
632+
continue;
628633
mon_event_read(&rr, r, NULL, rdtgrp,
629-
&d->ci->shared_cpu_map, evtid, false);
634+
&ci->shared_cpu_map, evtid, false);
630635
goto checkresult;
631636
}
632637
}

fs/resctrl/internal.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ struct mon_data {
9898
* domains in @r sharing L3 @ci.id
9999
* @evtid: Which monitor event to read.
100100
* @first: Initialize MBM counter when true.
101-
* @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
101+
* @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
102102
* @err: Error encountered when reading counter.
103103
* @val: Returned value of event counter. If @rgrp is a parent resource group,
104104
* @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
112112
struct rdt_mon_domain *d;
113113
enum resctrl_event_id evtid;
114114
bool first;
115-
struct cacheinfo *ci;
115+
unsigned int ci_id;
116116
int err;
117117
u64 val;
118118
void *arch_mon_ctx;

fs/resctrl/monitor.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
361361
{
362362
int cpu = smp_processor_id();
363363
struct rdt_mon_domain *d;
364+
struct cacheinfo *ci;
364365
struct mbm_state *m;
365366
int err, ret;
366367
u64 tval = 0;
@@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
388389
}
389390

390391
/* Summing domains that share a cache, must be on a CPU for that cache. */
391-
if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
392+
ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
393+
if (!ci || ci->id != rr->ci_id)
392394
return -EINVAL;
393395

394396
/*
@@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
400402
*/
401403
ret = -EINVAL;
402404
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
403-
if (d->ci->id != rr->ci->id)
405+
if (d->ci_id != rr->ci_id)
404406
continue;
405407
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
406408
rr->evtid, &tval, rr->arch_mon_ctx);

fs/resctrl/rdtgroup.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
30363036
char name[32];
30373037

30383038
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3039-
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3039+
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
30403040
if (snc_mode)
30413041
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
30423042

@@ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
30613061
return -EPERM;
30623062

30633063
list_for_each_entry(mevt, &r->evt_list, list) {
3064-
domid = do_sum ? d->ci->id : d->hdr.id;
3064+
domid = do_sum ? d->ci_id : d->hdr.id;
30653065
priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
30663066
if (WARN_ON_ONCE(!priv))
30673067
return -EINVAL;
@@ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
30893089
lockdep_assert_held(&rdtgroup_mutex);
30903090

30913091
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3092-
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
3092+
sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
30933093
kn = kernfs_find_and_get(parent_kn, name);
30943094
if (kn) {
30953095
/*

include/linux/resctrl.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ struct rdt_ctrl_domain {
159159
/**
160160
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
161161
* @hdr: common header for different domain types
162-
* @ci: cache info for this domain
162+
* @ci_id: cache info id for this domain
163163
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
164164
* @mbm_total: saved state for MBM total bandwidth
165165
* @mbm_local: saved state for MBM local bandwidth
@@ -170,7 +170,7 @@ struct rdt_ctrl_domain {
170170
*/
171171
struct rdt_mon_domain {
172172
struct rdt_domain_hdr hdr;
173-
struct cacheinfo *ci;
173+
unsigned int ci_id;
174174
unsigned long *rmid_busy_llc;
175175
struct mbm_state *mbm_total;
176176
struct mbm_state *mbm_local;

0 commit comments

Comments
 (0)