
Commit 7633ec2

rmurphy-arm authored and willdeacon committed
perf/arm-cmn: Rework DTC counters (again)
The bitmap-based scheme for tracking DTC counter usage turns out to be a complete dead-end for its imagined purpose, since by the time we have to keep track of a per-DTC counter index anyway, we already have enough information to make the bitmap itself redundant. Revert the remains of it back to almost the original scheme, but now expanded to track per-DTC indices, in preparation for making use of them in anger.

Note that since cycle count events always use a dedicated counter on a single DTC, we reuse the field to encode their DTC index directly.

Signed-off-by: Robin Murphy <[email protected]>
Reviewed-by: Ilkka Koskinen <[email protected]>
Link: https://lore.kernel.org/r/5f6ade76b47f033836d7a36c03555da896dfb4a3.1697824215.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <[email protected]>
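For illustration only, here is a minimal user-space C sketch of the tracking scheme this change moves to: an array of signed per-DTC counter indices, where -1 means "this event is not counting on that DTC", iterated with a for-each macro analogous to the driver's for_each_hw_dtc_idx(). The names MAX_DTCS, hw_event and for_each_dtc_idx below are invented for the sketch and are not part of the driver.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_DTCS 4	/* stand-in for CMN_MAX_DTCS */

struct hw_event {
	int8_t dtc_idx[MAX_DTCS];	/* counter index per DTC, or -1 if unused */
};

/* @i is the DTC number, @idx is the counter index on that DTC */
#define for_each_dtc_idx(hw, i, idx) \
	for (int i = 0, idx; i < MAX_DTCS; i++) if ((idx = (hw)->dtc_idx[i]) >= 0)

int main(void)
{
	struct hw_event hw;

	/* No DTC in use yet: every slot holds the -1 sentinel */
	memset(hw.dtc_idx, -1, sizeof(hw.dtc_idx));

	/* Pretend counter 3 was allocated for this event on DTC 0 and DTC 2 */
	hw.dtc_idx[0] = 3;
	hw.dtc_idx[2] = 3;

	/* Only DTCs actually counting the event are visited */
	for_each_dtc_idx(&hw, i, idx)
		printf("DTC %d: counter %d\n", i, idx);

	return 0;
}

Unlike the old dtcs_used bitmap paired with a single shared counter index, an array like this can in principle hold a different counter number per DTC, which is what the commit message means by tracking per-DTC indices "in preparation for making use of them in anger".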
1 parent e3e73f5 commit 7633ec2

File tree

1 file changed: +64 -62 lines changed


drivers/perf/arm-cmn.c

Lines changed: 64 additions & 62 deletions
@@ -281,16 +281,13 @@ struct arm_cmn_node {
 	u16 id, logid;
 	enum cmn_node_type type;

-	int dtm;
-	union {
-		/* DN/HN-F/CXHA */
-		struct {
-			u8 val : 4;
-			u8 count : 4;
-		} occupid[SEL_MAX];
-		/* XP */
-		u8 dtc;
-	};
+	u8 dtm;
+	s8 dtc;
+	/* DN/HN-F/CXHA */
+	struct {
+		u8 val : 4;
+		u8 count : 4;
+	} occupid[SEL_MAX];
 	union {
 		u8 event[4];
 		__le32 event_sel;
@@ -540,12 +537,12 @@ static int arm_cmn_map_show(struct seq_file *s, void *data)

 		seq_puts(s, "\n     |");
 		for (x = 0; x < cmn->mesh_x; x++) {
-			u8 dtc = cmn->xps[xp_base + x].dtc;
+			s8 dtc = cmn->xps[xp_base + x].dtc;

-			if (dtc & (dtc - 1))
+			if (dtc < 0)
 				seq_puts(s, " DTC ?? |");
 			else
-				seq_printf(s, " DTC %ld |", __ffs(dtc));
+				seq_printf(s, " DTC %d |", dtc);
 		}
 		seq_puts(s, "\n     |");
 		for (x = 0; x < cmn->mesh_x; x++)
@@ -589,8 +586,7 @@ static void arm_cmn_debugfs_init(struct arm_cmn *cmn, int id) {}
 struct arm_cmn_hw_event {
 	struct arm_cmn_node *dn;
 	u64 dtm_idx[4];
-	unsigned int dtc_idx;
-	u8 dtcs_used;
+	s8 dtc_idx[CMN_MAX_DTCS];
 	u8 num_dns;
 	u8 dtm_offset;
 	bool wide_sel;
@@ -600,6 +596,10 @@ struct arm_cmn_hw_event {
 #define for_each_hw_dn(hw, dn, i) \
 	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)

+/* @i is the DTC number, @idx is the counter index on that DTC */
+#define for_each_hw_dtc_idx(hw, i, idx) \
+	for (int i = 0, idx; i < CMN_MAX_DTCS; i++) if ((idx = hw->dtc_idx[i]) >= 0)
+
 static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
 {
 	BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
@@ -1429,12 +1429,11 @@ static void arm_cmn_init_counter(struct perf_event *event)
 {
 	struct arm_cmn *cmn = to_cmn(event->pmu);
 	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-	unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
 	u64 count;

-	for (i = 0; hw->dtcs_used & (1U << i); i++) {
-		writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
-		cmn->dtc[i].counters[hw->dtc_idx] = event;
+	for_each_hw_dtc_idx(hw, i, idx) {
+		writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + CMN_DT_PMEVCNT(idx));
+		cmn->dtc[i].counters[idx] = event;
 	}

 	count = arm_cmn_read_dtm(cmn, hw, false);
@@ -1447,11 +1446,9 @@ static void arm_cmn_event_read(struct perf_event *event)
 	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
 	u64 delta, new, prev;
 	unsigned long flags;
-	unsigned int i;

-	if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
-		i = __ffs(hw->dtcs_used);
-		delta = arm_cmn_read_cc(cmn->dtc + i);
+	if (CMN_EVENT_TYPE(event) == CMN_TYPE_DTC) {
+		delta = arm_cmn_read_cc(cmn->dtc + hw->dtc_idx[0]);
 		local64_add(delta, &event->count);
 		return;
 	}
@@ -1461,8 +1458,8 @@ static void arm_cmn_event_read(struct perf_event *event)
 	delta = new - prev;

 	local_irq_save(flags);
-	for (i = 0; hw->dtcs_used & (1U << i); i++) {
-		new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+	for_each_hw_dtc_idx(hw, i, idx) {
+		new = arm_cmn_read_counter(cmn->dtc + i, idx);
 		delta += new << 16;
 	}
 	local_irq_restore(flags);
@@ -1518,7 +1515,7 @@ static void arm_cmn_event_start(struct perf_event *event, int flags)
 	int i;

 	if (type == CMN_TYPE_DTC) {
-		i = __ffs(hw->dtcs_used);
+		i = hw->dtc_idx[0];
 		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
 		cmn->dtc[i].cc_active = true;
 	} else if (type == CMN_TYPE_WP) {
@@ -1549,7 +1546,7 @@ static void arm_cmn_event_stop(struct perf_event *event, int flags)
 	int i;

 	if (type == CMN_TYPE_DTC) {
-		i = __ffs(hw->dtcs_used);
+		i = hw->dtc_idx[0];
 		cmn->dtc[i].cc_active = false;
 	} else if (type == CMN_TYPE_WP) {
 		int wp_idx = arm_cmn_wp_idx(event);
@@ -1735,12 +1732,19 @@ static int arm_cmn_event_init(struct perf_event *event)
 	hw->dn = arm_cmn_node(cmn, type);
 	if (!hw->dn)
 		return -EINVAL;
+
+	memset(hw->dtc_idx, -1, sizeof(hw->dtc_idx));
 	for (dn = hw->dn; dn->type == type; dn++) {
 		if (bynodeid && dn->id != nodeid) {
 			hw->dn++;
 			continue;
 		}
 		hw->num_dns++;
+		if (dn->dtc < 0)
+			memset(hw->dtc_idx, 0, cmn->num_dtcs);
+		else
+			hw->dtc_idx[dn->dtc] = 0;
+
 		if (bynodeid)
 			break;
 	}
@@ -1752,12 +1756,6 @@ static int arm_cmn_event_init(struct perf_event *event)
 			nodeid, nid.x, nid.y, nid.port, nid.dev, type);
 		return -EINVAL;
 	}
-	/*
-	 * Keep assuming non-cycles events count in all DTC domains; turns out
-	 * it's hard to make a worthwhile optimisation around this, short of
-	 * going all-in with domain-local counter allocation as well.
-	 */
-	hw->dtcs_used = (1U << cmn->num_dtcs) - 1;

 	return arm_cmn_validate_group(cmn, event);
 }
@@ -1783,46 +1781,48 @@ static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
 	}
 	memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));

-	for (i = 0; hw->dtcs_used & (1U << i); i++)
-		cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+	for_each_hw_dtc_idx(hw, j, idx)
+		cmn->dtc[j].counters[idx] = NULL;
 }

 static int arm_cmn_event_add(struct perf_event *event, int flags)
 {
 	struct arm_cmn *cmn = to_cmn(event->pmu);
 	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
-	struct arm_cmn_dtc *dtc = &cmn->dtc[0];
 	struct arm_cmn_node *dn;
 	enum cmn_node_type type = CMN_EVENT_TYPE(event);
-	unsigned int i, dtc_idx, input_sel;
+	unsigned int input_sel, i = 0;

 	if (type == CMN_TYPE_DTC) {
-		i = 0;
 		while (cmn->dtc[i].cycles)
 			if (++i == cmn->num_dtcs)
 				return -ENOSPC;

 		cmn->dtc[i].cycles = event;
-		hw->dtc_idx = CMN_DT_NUM_COUNTERS;
-		hw->dtcs_used = 1U << i;
+		hw->dtc_idx[0] = i;

 		if (flags & PERF_EF_START)
 			arm_cmn_event_start(event, 0);
 		return 0;
 	}

 	/* Grab a free global counter first... */
-	dtc_idx = 0;
-	while (dtc->counters[dtc_idx])
-		if (++dtc_idx == CMN_DT_NUM_COUNTERS)
-			return -ENOSPC;
-
-	hw->dtc_idx = dtc_idx;
+	for_each_hw_dtc_idx(hw, j, idx) {
+		if (j > 0) {
+			idx = hw->dtc_idx[0];
+		} else {
+			idx = 0;
+			while (cmn->dtc[j].counters[idx])
+				if (++idx == CMN_DT_NUM_COUNTERS)
+					goto free_dtms;
+		}
+		hw->dtc_idx[j] = idx;
+	}

 	/* ...then the local counters to feed it. */
 	for_each_hw_dn(hw, dn, i) {
 		struct arm_cmn_dtm *dtm = &cmn->dtms[dn->dtm] + hw->dtm_offset;
-		unsigned int dtm_idx, shift;
+		unsigned int dtm_idx, shift, d = 0;
 		u64 reg;

 		dtm_idx = 0;
@@ -1841,11 +1841,11 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)

 			tmp = dtm->wp_event[wp_idx ^ 1];
 			if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
-					CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+					CMN_EVENT_WP_COMBINE(cmn->dtc[d].counters[tmp]))
 				goto free_dtms;

 			input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
-			dtm->wp_event[wp_idx] = dtc_idx;
+			dtm->wp_event[wp_idx] = hw->dtc_idx[d];
 			writel_relaxed(cfg, dtm->base + CMN_DTM_WPn_CONFIG(wp_idx));
 		} else {
 			struct arm_cmn_nodeid nid = arm_cmn_nid(cmn, dn->id);
@@ -1865,7 +1865,7 @@ static int arm_cmn_event_add(struct perf_event *event, int flags)
 		dtm->input_sel[dtm_idx] = input_sel;
 		shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
 		dtm->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
-		dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+		dtm->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, hw->dtc_idx[d]) << shift;
 		dtm->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
 		reg = (u64)le32_to_cpu(dtm->pmu_config_high) << 32 | dtm->pmu_config_low;
 		writeq_relaxed(reg, dtm->base + CMN_DTM_PMU_CONFIG);
@@ -1893,7 +1893,7 @@ static void arm_cmn_event_del(struct perf_event *event, int flags)
 	arm_cmn_event_stop(event, PERF_EF_UPDATE);

 	if (type == CMN_TYPE_DTC)
-		cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+		cmn->dtc[hw->dtc_idx[0]].cycles = NULL;
 	else
 		arm_cmn_event_clear(cmn, event, hw->num_dns);
 }
@@ -2074,7 +2074,6 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
 {
 	struct arm_cmn_node *dn, *xp;
 	int dtc_idx = 0;
-	u8 dtcs_present = (1 << cmn->num_dtcs) - 1;

 	cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
 	if (!cmn->dtc)
@@ -2084,23 +2083,26 @@ static int arm_cmn_init_dtcs(struct arm_cmn *cmn)

 	cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);

+	if (cmn->part == PART_CMN600 && cmn->num_dtcs > 1) {
+		/* We do at least know that a DTC's XP must be in that DTC's domain */
+		dn = arm_cmn_node(cmn, CMN_TYPE_DTC);
+		for (int i = 0; i < cmn->num_dtcs; i++)
+			arm_cmn_node_to_xp(cmn, dn + i)->dtc = i;
+	}
+
 	for (dn = cmn->dns; dn->type; dn++) {
-		if (dn->type == CMN_TYPE_XP) {
-			dn->dtc &= dtcs_present;
+		if (dn->type == CMN_TYPE_XP)
 			continue;
-		}

 		xp = arm_cmn_node_to_xp(cmn, dn);
+		dn->dtc = xp->dtc;
 		dn->dtm = xp->dtm;
 		if (cmn->multi_dtm)
 			dn->dtm += arm_cmn_nid(cmn, dn->id).port / 2;

 		if (dn->type == CMN_TYPE_DTC) {
-			int err;
-			/* We do at least know that a DTC's XP must be in that DTC's domain */
-			if (xp->dtc == 0xf)
-				xp->dtc = 1 << dtc_idx;
-			err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+			int err = arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
 			if (err)
 				return err;
 		}
@@ -2258,9 +2260,9 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 			cmn->mesh_x = xp->logid;

 		if (cmn->part == PART_CMN600)
-			xp->dtc = 0xf;
+			xp->dtc = -1;
 		else
-			xp->dtc = 1 << arm_cmn_dtc_domain(cmn, xp_region);
+			xp->dtc = arm_cmn_dtc_domain(cmn, xp_region);

 		xp->dtm = dtm - cmn->dtms;
 		arm_cmn_init_dtm(dtm++, xp, 0);
