Skip to content

Commit 53a810a

Browse files
ilkka-koskinen authored and
willdeacon committed
perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU
Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation specific registers to filter events rather than PMEVFILTnR registers. Signed-off-by: Ilkka Koskinen <[email protected]> Link: https://lore.kernel.org/r/[email protected] [will: Include linux/io.h in ampere_cspmu.c for writel()] Signed-off-by: Will Deacon <[email protected]>
1 parent 647d5c5 commit 53a810a

File tree

6 files changed

+322
-0
lines changed

6 files changed

+322
-0
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
============================================
4+
Ampere SoC Performance Monitoring Unit (PMU)
5+
============================================
6+
7+
Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
8+
Therefore, the driver is implemented as a submodule of the arm_cspmu driver. In
9+
the first phase, it is used for counting MCU events on AmpereOne.
10+
11+
12+
MCU PMU events
13+
--------------
14+
15+
The PMU driver supports setting filters for "rank", "bank", and "threshold".
16+
Note that the filters are per PMU instance rather than per event.
17+
18+
19+
Example for perf tool use::
20+
21+
/ # perf list ampere
22+
23+
ampere_mcu_pmu_0/act_sent/ [Kernel PMU event]
24+
<...>
25+
ampere_mcu_pmu_1/rd_sent/ [Kernel PMU event]
26+
<...>
27+
28+
/ # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
29+
sleep 1

drivers/perf/arm_cspmu/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,13 @@ config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
1717
help
1818
Provides NVIDIA specific attributes for performance monitoring unit
1919
(PMU) devices based on ARM CoreSight PMU architecture.
20+
21+
config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
22+
tristate "Ampere Coresight Architecture PMU"
23+
depends on ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
24+
help
25+
Provides Ampere specific attributes for performance monitoring unit
26+
(PMU) devices based on ARM CoreSight PMU architecture.
27+
28+
In the first phase, the driver enables support on MCU PMU used in
29+
AmpereOne SoC family.

drivers/perf/arm_cspmu/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# SPDX-License-Identifier: GPL-2.0
44

55
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
6+
67
arm_cspmu_module-y := arm_cspmu.o
78

89
obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
10+
obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o

drivers/perf/arm_cspmu/ampere_cspmu.c

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* Ampere SoC PMU (Performance Monitor Unit)
4+
*
5+
* Copyright (c) 2023, Ampere Computing LLC
6+
*/
7+
#include <linux/io.h>
8+
#include <linux/module.h>
9+
#include <linux/topology.h>
10+
11+
#include "arm_cspmu.h"
12+
13+
/*
 * Implementation-specific auxiliary registers, as byte offsets from
 * cspmu->base0. ampere_cspmu_set_ev_filter() writes the threshold, rank
 * and bank filter values to PMAUXR0, PMAUXR1 and PMAUXR2 respectively;
 * PMAUXR3 is not written anywhere in this file.
 */
#define PMAUXR0		0xD80
#define PMAUXR1		0xD84
#define PMAUXR2		0xD88
#define PMAUXR3		0xD8C

/*
 * Fetch the Ampere private context stored in cspmu->impl.ctx.
 * The argument is parenthesised so that any pointer expression, not just
 * a plain identifier, can be passed safely.
 */
#define to_ampere_cspmu_ctx(cspmu) \
	((struct ampere_cspmu_ctx *)((cspmu)->impl.ctx))
19+
20+
/**
 * struct ampere_cspmu_ctx - Ampere private data attached to an arm_cspmu
 * @name:        PMU name returned by ampere_cspmu_get_name()
 * @event_attr:  NULL-terminated event attribute list returned by
 *               ampere_cspmu_get_event_attrs()
 * @format_attr: NULL-terminated format attribute list returned by
 *               ampere_cspmu_get_format_attrs()
 *
 * Allocated and filled in by ampere_cspmu_init_ops() and stored in
 * cspmu->impl.ctx.
 */
struct ampere_cspmu_ctx {
	const char		*name;
	struct attribute	**event_attr;
	struct attribute	**format_attr;
};
25+
26+
static DEFINE_IDA(mcu_pmu_ida);
27+
28+
#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \
29+
static inline u32 get_##_name(const struct perf_event *event) \
30+
{ \
31+
return FIELD_GET(GENMASK_ULL(_end, _start), \
32+
event->attr._config); \
33+
} \
34+
35+
SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
36+
SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
37+
SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
38+
SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
39+
40+
static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
41+
ARM_CSPMU_EVENT_ATTR(cycle_count, 0x00),
42+
ARM_CSPMU_EVENT_ATTR(act_sent, 0x01),
43+
ARM_CSPMU_EVENT_ATTR(pre_sent, 0x02),
44+
ARM_CSPMU_EVENT_ATTR(rd_sent, 0x03),
45+
ARM_CSPMU_EVENT_ATTR(rda_sent, 0x04),
46+
ARM_CSPMU_EVENT_ATTR(wr_sent, 0x05),
47+
ARM_CSPMU_EVENT_ATTR(wra_sent, 0x06),
48+
ARM_CSPMU_EVENT_ATTR(pd_entry_vld, 0x07),
49+
ARM_CSPMU_EVENT_ATTR(sref_entry_vld, 0x08),
50+
ARM_CSPMU_EVENT_ATTR(prea_sent, 0x09),
51+
ARM_CSPMU_EVENT_ATTR(pre_sb_sent, 0x0a),
52+
ARM_CSPMU_EVENT_ATTR(ref_sent, 0x0b),
53+
ARM_CSPMU_EVENT_ATTR(rfm_sent, 0x0c),
54+
ARM_CSPMU_EVENT_ATTR(ref_sb_sent, 0x0d),
55+
ARM_CSPMU_EVENT_ATTR(rfm_sb_sent, 0x0e),
56+
ARM_CSPMU_EVENT_ATTR(rd_rda_sent, 0x0f),
57+
ARM_CSPMU_EVENT_ATTR(wr_wra_sent, 0x10),
58+
ARM_CSPMU_EVENT_ATTR(raw_hazard, 0x11),
59+
ARM_CSPMU_EVENT_ATTR(war_hazard, 0x12),
60+
ARM_CSPMU_EVENT_ATTR(waw_hazard, 0x13),
61+
ARM_CSPMU_EVENT_ATTR(rar_hazard, 0x14),
62+
ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard, 0x15),
63+
ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld, 0x16),
64+
ARM_CSPMU_EVENT_ATTR(lprd_req_vld, 0x17),
65+
ARM_CSPMU_EVENT_ATTR(hprd_req_vld, 0x18),
66+
ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld, 0x19),
67+
ARM_CSPMU_EVENT_ATTR(prefetch_tgt, 0x1a),
68+
ARM_CSPMU_EVENT_ATTR(wr_req_vld, 0x1b),
69+
ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld, 0x1c),
70+
ARM_CSPMU_EVENT_ATTR(rd_retry, 0x1d),
71+
ARM_CSPMU_EVENT_ATTR(wr_retry, 0x1e),
72+
ARM_CSPMU_EVENT_ATTR(retry_gnt, 0x1f),
73+
ARM_CSPMU_EVENT_ATTR(rank_change, 0x20),
74+
ARM_CSPMU_EVENT_ATTR(dir_change, 0x21),
75+
ARM_CSPMU_EVENT_ATTR(rank_dir_change, 0x22),
76+
ARM_CSPMU_EVENT_ATTR(rank_active, 0x23),
77+
ARM_CSPMU_EVENT_ATTR(rank_idle, 0x24),
78+
ARM_CSPMU_EVENT_ATTR(rank_pd, 0x25),
79+
ARM_CSPMU_EVENT_ATTR(rank_sref, 0x26),
80+
ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh, 0x27),
81+
ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh, 0x28),
82+
ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh, 0x29),
83+
ARM_CSPMU_EVENT_ATTR(phy_updt_complt, 0x2a),
84+
ARM_CSPMU_EVENT_ATTR(tz_fail, 0x2b),
85+
ARM_CSPMU_EVENT_ATTR(dram_errc, 0x2c),
86+
ARM_CSPMU_EVENT_ATTR(dram_errd, 0x2d),
87+
ARM_CSPMU_EVENT_ATTR(read_data_return, 0x32),
88+
ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta, 0x33),
89+
ARM_CSPMU_EVENT_ATTR(zq_start, 0x34),
90+
ARM_CSPMU_EVENT_ATTR(zq_latch, 0x35),
91+
ARM_CSPMU_EVENT_ATTR(wr_fifo_full, 0x36),
92+
ARM_CSPMU_EVENT_ATTR(info_fifo_full, 0x37),
93+
ARM_CSPMU_EVENT_ATTR(cmd_fifo_full, 0x38),
94+
ARM_CSPMU_EVENT_ATTR(dfi_nop, 0x39),
95+
ARM_CSPMU_EVENT_ATTR(dfi_cmd, 0x3a),
96+
ARM_CSPMU_EVENT_ATTR(rd_run_len, 0x3b),
97+
ARM_CSPMU_EVENT_ATTR(wr_run_len, 0x3c),
98+
99+
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
100+
NULL,
101+
};
102+
103+
/*
 * perf "format" attributes for the AmpereOne MCU PMU.
 *
 * Besides the common event field, three filter fields are carried in
 * config1. Their bit ranges match the get_threshold(), get_rank() and
 * get_bank() extractors defined in this file. The list is NULL-terminated.
 */
static struct attribute *ampereone_mcu_format_attrs[] = {
	ARM_CSPMU_FORMAT_EVENT_ATTR,
	ARM_CSPMU_FORMAT_ATTR(threshold,	"config1:0-7"),
	ARM_CSPMU_FORMAT_ATTR(rank,		"config1:8-23"),
	ARM_CSPMU_FORMAT_ATTR(bank,		"config1:24-55"),
	NULL,
};
110+
111+
static struct attribute **
112+
ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
113+
{
114+
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
115+
116+
return ctx->event_attr;
117+
}
118+
119+
static struct attribute **
120+
ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
121+
{
122+
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
123+
124+
return ctx->format_attr;
125+
}
126+
127+
static const char *
128+
ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
129+
{
130+
const struct ampere_cspmu_ctx *ctx = to_ampere_cspmu_ctx(cspmu);
131+
132+
return ctx->name;
133+
}
134+
135+
/*
 * Filter value for @event: always zero on Ampere SoC PMU.
 *
 * @event is not inspected.
 */
static u32 ampere_cspmu_event_filter(const struct perf_event *event)
{
	/*
	 * Ampere SoC PMU does not use PMEVFILTR or PMCCFILTR; both are marked
	 * RES0. Always return zero so that PMCCFILTR is written as zero.
	 */
	return 0;
}
143+
144+
static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
145+
struct hw_perf_event *hwc,
146+
u32 filter)
147+
{
148+
struct perf_event *event;
149+
unsigned int idx;
150+
u32 threshold, rank, bank;
151+
152+
/*
153+
* At this point, all the events have the same filter settings.
154+
* Therefore, take the first event and use its configuration.
155+
*/
156+
idx = find_first_bit(cspmu->hw_events.used_ctrs,
157+
cspmu->cycle_counter_logical_idx);
158+
159+
event = cspmu->hw_events.events[idx];
160+
161+
threshold = get_threshold(event);
162+
rank = get_rank(event);
163+
bank = get_bank(event);
164+
165+
writel(threshold, cspmu->base0 + PMAUXR0);
166+
writel(rank, cspmu->base0 + PMAUXR1);
167+
writel(bank, cspmu->base0 + PMAUXR2);
168+
}
169+
170+
static int ampere_cspmu_validate_configs(struct perf_event *event,
171+
struct perf_event *event2)
172+
{
173+
if (get_threshold(event) != get_threshold(event2) ||
174+
get_rank(event) != get_rank(event2) ||
175+
get_bank(event) != get_bank(event2))
176+
return -EINVAL;
177+
178+
return 0;
179+
}
180+
181+
static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
182+
struct perf_event *new)
183+
{
184+
struct perf_event *curr, *leader = new->group_leader;
185+
unsigned int idx;
186+
int ret;
187+
188+
ret = ampere_cspmu_validate_configs(new, leader);
189+
if (ret)
190+
return ret;
191+
192+
/* We compare the global filter settings to the existing events */
193+
idx = find_first_bit(cspmu->hw_events.used_ctrs,
194+
cspmu->cycle_counter_logical_idx);
195+
196+
/* This is the first event, thus any configuration is fine */
197+
if (idx == cspmu->cycle_counter_logical_idx)
198+
return 0;
199+
200+
curr = cspmu->hw_events.events[idx];
201+
202+
return ampere_cspmu_validate_configs(curr, new);
203+
}
204+
205+
static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
206+
const char *name_pattern)
207+
{
208+
struct device *dev = cspmu->dev;
209+
int id;
210+
211+
id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
212+
if (id < 0)
213+
return ERR_PTR(id);
214+
215+
return devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
216+
}
217+
218+
static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
219+
{
220+
struct device *dev = cspmu->dev;
221+
struct ampere_cspmu_ctx *ctx;
222+
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
223+
224+
ctx = devm_kzalloc(dev, sizeof(struct ampere_cspmu_ctx), GFP_KERNEL);
225+
if (!ctx)
226+
return -ENOMEM;
227+
228+
ctx->event_attr = ampereone_mcu_pmu_event_attrs;
229+
ctx->format_attr = ampereone_mcu_format_attrs;
230+
ctx->name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
231+
if (IS_ERR_OR_NULL(ctx->name))
232+
return ctx->name ? PTR_ERR(ctx->name) : -ENOMEM;
233+
234+
cspmu->impl.ctx = ctx;
235+
236+
impl_ops->event_filter = ampere_cspmu_event_filter;
237+
impl_ops->set_ev_filter = ampere_cspmu_set_ev_filter;
238+
impl_ops->validate_event = ampere_cspmu_validate_event;
239+
impl_ops->get_name = ampere_cspmu_get_name;
240+
impl_ops->get_event_attrs = ampere_cspmu_get_event_attrs;
241+
impl_ops->get_format_attrs = ampere_cspmu_get_format_attrs;
242+
243+
return 0;
244+
}
245+
246+
/* Match all Ampere Coresight PMU devices */
247+
static const struct arm_cspmu_impl_match ampere_cspmu_param = {
248+
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
249+
.module = THIS_MODULE,
250+
.impl_init_ops = ampere_cspmu_init_ops
251+
};
252+
253+
/*
 * Module entry point: register the Ampere backend with the arm_cspmu core.
 *
 * Returns the result of arm_cspmu_impl_register(); a non-zero result is
 * also logged.
 */
static int __init ampere_cspmu_init(void)
{
	int err = arm_cspmu_impl_register(&ampere_cspmu_param);

	if (err)
		pr_err("ampere_cspmu backend registration error: %d\n", err);

	return err;
}
263+
264+
/* Module exit point: unregister the Ampere backend from the arm_cspmu core. */
static void __exit ampere_cspmu_exit(void)
{
	arm_cspmu_impl_unregister(&ampere_cspmu_param);
}
268+
269+
module_init(ampere_cspmu_init);
270+
module_exit(ampere_cspmu_exit);
271+
272+
MODULE_LICENSE("GPL");

drivers/perf/arm_cspmu/arm_cspmu.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,14 @@ static struct arm_cspmu_impl_match impl_match[] = {
383383
.module = NULL,
384384
.impl_init_ops = NULL,
385385
},
386+
{
387+
.module_name = "ampere_cspmu",
388+
.pmiidr_val = ARM_CSPMU_IMPL_ID_AMPERE,
389+
.pmiidr_mask = ARM_CSPMU_PMIIDR_IMPLEMENTER,
390+
.module = NULL,
391+
.impl_init_ops = NULL,
392+
},
393+
386394
{0}
387395
};
388396

drivers/perf/arm_cspmu/arm_cspmu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171

7272
/* JEDEC-assigned JEP106 identification code */
7373
#define ARM_CSPMU_IMPL_ID_NVIDIA 0x36B
74+
#define ARM_CSPMU_IMPL_ID_AMPERE 0xA16
7475

7576
struct arm_cspmu;
7677

0 commit comments

Comments
 (0)