Skip to content

Commit a54ca19

Browse files
t00214307acmel
authored and committed
perf arm-spe: Support synthetic events
After commit ffd3d18 ("perf tools: Add ARM Statistical Profiling Extensions (SPE) support") was merged, perf can output raw SPE data with the option "--dump-raw-trace". However, it lacks support for synthetic events, so it cannot output any statistical info. This patch improves the "perf report" support for ARM SPE by adding four types of synthetic events: First level cache synthetic events, including L1 data cache access and miss events; Last level cache synthetic events, including last level cache access and miss events; TLB synthetic events, including TLB access and miss events; Remote access events, which are used to account for load/store operations that target another socket. Example usage: $ perf record -c 1024 -e arm_spe_0/branch_filter=1,ts_enable=1,pct_enable=1,pa_enable=1,load_filter=1,jitter=1,store_filter=1,min_latency=0/ dd if=/dev/zero of=/dev/null count=10000 $ perf report --stdio # Samples: 59 of event 'l1d-miss' # Event count (approx.): 59 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. .................................. # 23.73% 23.73% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135 20.34% 20.34% dd [kernel.kallsyms] [k] filemap_map_pages 5.08% 5.08% dd [kernel.kallsyms] [k] perf_event_mmap 5.08% 5.08% dd [kernel.kallsyms] [k] unlock_page_memcg 5.08% 5.08% dd [kernel.kallsyms] [k] unmap_page_range 3.39% 3.39% dd [kernel.kallsyms] [k] PageHuge 3.39% 3.39% dd [kernel.kallsyms] [k] release_pages 3.39% 3.39% dd ld-2.28.so [.] 0x0000000000008b5c 1.69% 1.69% dd [kernel.kallsyms] [k] __alloc_fd [...] # Samples: 3K of event 'l1d-access' # Event count (approx.): 3980 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ...................................... 
# 26.98% 26.98% dd [kernel.kallsyms] [k] ret_to_user 10.53% 10.53% dd [kernel.kallsyms] [k] fsnotify 7.51% 7.51% dd [kernel.kallsyms] [k] new_sync_read 4.57% 4.57% dd [kernel.kallsyms] [k] vfs_read 4.35% 4.35% dd [kernel.kallsyms] [k] vfs_write 3.69% 3.69% dd [kernel.kallsyms] [k] __fget_light 3.69% 3.69% dd [kernel.kallsyms] [k] rw_verify_area 3.44% 3.44% dd [kernel.kallsyms] [k] security_file_permission 2.76% 2.76% dd [kernel.kallsyms] [k] __fsnotify_parent 2.44% 2.44% dd [kernel.kallsyms] [k] ksys_write 2.24% 2.24% dd [kernel.kallsyms] [k] iov_iter_zero 2.19% 2.19% dd [kernel.kallsyms] [k] read_iter_zero 1.81% 1.81% dd dd [.] 0x0000000000002960 1.78% 1.78% dd dd [.] 0x0000000000002980 [...] # Samples: 35 of event 'llc-miss' # Event count (approx.): 35 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ........................... # 34.29% 34.29% dd [kernel.kallsyms] [k] filemap_map_pages 8.57% 8.57% dd [kernel.kallsyms] [k] unlock_page_memcg 8.57% 8.57% dd [kernel.kallsyms] [k] unmap_page_range 5.71% 5.71% dd [kernel.kallsyms] [k] PageHuge 5.71% 5.71% dd [kernel.kallsyms] [k] release_pages 5.71% 5.71% dd ld-2.28.so [.] 0x0000000000008b5c 2.86% 2.86% dd [kernel.kallsyms] [k] __queue_work 2.86% 2.86% dd [kernel.kallsyms] [k] __radix_tree_lookup 2.86% 2.86% dd [kernel.kallsyms] [k] copy_page [...] # Samples: 2 of event 'llc-access' # Event count (approx.): 2 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ............. # 50.00% 50.00% dd [kernel.kallsyms] [k] copy_page 50.00% 50.00% dd libc-2.28.so [.] _dl_addr # Samples: 48 of event 'tlb-miss' # Event count (approx.): 48 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. .................................. 
# 20.83% 20.83% dd [kernel.kallsyms] [k] perf_iterate_ctx.constprop.135 12.50% 12.50% dd [kernel.kallsyms] [k] __arch_clear_user 10.42% 10.42% dd [kernel.kallsyms] [k] clear_page 4.17% 4.17% dd [kernel.kallsyms] [k] copy_page 4.17% 4.17% dd [kernel.kallsyms] [k] filemap_map_pages 2.08% 2.08% dd [kernel.kallsyms] [k] __alloc_fd 2.08% 2.08% dd [kernel.kallsyms] [k] __mod_memcg_state.part.70 2.08% 2.08% dd [kernel.kallsyms] [k] __queue_work 2.08% 2.08% dd [kernel.kallsyms] [k] __rcu_read_unlock 2.08% 2.08% dd [kernel.kallsyms] [k] d_path 2.08% 2.08% dd [kernel.kallsyms] [k] destroy_inode 2.08% 2.08% dd [kernel.kallsyms] [k] do_dentry_open [...] # Samples: 9K of event 'tlb-access' # Event count (approx.): 9573 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ...................................... # 25.79% 25.79% dd [kernel.kallsyms] [k] __arch_clear_user 11.22% 11.22% dd [kernel.kallsyms] [k] ret_to_user 8.56% 8.56% dd [kernel.kallsyms] [k] fsnotify 4.06% 4.06% dd [kernel.kallsyms] [k] new_sync_read 3.67% 3.67% dd [kernel.kallsyms] [k] el0_svc_common.constprop.2 3.04% 3.04% dd [kernel.kallsyms] [k] __fsnotify_parent 2.90% 2.90% dd [kernel.kallsyms] [k] vfs_write 2.82% 2.82% dd [kernel.kallsyms] [k] vfs_read 2.52% 2.52% dd libc-2.28.so [.] write 2.26% 2.26% dd [kernel.kallsyms] [k] security_file_permission 2.08% 2.08% dd [kernel.kallsyms] [k] ksys_write 1.96% 1.96% dd [kernel.kallsyms] [k] rw_verify_area 1.95% 1.95% dd [kernel.kallsyms] [k] read_iter_zero [...] # Samples: 9 of event 'branch-miss' # Event count (approx.): 9 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ......................... # 22.22% 22.22% dd libc-2.28.so [.] 
_dl_addr 11.11% 11.11% dd [kernel.kallsyms] [k] __arch_clear_user 11.11% 11.11% dd [kernel.kallsyms] [k] __arch_copy_from_user 11.11% 11.11% dd [kernel.kallsyms] [k] __dentry_kill 11.11% 11.11% dd [kernel.kallsyms] [k] __efistub_memcpy 11.11% 11.11% dd ld-2.28.so [.] 0x0000000000012b7c 11.11% 11.11% dd libc-2.28.so [.] 0x000000000002a980 11.11% 11.11% dd libc-2.28.so [.] 0x0000000000083340 # Samples: 29 of event 'remote-access' # Event count (approx.): 29 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................. ........................... # 41.38% 41.38% dd [kernel.kallsyms] [k] filemap_map_pages 10.34% 10.34% dd [kernel.kallsyms] [k] unlock_page_memcg 10.34% 10.34% dd [kernel.kallsyms] [k] unmap_page_range 6.90% 6.90% dd [kernel.kallsyms] [k] release_pages 3.45% 3.45% dd [kernel.kallsyms] [k] PageHuge 3.45% 3.45% dd [kernel.kallsyms] [k] __queue_work 3.45% 3.45% dd [kernel.kallsyms] [k] page_add_file_rmap 3.45% 3.45% dd [kernel.kallsyms] [k] page_counter_try_charge 3.45% 3.45% dd [kernel.kallsyms] [k] page_remove_rmap 3.45% 3.45% dd [kernel.kallsyms] [k] xas_start 3.45% 3.45% dd ld-2.28.so [.] 0x0000000000002a1c 3.45% 3.45% dd ld-2.28.so [.] 0x0000000000008b5c 3.45% 3.45% dd ld-2.28.so [.] 
0x00000000000093cc Signed-off-by: Tan Xiaojun <[email protected]> Tested-by: James Clark <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Al Grant <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Ian Rogers <[email protected]> Cc: Jin Yao <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Leo Yan <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Mathieu Poirier <[email protected]> Cc: Mike Leach <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Will Deacon <[email protected]> Cc: [email protected] Link: http://lore.kernel.org/lkml/[email protected] Signed-off-by: James Clark <[email protected]> Signed-off-by: Leo Yan <[email protected]> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 9f74d77 commit a54ca19

File tree

5 files changed

+1097
-43
lines changed

5 files changed

+1097
-43
lines changed

tools/perf/util/arm-spe-decoder/Build

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
1+
perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/*
3+
* arm_spe_decoder.c: ARM SPE support
4+
*/
5+
6+
#ifndef _GNU_SOURCE
7+
#define _GNU_SOURCE
8+
#endif
9+
#include <errno.h>
10+
#include <inttypes.h>
11+
#include <stdbool.h>
12+
#include <string.h>
13+
#include <stdint.h>
14+
#include <stdlib.h>
15+
#include <linux/compiler.h>
16+
#include <linux/zalloc.h>
17+
18+
#include "../auxtrace.h"
19+
#include "../debug.h"
20+
#include "../util.h"
21+
22+
#include "arm-spe-decoder.h"
23+
24+
#ifndef BIT
25+
#define BIT(n) (1UL << (n))
26+
#endif
27+
28+
/*
 * arm_spe_calc_ip() - canonicalise the payload of an SPE address packet.
 * @index:   address packet header index (SPE_ADDR_PKT_HDR_INDEX_*)
 * @payload: raw 64-bit payload carried by the address packet
 *
 * Bits [63:56] of the payload are replaced depending on the packet index:
 *  - instruction / branch target address: the byte carries the NS and EL
 *    flags; it becomes 0xff for non-secure EL1/EL2 and 0x00 otherwise;
 *  - data virtual address: it becomes 0xff when bits [55:48] are 0xff,
 *    otherwise it is cleared (tag cleanup);
 *  - data physical address: it is cleared.
 *
 * The payload is handled with shifts and masks rather than by aliasing it
 * as a byte array, so the result does not depend on host byte order.
 *
 * Return: the adjusted address, or @payload unchanged (after logging an
 * error) when @index is not one of the supported values.
 */
static u64 arm_spe_calc_ip(int index, u64 payload)
{
	const unsigned int top_shift = 56;
	const u64 top_mask = (u64)0xff << top_shift;
	/* Most significant byte: flags or tag, never address bits */
	u8 top = (payload >> top_shift) & 0xff;
	/* Address with the most significant byte cleared */
	u64 addr = payload & ~top_mask;
	int ns, el;

	/* Instruction virtual address or Branch target address */
	if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
	    index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
		ns = top & SPE_ADDR_PKT_NS;
		el = (top & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;

		/* Fill highest byte for EL1 or EL2 (VHE) mode */
		if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
			addr |= top_mask;

	/* Data access virtual address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
		/* Fill highest byte if bits [48..55] is 0xff */
		if (((payload >> 48) & 0xff) == 0xff)
			addr |= top_mask;

	/* Data access physical address */
	} else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
		/* Byte 7 is already cleared in addr */
	} else {
		pr_err("unsupported address packet index: 0x%x\n", index);
		/* Unknown index: hand the payload back untouched */
		return payload;
	}

	return addr;
}
66+
67+
struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
68+
{
69+
struct arm_spe_decoder *decoder;
70+
71+
if (!params->get_trace)
72+
return NULL;
73+
74+
decoder = zalloc(sizeof(struct arm_spe_decoder));
75+
if (!decoder)
76+
return NULL;
77+
78+
decoder->get_trace = params->get_trace;
79+
decoder->data = params->data;
80+
81+
return decoder;
82+
}
83+
84+
/*
 * arm_spe_decoder_free() - release a decoder.
 * @decoder: decoder to free
 *
 * Only the decoder structure itself is freed; the trace buffer it points
 * at is not touched here.
 */
void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
{
	free(decoder);
}
88+
89+
static int arm_spe_get_data(struct arm_spe_decoder *decoder)
90+
{
91+
struct arm_spe_buffer buffer = { .buf = 0, };
92+
int ret;
93+
94+
pr_debug("Getting more data\n");
95+
ret = decoder->get_trace(&buffer, decoder->data);
96+
if (ret < 0)
97+
return ret;
98+
99+
decoder->buf = buffer.buf;
100+
decoder->len = buffer.len;
101+
102+
if (!decoder->len)
103+
pr_debug("No more data\n");
104+
105+
return decoder->len;
106+
}
107+
108+
static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
109+
{
110+
int ret;
111+
112+
do {
113+
if (!decoder->len) {
114+
ret = arm_spe_get_data(decoder);
115+
116+
/* Failed to read out trace data */
117+
if (ret <= 0)
118+
return ret;
119+
}
120+
121+
ret = arm_spe_get_packet(decoder->buf, decoder->len,
122+
&decoder->packet);
123+
if (ret <= 0) {
124+
/* Move forward for 1 byte */
125+
decoder->buf += 1;
126+
decoder->len -= 1;
127+
return -EBADMSG;
128+
}
129+
130+
decoder->buf += ret;
131+
decoder->len -= ret;
132+
} while (decoder->packet.type == ARM_SPE_PAD);
133+
134+
return 1;
135+
}
136+
137+
static int arm_spe_read_record(struct arm_spe_decoder *decoder)
138+
{
139+
int err;
140+
int idx;
141+
u64 payload, ip;
142+
143+
memset(&decoder->record, 0x0, sizeof(decoder->record));
144+
145+
while (1) {
146+
err = arm_spe_get_next_packet(decoder);
147+
if (err <= 0)
148+
return err;
149+
150+
idx = decoder->packet.index;
151+
payload = decoder->packet.payload;
152+
153+
switch (decoder->packet.type) {
154+
case ARM_SPE_TIMESTAMP:
155+
decoder->record.timestamp = payload;
156+
return 1;
157+
case ARM_SPE_END:
158+
return 1;
159+
case ARM_SPE_ADDRESS:
160+
ip = arm_spe_calc_ip(idx, payload);
161+
if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
162+
decoder->record.from_ip = ip;
163+
else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
164+
decoder->record.to_ip = ip;
165+
break;
166+
case ARM_SPE_COUNTER:
167+
break;
168+
case ARM_SPE_CONTEXT:
169+
break;
170+
case ARM_SPE_OP_TYPE:
171+
break;
172+
case ARM_SPE_EVENTS:
173+
if (payload & BIT(EV_L1D_REFILL))
174+
decoder->record.type |= ARM_SPE_L1D_MISS;
175+
176+
if (payload & BIT(EV_L1D_ACCESS))
177+
decoder->record.type |= ARM_SPE_L1D_ACCESS;
178+
179+
if (payload & BIT(EV_TLB_WALK))
180+
decoder->record.type |= ARM_SPE_TLB_MISS;
181+
182+
if (payload & BIT(EV_TLB_ACCESS))
183+
decoder->record.type |= ARM_SPE_TLB_ACCESS;
184+
185+
if ((idx == 1 || idx == 2 || idx == 3) &&
186+
(payload & BIT(EV_LLC_MISS)))
187+
decoder->record.type |= ARM_SPE_LLC_MISS;
188+
189+
if ((idx == 1 || idx == 2 || idx == 3) &&
190+
(payload & BIT(EV_LLC_ACCESS)))
191+
decoder->record.type |= ARM_SPE_LLC_ACCESS;
192+
193+
if ((idx == 1 || idx == 2 || idx == 3) &&
194+
(payload & BIT(EV_REMOTE_ACCESS)))
195+
decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
196+
197+
if (payload & BIT(EV_MISPRED))
198+
decoder->record.type |= ARM_SPE_BRANCH_MISS;
199+
200+
break;
201+
case ARM_SPE_DATA_SOURCE:
202+
break;
203+
case ARM_SPE_BAD:
204+
break;
205+
case ARM_SPE_PAD:
206+
break;
207+
default:
208+
pr_err("Get packet error!\n");
209+
return -1;
210+
}
211+
}
212+
213+
return 0;
214+
}
215+
216+
/*
 * arm_spe_decode() - public entry point: decode the next record.
 * @decoder: decoder created by arm_spe_decoder_new()
 *
 * Return: whatever arm_spe_read_record() returns; the decoded data is left
 * in decoder->record.
 */
int arm_spe_decode(struct arm_spe_decoder *decoder)
{
	int ret = arm_spe_read_record(decoder);

	return ret;
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* arm_spe_decoder.h: Arm Statistical Profiling Extensions support
4+
* Copyright (c) 2019-2020, Arm Ltd.
5+
*/
6+
7+
#ifndef INCLUDE__ARM_SPE_DECODER_H__
8+
#define INCLUDE__ARM_SPE_DECODER_H__
9+
10+
#include <stdbool.h>
11+
#include <stddef.h>
12+
#include <stdint.h>
13+
14+
#include "arm-spe-pkt-decoder.h"
15+
16+
/*
 * Bit positions within the payload of an events packet; the decoder tests
 * them with BIT(EV_*).
 */
enum arm_spe_events {
	EV_EXCEPTION_GEN	= 0,
	EV_RETIRED		= 1,
	EV_L1D_ACCESS		= 2,
	EV_L1D_REFILL		= 3,
	EV_TLB_ACCESS		= 4,
	EV_TLB_WALK		= 5,
	EV_NOT_TAKEN		= 6,
	EV_MISPRED		= 7,
	EV_LLC_ACCESS		= 8,
	EV_LLC_MISS		= 9,
	EV_REMOTE_ACCESS	= 10,
	EV_ALIGNMENT		= 11,
	/* Positions 12-16 are not named here */
	EV_PARTIAL_PREDICATE	= 17,
	EV_EMPTY_PREDICATE	= 18,
};
32+
33+
/*
 * Flags describing what a decoded record represents. Several may be set
 * at once; they are OR-ed into arm_spe_record.type.
 */
enum arm_spe_sample_type {
	ARM_SPE_L1D_ACCESS	= 1 << 0,
	ARM_SPE_L1D_MISS	= 1 << 1,
	ARM_SPE_LLC_ACCESS	= 1 << 2,
	ARM_SPE_LLC_MISS	= 1 << 3,
	ARM_SPE_TLB_ACCESS	= 1 << 4,
	ARM_SPE_TLB_MISS	= 1 << 5,
	ARM_SPE_BRANCH_MISS	= 1 << 6,
	ARM_SPE_REMOTE_ACCESS	= 1 << 7,
};
43+
44+
struct arm_spe_record {
45+
enum arm_spe_sample_type type;
46+
int err;
47+
u64 from_ip;
48+
u64 to_ip;
49+
u64 timestamp;
50+
};
51+
52+
struct arm_spe_insn;
53+
54+
struct arm_spe_buffer {
55+
const unsigned char *buf;
56+
size_t len;
57+
u64 offset;
58+
u64 trace_nr;
59+
};
60+
61+
/* Construction parameters for arm_spe_decoder_new(). */
struct arm_spe_params {
	/*
	 * Callback that fills in the next chunk of trace data; a negative
	 * return value is treated as an error. Mandatory.
	 */
	int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
	/* Opaque pointer passed back as the callback's second argument */
	void *data;
};
65+
66+
struct arm_spe_decoder {
67+
int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
68+
void *data;
69+
struct arm_spe_record record;
70+
71+
const unsigned char *buf;
72+
size_t len;
73+
74+
struct arm_spe_pkt packet;
75+
};
76+
77+
struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
78+
void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
79+
80+
int arm_spe_decode(struct arm_spe_decoder *decoder);
81+
82+
#endif

tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#define ARM_SPE_NEED_MORE_BYTES -1
1616
#define ARM_SPE_BAD_PACKET -2
1717

18+
#define ARM_SPE_PKT_MAX_SZ 16
19+
1820
enum arm_spe_pkt_type {
1921
ARM_SPE_BAD,
2022
ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
3436
uint64_t payload;
3537
};
3638

39+
#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0)
40+
#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1)
41+
#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2)
42+
#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3)
43+
44+
#define SPE_ADDR_PKT_NS BIT(7)
45+
#define SPE_ADDR_PKT_CH BIT(6)
46+
#define SPE_ADDR_PKT_EL_OFFSET (5)
47+
#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET)
48+
#define SPE_ADDR_PKT_EL0 (0)
49+
#define SPE_ADDR_PKT_EL1 (1)
50+
#define SPE_ADDR_PKT_EL2 (2)
51+
#define SPE_ADDR_PKT_EL3 (3)
52+
3753
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
3854

3955
int arm_spe_get_packet(const unsigned char *buf, size_t len,

0 commit comments

Comments
 (0)