Skip to content

Commit cae1d75

Browse files
kjain101Peter Zijlstra
authored andcommitted
tools/perf: Add mem_hops field in perf_mem_data_src structure
Going forward, future generation systems can have more hierarchy within the node/package level, but currently we don't have any data source encoding field in perf which can be used to represent this level of data.

Add a new field called 'mem_hops' in the perf_mem_data_src structure which can be used to represent intra-node/package or inter-node/off-package details. This field is 3 bits wide, where the PERF_MEM_HOPS_{NA, 0..6} values can be used to represent different hop-level data.

Also add corresponding macros to define the mem_hops field values and shift value. Currently we define a macro for HOPS_0, which corresponds to data coming from another core but the same node.

Add functionality to represent mem_hops field data in the perf_mem__lvl_scnprintf function with the help of an added string array called mem_hops.

For example, encodings for the mem_hops field with L2 cache:

L2 - local L2
L2 | REMOTE | HOPS_0 - remote core, same node L2

Since, with the addition of the HOPS field, remote can now be used to denote cache access from the same node but a different core, a check is added in the c2c_decode_stats function to set mrem only when HOPS is zero along with the remote field being set.

Signed-off-by: Kajol Jain <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent fec9cc6 commit cae1d75

File tree

2 files changed

+27
-3
lines changed

2 files changed

+27
-3
lines changed

tools/include/uapi/linux/perf_event.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,14 +1210,16 @@ union perf_mem_data_src {
12101210
mem_remote:1, /* remote */
12111211
mem_snoopx:2, /* snoop mode, ext */
12121212
mem_blk:3, /* access blocked */
1213-
mem_rsvd:21;
1213+
mem_hops:3, /* hop level */
1214+
mem_rsvd:18;
12141215
};
12151216
};
12161217
#elif defined(__BIG_ENDIAN_BITFIELD)
12171218
union perf_mem_data_src {
12181219
__u64 val;
12191220
struct {
1220-
__u64 mem_rsvd:21,
1221+
__u64 mem_rsvd:18,
1222+
mem_hops:3, /* hop level */
12211223
mem_blk:3, /* access blocked */
12221224
mem_snoopx:2, /* snoop mode, ext */
12231225
mem_remote:1, /* remote */
@@ -1313,6 +1315,11 @@ union perf_mem_data_src {
13131315
#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
13141316
#define PERF_MEM_BLK_SHIFT 40
13151317

1318+
/* hop level */
1319+
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
1320+
/* 2-7 available */
1321+
#define PERF_MEM_HOPS_SHIFT 43
1322+
13161323
#define PERF_MEM_S(a, s) \
13171324
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
13181325

tools/perf/util/mem-events.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = {
301301
[PERF_MEM_LVLNUM_NA] = "N/A",
302302
};
303303

304+
static const char * const mem_hops[] = {
305+
"N/A",
306+
/*
307+
* While printing, 'Remote' will be added to represent
308+
* 'Remote core, same node' accesses, as the remote field needs
309+
* to be set along with the mem_hops field.
310+
*/
311+
"core, same node",
312+
};
313+
304314
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
305315
{
306316
size_t i, l = 0;
@@ -325,6 +335,9 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
325335
l += 7;
326336
}
327337

338+
if (mem_info && mem_info->data_src.mem_hops)
339+
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
340+
328341
printed = 0;
329342
for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
330343
if (!(m & 0x1))
@@ -471,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
471484
/*
472485
* Skylake might report unknown remote level via this
473486
* bit, consider it when evaluating remote HITMs.
487+
*
488+
* In case of Power, the remote field can also be used to denote cache
489+
* accesses from another core of the same node. Hence, setting
490+
* mrem only when HOPS is zero and the remote field is set.
474491
*/
475-
bool mrem = data_src->mem_remote;
492+
bool mrem = (data_src->mem_remote && !data_src->mem_hops);
476493
int err = 0;
477494

478495
#define HITM_INC(__f) \

0 commit comments

Comments
 (0)