Skip to content

Commit d7d213e

Browse files
Kan Liang authored and acmel committed
perf report: Support Retire Latency
The Retire Latency field is added in the var3_w of the PERF_SAMPLE_WEIGHT_STRUCT. The Retire Latency reports pipeline stall of this instruction compared to the previous instruction in cycles. That's quite useful to display the information with perf mem report.

The p_stage_cyc for Power is also from the var3_w. Union the p_stage_cyc and retire_lat to share the code.

Implement X86 specific codes to display the X86 specific header.

Add a new sort key retire_lat for the Retire Latency.

Reviewed-by: Andi Kleen <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent ebab291 commit d7d213e

File tree

5 files changed

+30
-1
lines changed

5 files changed

+30
-1
lines changed

tools/perf/Documentation/perf-report.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ OPTIONS
115115
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
116116
pipeline stage. And currently supported only on powerpc.
117117
- addr: (Full) virtual address of the sampled instruction
118+
- retire_lat: On X86, this reports pipeline stall of this instruction compared
119+
to the previous instruction in cycles. And currently supported only on X86
118120

119121
By default, comm, dso and symbol keys are used.
120122
(i.e. --sort comm,dso,symbol)

tools/perf/arch/x86/util/event.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
8989
else {
9090
data->weight = weight.var1_dw;
9191
data->ins_lat = weight.var2_w;
92+
data->retire_lat = weight.var3_w;
9293
}
9394
}
9495

@@ -102,3 +103,22 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
102103
*array |= ((u64)data->ins_lat << 32);
103104
}
104105
}
106+
107+
/*
 * Map a generic sort-entry column header to its x86 wording.
 *
 * The two "Pipeline Stage Cycle" headers are replaced by the matching
 * "Retire Latency" headers; any other header is returned unchanged.
 *
 * @se_header: NUL-terminated header string; must not be NULL, since it is
 *             passed straight to strcmp().
 *
 * Returns a pointer to a string literal for the two translated headers,
 * otherwise @se_header itself. Nothing is allocated, so the caller must
 * not free the result.
 */
const char *arch_perf_header_entry(const char *se_header)
{
	if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
		return "Local Retire Latency";
	else if (!strcmp(se_header, "Pipeline Stage Cycle"))
		return "Retire Latency";

	return se_header;
}
116+
117+
/*
 * Tell whether an arch-specific sort key is supported by the x86 code.
 *
 * Only the exact, case-sensitive names "p_stage_cyc" and
 * "local_p_stage_cyc" are accepted.
 *
 * @sort_key: NUL-terminated sort key name; must not be NULL, since it is
 *            passed straight to strcmp().
 *
 * Returns 1 when @sort_key is one of the two accepted names, 0 otherwise.
 */
int arch_support_sort_key(const char *sort_key)
{
	if (!strcmp(sort_key, "p_stage_cyc"))
		return 1;
	if (!strcmp(sort_key, "local_p_stage_cyc"))
		return 1;
	return 0;
}

tools/perf/util/sample.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,10 @@ struct perf_sample {
9292
u8 cpumode;
9393
u16 misc;
9494
u16 ins_lat;
95-
u16 p_stage_cyc;
95+
union {
96+
u16 p_stage_cyc;
97+
u16 retire_lat;
98+
};
9699
bool no_hw_idx; /* No hw_idx collected in branch_stack */
97100
char insn[MAX_INSN];
98101
void *raw_data;

tools/perf/util/sort.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,6 +2133,8 @@ static struct sort_dimension common_sort_dimensions[] = {
21332133
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
21342134
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
21352135
DIM(SORT_ADDR, "addr", sort_addr),
2136+
DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
2137+
DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
21362138
};
21372139

21382140
#undef DIM

tools/perf/util/sort.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ enum sort_type {
237237
SORT_LOCAL_PIPELINE_STAGE_CYC,
238238
SORT_GLOBAL_PIPELINE_STAGE_CYC,
239239
SORT_ADDR,
240+
SORT_LOCAL_RETIRE_LAT,
241+
SORT_GLOBAL_RETIRE_LAT,
240242

241243
/* branch stack specific sort keys */
242244
__SORT_BRANCH_STACK,

0 commit comments

Comments
 (0)