Skip to content

Commit 295c52e

Browse files
virtuoso authored and Ingo Molnar committed
perf/x86/intel/pt: Prevent redundant WRMSRs
With recent optimizations to AUX and PT buffer management code (high order
AUX allocations, opportunistic Single Range Output), it is far more likely
now that the output MSRs won't need reprogramming on every sched-in.

To avoid needless WRMSRs of those registers, cache their values and only
write them when needed.

Signed-off-by: Alexander Shishkin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vince Weaver <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
1 parent 6706384 commit 295c52e

File tree

2 files changed

+23
-12
lines changed

2 files changed

+23
-12
lines changed

arch/x86/events/intel/pt.c

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -606,6 +606,7 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
606606

607607
static void pt_config_buffer(struct pt_buffer *buf)
608608
{
609+
struct pt *pt = this_cpu_ptr(&pt_ctx);
609610
u64 reg, mask;
610611
void *base;
611612

@@ -617,11 +618,17 @@ static void pt_config_buffer(struct pt_buffer *buf)
617618
mask = (u64)buf->cur_idx;
618619
}
619620

620-
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(base));
621+
reg = virt_to_phys(base);
622+
if (pt->output_base != reg) {
623+
pt->output_base = reg;
624+
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
625+
}
621626

622627
reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
623-
624-
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
628+
if (pt->output_mask != reg) {
629+
pt->output_mask = reg;
630+
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
631+
}
625632
}
626633

627634
/**
@@ -930,21 +937,21 @@ static void pt_handle_status(struct pt *pt)
930937
*/
931938
static void pt_read_offset(struct pt_buffer *buf)
932939
{
933-
u64 offset, base;
940+
struct pt *pt = this_cpu_ptr(&pt_ctx);
934941
struct topa_page *tp;
935942

936943
if (!buf->single) {
937-
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base);
938-
tp = phys_to_virt(base);
944+
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
945+
tp = phys_to_virt(pt->output_base);
939946
buf->cur = &tp->topa;
940947
}
941948

942-
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
949+
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
943950
/* offset within current output region */
944-
buf->output_off = offset >> 32;
951+
buf->output_off = pt->output_mask >> 32;
945952
/* index of current output region within this table */
946953
if (!buf->single)
947-
buf->cur_idx = (offset & 0xffffff80) >> 7;
954+
buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
948955
}
949956

950957
static struct topa_entry *

arch/x86/events/intel/pt.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -113,16 +113,20 @@ struct pt_filters {
113113

114114
/**
115115
* struct pt - per-cpu pt context
116-
* @handle: perf output handle
116+
* @handle: perf output handle
117117
* @filters: last configured filters
118-
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
119-
* @vmx_on: 1 if VMX is ON on this cpu
118+
* @handle_nmi: do handle PT PMI on this cpu, there's an active event
119+
* @vmx_on: 1 if VMX is ON on this cpu
120+
* @output_base: cached RTIT_OUTPUT_BASE MSR value
121+
* @output_mask: cached RTIT_OUTPUT_MASK MSR value
120122
*/
121123
struct pt {
122124
struct perf_output_handle handle;
123125
struct pt_filters filters;
124126
int handle_nmi;
125127
int vmx_on;
128+
u64 output_base;
129+
u64 output_mask;
126130
};
127131

128132
#endif /* __INTEL_PT_H__ */

0 commit comments

Comments
 (0)