
Commit ce711ea

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch
In the LBR call stack mode, LBR information is used to reconstruct a call stack. To get the complete call stack, perf has to save/restore all LBR registers during a context switch. Due to the large number of LBR registers, this process causes high CPU overhead. To reduce the CPU overhead during a context switch, use the XSAVES/XRSTORS instructions.

Every XSAVE area must follow a canonical format: the legacy region, an XSAVE header and the extended region. Although the LBR information is only kept in the extended region, space for the legacy region and XSAVE header is still required. Add a new dedicated structure for LBR XSAVES support.

Before enabling XSAVES support, the size of the LBR state has to be sanity checked, because:

- The size of the software structure is calculated from the max LBR depth, which is enumerated by the CPUID leaf for Arch LBR. The size of the LBR state is enumerated by the CPUID leaf for XSAVE support of Arch LBR. If the values from the two CPUID leaves are not consistent, it may trigger a buffer overflow. For example, a hypervisor may inadvertently set inconsistent values for the two emulated CPUID leaves.
- Unlike other state components, the size of the LBR state depends on the max number of LBRs, which may vary from generation to generation.

Expose the function xfeature_size() for the sanity check. The LBR XSAVES support will be disabled if the size of the LBR state enumerated by CPUID doesn't match the size of the software structure.

The XSAVE instruction requires 64-byte alignment for state buffers. A new macro is added to reflect the alignment requirement. A 64-byte aligned kmem_cache is created for architectural LBR.

Currently, the structure for each state component is maintained in fpu/types.h. The structure for the new LBR state component should be maintained in the same place. Move struct lbr_entry to fpu/types.h as well for broader sharing.

Add dedicated lbr_save/lbr_restore functions for LBR XSAVES support, which invoke the corresponding xstate helpers to XSAVES/XRSTORS the LBR information at context switch when the call stack mode is enabled. Since the XSAVES/XRSTORS instructions will eventually be invoked, the dedicated functions are named with the '_xsaves'/'_xrstors' postfix.

Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Dave Hansen <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
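For context on the alignment requirement: create_lbr_kmem_cache() is called in the lbr.c hunks below, but its body is not part of this commit. A minimal sketch of how such a 64-byte-aligned cache could look, assuming it simply wraps kmem_cache_create() (the "x86_lbr" cache name here is illustrative):

	/*
	 * Sketch only: kmem_cache_create()'s align argument guarantees that
	 * every object the cache hands out meets that alignment, which is
	 * how the XSAVE buffers get their required 64 bytes (XSAVE_ALIGNMENT).
	 */
	static struct kmem_cache *
	create_lbr_kmem_cache(size_t size, size_t align)
	{
		return kmem_cache_create("x86_lbr", size, align, 0, NULL);
	}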
Parent: 50f408d

6 files changed: +119 −10 lines

arch/x86/events/intel/lbr.c

Lines changed: 74 additions & 5 deletions
@@ -483,6 +483,17 @@ static void intel_pmu_arch_lbr_restore(void *ctx)
 	}
 }
 
+/*
+ * Restore the Architecture LBR state from the xsave area in the perf
+ * context data for the task via the XRSTORS instruction.
+ */
+static void intel_pmu_arch_lbr_xrstors(void *ctx)
+{
+	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+	copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
 static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
 {
 	if (static_cpu_has(X86_FEATURE_ARCH_LBR))
@@ -557,6 +568,17 @@ static void intel_pmu_arch_lbr_save(void *ctx)
 		entries[x86_pmu.lbr_nr - 1].from = 0;
 }
 
+/*
+ * Save the Architecture LBR state to the xsave area in the perf
+ * context data for the task via the XSAVES instruction.
+ */
+static void intel_pmu_arch_lbr_xsaves(void *ctx)
+{
+	struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
+
+	copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
+}
+
 static void __intel_pmu_lbr_save(void *ctx)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1639,12 +1661,40 @@ void intel_pmu_lbr_init_knl(void)
 	x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
 
+/*
+ * LBR state size is variable based on the max number of registers.
+ * This calculates the expected state size, which should match
+ * what the hardware enumerates for the size of XFEATURE_LBR.
+ */
+static inline unsigned int get_lbr_state_size(void)
+{
+	return sizeof(struct arch_lbr_state) +
+	       x86_pmu.lbr_nr * sizeof(struct lbr_entry);
+}
+
+static bool is_arch_lbr_xsave_available(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_XSAVES))
+		return false;
+
+	/*
+	 * Check the LBR state with the corresponding software structure.
+	 * Disable LBR XSAVES support if the size doesn't match.
+	 */
+	if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
+		return false;
+
+	return true;
+}
+
 void __init intel_pmu_arch_lbr_init(void)
 {
+	struct pmu *pmu = x86_get_pmu();
 	union cpuid28_eax eax;
 	union cpuid28_ebx ebx;
 	union cpuid28_ecx ecx;
 	unsigned int unused_edx;
+	bool arch_lbr_xsave;
 	size_t size;
 	u64 lbr_nr;
 
@@ -1670,9 +1720,22 @@ void __init intel_pmu_arch_lbr_init(void)
 	x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
 	x86_pmu.lbr_nr = lbr_nr;
 
-	size = sizeof(struct x86_perf_task_context_arch_lbr) +
-	       lbr_nr * sizeof(struct lbr_entry);
-	x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+
+	arch_lbr_xsave = is_arch_lbr_xsave_available();
+	if (arch_lbr_xsave) {
+		size = sizeof(struct x86_perf_task_context_arch_lbr_xsave) +
+		       get_lbr_state_size();
+		pmu->task_ctx_cache = create_lbr_kmem_cache(size,
+							    XSAVE_ALIGNMENT);
+	}
+
+	if (!pmu->task_ctx_cache) {
+		arch_lbr_xsave = false;
+
+		size = sizeof(struct x86_perf_task_context_arch_lbr) +
+		       lbr_nr * sizeof(struct lbr_entry);
+		pmu->task_ctx_cache = create_lbr_kmem_cache(size, 0);
+	}
 
 	x86_pmu.lbr_from = MSR_ARCH_LBR_FROM_0;
 	x86_pmu.lbr_to = MSR_ARCH_LBR_TO_0;
@@ -1705,8 +1768,14 @@ void __init intel_pmu_arch_lbr_init(void)
 
 	x86_pmu.lbr_reset = intel_pmu_arch_lbr_reset;
 	x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
-	x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
-	x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+	if (arch_lbr_xsave) {
+		x86_pmu.lbr_save = intel_pmu_arch_lbr_xsaves;
+		x86_pmu.lbr_restore = intel_pmu_arch_lbr_xrstors;
+		pr_cont("XSAVE ");
+	} else {
+		x86_pmu.lbr_save = intel_pmu_arch_lbr_save;
+		x86_pmu.lbr_restore = intel_pmu_arch_lbr_restore;
+	}
 
 	pr_cont("Architectural LBR, ");
 
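For context (not part of this diff): the generic context-switch code calls through the x86_pmu.lbr_save/lbr_restore pointers assigned above, so selecting the '_xsaves'/'_xrstors' variants reroutes the whole save/restore path. A rough sketch of the save-side dispatch, assuming the existing __intel_pmu_lbr_save() shape that is only partially visible in the second hunk:

	static void __intel_pmu_lbr_save(void *ctx)
	{
		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

		if (task_context_opt(ctx)->lbr_callstack_users == 0) {
			task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
			return;
		}

		/* intel_pmu_arch_lbr_xsaves() when XSAVES support is enabled */
		x86_pmu.lbr_save(ctx);

		task_context_opt(ctx)->lbr_stack_state = LBR_VALID;
	}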

arch/x86/events/perf_event.h

Lines changed: 21 additions & 0 deletions
@@ -777,6 +777,27 @@ struct x86_perf_task_context_arch_lbr {
 	struct lbr_entry entries[];
 };
 
+/*
+ * Add padding to guarantee the 64-byte alignment of the state buffer.
+ *
+ * The structure is dynamically allocated. The size of the LBR state may vary
+ * based on the number of LBR registers.
+ *
+ * Do not put anything after the LBR state.
+ */
+struct x86_perf_task_context_arch_lbr_xsave {
+	struct x86_perf_task_context_opt	opt;
+
+	union {
+		struct xregs_state		xsave;
+		struct {
+			struct fxregs_state	i387;
+			struct xstate_header	header;
+			struct arch_lbr_state	lbr;
+		} __attribute__ ((packed, aligned (XSAVE_ALIGNMENT)));
+	};
+};
+
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
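The anonymous struct inside the union mirrors the canonical XSAVE layout: a 512-byte legacy region (fxregs_state), a 64-byte XSAVE header, and then the extended region, so the 'lbr' member lands exactly where XSAVES writes the LBR component when it is the only extended feature being saved. A hypothetical compile-time check of that invariant (not in this patch; FXSAVE_SIZE and XSAVE_HDR_SIZE come from asm/fpu/xstate.h):

	/* Hypothetical sanity check, for illustration only. */
	static void __init check_lbr_xsave_layout(void)
	{
		BUILD_BUG_ON(offsetof(struct x86_perf_task_context_arch_lbr_xsave, lbr) !=
			     offsetof(struct x86_perf_task_context_arch_lbr_xsave, xsave) +
			     FXSAVE_SIZE + XSAVE_HDR_SIZE);
	}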

arch/x86/include/asm/fpu/types.h

Lines changed: 20 additions & 0 deletions
@@ -236,6 +236,26 @@ struct pkru_state {
 	u32				pad;
 } __packed;
 
+/*
+ * State component 15: Architectural LBR configuration state.
+ * The size of Arch LBR state depends on the number of LBRs (lbr_depth).
+ */
+
+struct lbr_entry {
+	u64 from;
+	u64 to;
+	u64 info;
+};
+
+struct arch_lbr_state {
+	u64 lbr_ctl;
+	u64 lbr_depth;
+	u64 ler_from;
+	u64 ler_to;
+	u64 ler_info;
+	struct lbr_entry entries[];
+} __packed;
+
 struct xstate_header {
 	u64				xfeatures;
 	u64				xcomp_bv;
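As a worked example of the variable sizing (assuming a hypothetical LBR depth of 32): sizeof(struct arch_lbr_state) is 5 * 8 = 40 bytes, and the flexible array adds 32 * sizeof(struct lbr_entry) = 32 * 24 = 768 bytes, for a total XFEATURE_LBR state size of 808 bytes. This is the computed size that get_lbr_state_size() compares against xfeature_size(XFEATURE_LBR) in the lbr.c hunk above.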

arch/x86/include/asm/fpu/xstate.h

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,8 @@
 #define XSAVE_YMM_SIZE	    256
 #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
+#define XSAVE_ALIGNMENT     64
+
 /* All currently supported user features */
 #define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \
 				      XFEATURE_MASK_SSE | \
@@ -101,6 +103,7 @@ extern void __init update_regset_xstate_info(unsigned int size,
 void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
 const void *get_xsave_field_ptr(int xfeature_nr);
 int using_compacted_format(void);
+int xfeature_size(int xfeature_nr);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);

arch/x86/include/asm/perf_event.h

Lines changed: 0 additions & 4 deletions
@@ -282,10 +282,6 @@ struct pebs_xmm {
 	u64	xmm[16*2];	/* two entries for each register */
 };
 
-struct lbr_entry {
-	u64	from, to, info;
-};
-
 /*
  * IBS cpuid feature detection
  */

arch/x86/kernel/fpu/xstate.c

Lines changed: 1 addition & 1 deletion
@@ -488,7 +488,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr)
 	return ebx;
 }
 
-static int xfeature_size(int xfeature_nr)
+int xfeature_size(int xfeature_nr)
 {
 	u32 eax, ebx, ecx, edx;
 
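The function body is truncated in this view. For reference, a sketch of what xfeature_size() returns, assuming the usual CPUID convention for leaf 0xD (EAX of sub-leaf n enumerates the size of state component n):

	int xfeature_size(int xfeature_nr)
	{
		u32 eax, ebx, ecx, edx;

		/* CPUID.(EAX=0xD, ECX=xfeature_nr): EAX = component size in bytes */
		cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
		return eax;
	}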
