Skip to content

Commit a063bf2

Browse files
Kan LiangPeter Zijlstra
authored andcommitted
x86/fpu: Use proper mask to replace full instruction mask
When saving xstate to a kernel/user XSAVE area with the XSAVE family of instructions, the current code applies the 'full' instruction mask (-1), which tries to XSAVE all possible features. This method relies on hardware to trim 'all possible' down to what is enabled in the hardware. The code works well for now. However, there will be a problem, if some features are enabled in hardware, but are not suitable to be saved into all kernel XSAVE buffers, like task->fpu, due to performance consideration. One such example is the Last Branch Records (LBR) state. The LBR state only contains valuable information when LBR is explicitly enabled by the perf subsystem, and the size of an LBR state is large (808 bytes for now). To avoid both CPU overhead and space overhead at each context switch, the LBR state should not be saved into task->fpu like other state components. It should be saved/restored on demand when LBR is enabled in the perf subsystem. Current copy_xregs_to_* will trigger a buffer overflow for such cases. Three sites use the '-1' instruction mask which must be updated. Two are saving/restoring the xstate to/from a kernel-allocated XSAVE buffer and can use 'xfeatures_mask_all', which will save/restore all of the features present in a normal task FPU buffer. The last one saves the register state directly to a user buffer. It could also use 'xfeatures_mask_all'. Just as it was with the '-1' argument, any supervisor states in the mask will be filtered out by the hardware and not saved to the buffer. But, to be more explicit about what is expected to be saved, use xfeatures_mask_user() for the instruction mask. KVM includes the header file fpu/internal.h. To avoid 'undefined xfeatures_mask_all' compiling issue, move copy_fpregs_to_fpstate() to fpu/core.c and export it, because: - The xfeatures_mask_all is indirectly used via copy_fpregs_to_fpstate() by KVM. The function which is directly used by other modules should be exported. - The copy_fpregs_to_fpstate() is a function, while xfeatures_mask_all is a variable for the "internal" FPU state. It's safer to export a function than a variable, which may be implicitly changed by others. - The copy_fpregs_to_fpstate() is a big function with many checks. The removal of the inline keyword should not impact the performance. Signed-off-by: Kan Liang <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Reviewed-by: Dave Hansen <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 5a09928 commit a063bf2

File tree

2 files changed

+46
-40
lines changed

2 files changed

+46
-40
lines changed

arch/x86/include/asm/fpu/internal.h

Lines changed: 7 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
274274
*/
275275
static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
276276
{
277-
u64 mask = -1;
277+
u64 mask = xfeatures_mask_all;
278278
u32 lmask = mask;
279279
u32 hmask = mask >> 32;
280280
int err;
@@ -320,7 +320,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
320320
*/
321321
static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
322322
{
323-
u64 mask = -1;
323+
u64 mask = xfeatures_mask_all;
324324
u32 lmask = mask;
325325
u32 hmask = mask >> 32;
326326
int err;
@@ -356,6 +356,9 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
356356
*/
357357
static inline int copy_xregs_to_user(struct xregs_state __user *buf)
358358
{
359+
u64 mask = xfeatures_mask_user();
360+
u32 lmask = mask;
361+
u32 hmask = mask >> 32;
359362
int err;
360363

361364
/*
@@ -367,7 +370,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf)
367370
return -EFAULT;
368371

369372
stac();
370-
XSTATE_OP(XSAVE, buf, -1, -1, err);
373+
XSTATE_OP(XSAVE, buf, lmask, hmask, err);
371374
clac();
372375

373376
return err;
@@ -408,43 +411,7 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
408411
return err;
409412
}
410413

411-
/*
412-
* These must be called with preempt disabled. Returns
413-
* 'true' if the FPU state is still intact and we can
414-
* keep registers active.
415-
*
416-
* The legacy FNSAVE instruction cleared all FPU state
417-
* unconditionally, so registers are essentially destroyed.
418-
* Modern FPU state can be kept in registers, if there are
419-
* no pending FP exceptions.
420-
*/
421-
static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
422-
{
423-
if (likely(use_xsave())) {
424-
copy_xregs_to_kernel(&fpu->state.xsave);
425-
426-
/*
427-
* AVX512 state is tracked here because its use is
428-
* known to slow the max clock speed of the core.
429-
*/
430-
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
431-
fpu->avx512_timestamp = jiffies;
432-
return 1;
433-
}
434-
435-
if (likely(use_fxsr())) {
436-
copy_fxregs_to_kernel(fpu);
437-
return 1;
438-
}
439-
440-
/*
441-
* Legacy FPU register saving, FNSAVE always clears FPU registers,
442-
* so we have to mark them inactive:
443-
*/
444-
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
445-
446-
return 0;
447-
}
414+
extern int copy_fpregs_to_fpstate(struct fpu *fpu);
448415

449416
static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate, u64 mask)
450417
{

arch/x86/kernel/fpu/core.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,45 @@ bool irq_fpu_usable(void)
8282
}
8383
EXPORT_SYMBOL(irq_fpu_usable);
8484

85+
/*
86+
* These must be called with preempt disabled. Returns
87+
* 'true' if the FPU state is still intact and we can
88+
* keep registers active.
89+
*
90+
* The legacy FNSAVE instruction cleared all FPU state
91+
* unconditionally, so registers are essentially destroyed.
92+
* Modern FPU state can be kept in registers, if there are
93+
* no pending FP exceptions.
94+
*/
95+
int copy_fpregs_to_fpstate(struct fpu *fpu)
96+
{
97+
if (likely(use_xsave())) {
98+
copy_xregs_to_kernel(&fpu->state.xsave);
99+
100+
/*
101+
* AVX512 state is tracked here because its use is
102+
* known to slow the max clock speed of the core.
103+
*/
104+
if (fpu->state.xsave.header.xfeatures & XFEATURE_MASK_AVX512)
105+
fpu->avx512_timestamp = jiffies;
106+
return 1;
107+
}
108+
109+
if (likely(use_fxsr())) {
110+
copy_fxregs_to_kernel(fpu);
111+
return 1;
112+
}
113+
114+
/*
115+
* Legacy FPU register saving, FNSAVE always clears FPU registers,
116+
* so we have to mark them inactive:
117+
*/
118+
asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
119+
120+
return 0;
121+
}
122+
EXPORT_SYMBOL(copy_fpregs_to_fpstate);
123+
85124
void kernel_fpu_begin(void)
86125
{
87126
preempt_disable();

0 commit comments

Comments
 (0)