
Commit 6739034

amluto authored and KAGA-KOKO committed
x86/process/64: Use FSGSBASE in switch_to() if available
With the new FSGSBASE instructions, FSBASE and GSBASE can be efficiently read and written in __switch_to(). Use that capability to preserve the full state.

This will enable user code to do whatever it wants with the new instructions without any kernel-induced gotchas. (There can still be architectural gotchas: movl %gs,%eax; movl %eax,%gs may change GSBASE if WRGSBASE was used, but users are expected to read the CPU manual before doing things like that.)

This is a considerable speedup. It seems to save about 100 cycles per context switch compared to the baseline 4.6-rc1 behavior on a Skylake laptop. This is mostly due to avoiding the WRMSR operation.

[ chang: 5~10% performance improvements were seen with a context switch benchmark that ran threads with different FS/GSBASE values (relative to the 4.16 baseline). Minor edit on the changelog. ]

[ tglx: Massage changelog ]

Signed-off-by: Andy Lutomirski <[email protected]>
Signed-off-by: Chang S. Bae <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
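To make the "no kernel-induced gotchas" point concrete, here is a minimal user-space sketch of what this commit enables: reading and writing FSBASE directly with the FSGSBASE compiler intrinsics. This demo is not part of the commit; it assumes a CPU and kernel with FSGSBASE support enabled (without that, RDFSBASE/WRFSBASE raise #UD, delivered as SIGILL) and a GCC/Clang build with -mfsgsbase.

/* fsgsbase_demo.c -- hypothetical demo, not from this commit.
 * Build: gcc -O2 -mfsgsbase fsgsbase_demo.c
 */
#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* RDFSBASE: read the current FS base (glibc keeps TLS here). */
	uint64_t base = _readfsbase_u64();

	/* WRFSBASE: writing the same value back is a harmless round trip.
	 * With this commit, the kernel preserves whatever user code writes
	 * here across context switches instead of guessing from the
	 * selector.
	 */
	_writefsbase_u64(base);

	printf("FSBASE = %#llx\n", (unsigned long long)base);
	return 0;
}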
1 parent 6758034 commit 6739034

File tree

1 file changed: +28 -6 lines changed


arch/x86/kernel/process_64.c

Lines changed: 28 additions & 6 deletions
@@ -236,8 +236,18 @@ static __always_inline void save_fsgs(struct task_struct *task)
 {
 	savesegment(fs, task->thread.fsindex);
 	savesegment(gs, task->thread.gsindex);
-	save_base_legacy(task, task->thread.fsindex, FS);
-	save_base_legacy(task, task->thread.gsindex, GS);
+	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+		/*
+		 * If FSGSBASE is enabled, we can't make any useful guesses
+		 * about the base, and user code expects us to save the current
+		 * value. Fortunately, reading the base directly is efficient.
+		 */
+		task->thread.fsbase = rdfsbase();
+		task->thread.gsbase = __rdgsbase_inactive();
+	} else {
+		save_base_legacy(task, task->thread.fsindex, FS);
+		save_base_legacy(task, task->thread.gsindex, GS);
+	}
 }

 /*
@@ -319,10 +329,22 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
 static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
 					      struct thread_struct *next)
 {
-	load_seg_legacy(prev->fsindex, prev->fsbase,
-			next->fsindex, next->fsbase, FS);
-	load_seg_legacy(prev->gsindex, prev->gsbase,
-			next->gsindex, next->gsbase, GS);
+	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+		/* Update the FS and GS selectors if they could have changed. */
+		if (unlikely(prev->fsindex || next->fsindex))
+			loadseg(FS, next->fsindex);
+		if (unlikely(prev->gsindex || next->gsindex))
+			loadseg(GS, next->gsindex);
+
+		/* Update the bases. */
+		wrfsbase(next->fsbase);
+		__wrgsbase_inactive(next->gsbase);
+	} else {
+		load_seg_legacy(prev->fsindex, prev->fsbase,
+				next->fsindex, next->fsbase, FS);
+		load_seg_legacy(prev->gsindex, prev->gsbase,
+				next->gsindex, next->gsbase, GS);
+	}
 }

 static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
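A note on __rdgsbase_inactive() and __wrgsbase_inactive() in the hunks above: RDGSBASE and WRGSBASE operate on the currently active GSBASE, which inside the kernel is the kernel's per-CPU base, so the user ("inactive") value is only reachable after swapping it in with SWAPGS. The following is a plausible sketch of the read side, assuming no paravirt complications; the real helper in arch/x86/kernel/process_64.c may differ.

/* Sketch only -- not the in-tree implementation. Must run with
 * interrupts disabled: an interrupt arriving between the two SWAPGS
 * instructions would observe the wrong (user) GSBASE.
 */
static __always_inline unsigned long __rdgsbase_inactive(void)
{
	unsigned long gsbase;

	native_swapgs();	/* the user GSBASE becomes the active one */
	gsbase = rdgsbase();	/* RDGSBASE reads the now-active user base */
	native_swapgs();	/* swap the kernel GSBASE back in */

	return gsbase;
}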

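The 5~10% figure in the changelog came from a context-switch benchmark running threads with different FS/GSBASE values. Below is a hypothetical reconstruction of that kind of micro-benchmark, not the benchmark actually used: two threads pinned to one CPU, each with a distinct user GSBASE, so the kernel must save and restore the base on every switch. It deliberately touches GSBASE rather than FSBASE, since glibc owns FSBASE for TLS, and it requires an FSGSBASE-enabled kernel (on older kernels the base would be set with arch_prctl(ARCH_SET_GS, ...) instead, since WRGSBASE would fault).

/* switch_bench.c -- hypothetical, in the spirit of the changelog's
 * benchmark. Build: gcc -O2 -mfsgsbase -pthread switch_bench.c
 * Time it (e.g. with perf stat) on kernels with and without this patch.
 */
#define _GNU_SOURCE
#include <immintrin.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>

#define ITERS 1000000L

static void *worker(void *arg)
{
	cpu_set_t set;

	/* Pin both threads to CPU 0 so every yield is a real switch. */
	CPU_ZERO(&set);
	CPU_SET(0, &set);
	pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

	/* Give this thread its own user GSBASE; the kernel now has to
	 * preserve it across every context switch.
	 */
	_writegsbase_u64((uint64_t)(uintptr_t)arg);

	for (long i = 0; i < ITERS; i++)
		sched_yield();

	return NULL;
}

int main(void)
{
	static char a, b;	/* two distinct addresses to use as bases */
	pthread_t t1, t2;

	pthread_create(&t1, NULL, worker, &a);
	pthread_create(&t2, NULL, worker, &b);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	puts("done");
	return 0;
}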