Skip to content

Commit 199cd1d

Browse files
ubizjak authored and bonzini committed
KVM: SVM: Split svm_vcpu_run inline assembly to separate file
The compiler (GCC) does not like the situation, where there is inline assembly block that clobbers all available machine registers in the middle of the function. This situation can be found in function svm_vcpu_run in file kvm/svm.c and results in many register spills and fills to/from stack frame. This patch fixes the issue with the same approach as was done for VMX some time ago. The big inline assembly is moved to a separate assembly .S file, taking into account all ABI requirements. There are two main benefits of the above approach: * elimination of several register spills and fills to/from stack frame, and consequently smaller function .text size. The binary size of svm_vcpu_run is lowered from 2019 to 1626 bytes. * more efficient access to a register save array. Currently, register save array is accessed as: 7b00: 48 8b 98 28 02 00 00 mov 0x228(%rax),%rbx 7b07: 48 8b 88 18 02 00 00 mov 0x218(%rax),%rcx 7b0e: 48 8b 90 20 02 00 00 mov 0x220(%rax),%rdx and passing a pointer to a register array as an argument to a function one gets: 12: 48 8b 48 08 mov 0x8(%rax),%rcx 16: 48 8b 50 10 mov 0x10(%rax),%rdx 1a: 48 8b 58 18 mov 0x18(%rax),%rbx As a result, the total size, considering that the new function size is 229 bytes, gets lowered by 164 bytes. Signed-off-by: Uros Bizjak <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent eaf7826 commit 199cd1d

File tree

3 files changed

+166
-90
lines changed

3 files changed

+166
-90
lines changed

arch/x86/kvm/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
1414
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
1515

1616
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
17-
kvm-amd-y += svm/svm.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
17+
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
1818

1919
obj-$(CONFIG_KVM) += kvm.o
2020
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

arch/x86/kvm/svm/svm.c

Lines changed: 3 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -3276,6 +3276,8 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
32763276
svm_complete_interrupts(svm);
32773277
}
32783278

3279+
bool __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
3280+
32793281
static void svm_vcpu_run(struct kvm_vcpu *vcpu)
32803282
{
32813283
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3330,95 +3332,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
33303332

33313333
local_irq_enable();
33323334

3333-
asm volatile (
3334-
"push %%" _ASM_BP "; \n\t"
3335-
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
3336-
"mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
3337-
"mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
3338-
"mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
3339-
"mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
3340-
"mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
3341-
#ifdef CONFIG_X86_64
3342-
"mov %c[r8](%[svm]), %%r8 \n\t"
3343-
"mov %c[r9](%[svm]), %%r9 \n\t"
3344-
"mov %c[r10](%[svm]), %%r10 \n\t"
3345-
"mov %c[r11](%[svm]), %%r11 \n\t"
3346-
"mov %c[r12](%[svm]), %%r12 \n\t"
3347-
"mov %c[r13](%[svm]), %%r13 \n\t"
3348-
"mov %c[r14](%[svm]), %%r14 \n\t"
3349-
"mov %c[r15](%[svm]), %%r15 \n\t"
3350-
#endif
3351-
3352-
/* Enter guest mode */
3353-
"push %%" _ASM_AX " \n\t"
3354-
"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
3355-
__ex("vmload %%" _ASM_AX) "\n\t"
3356-
__ex("vmrun %%" _ASM_AX) "\n\t"
3357-
__ex("vmsave %%" _ASM_AX) "\n\t"
3358-
"pop %%" _ASM_AX " \n\t"
3359-
3360-
/* Save guest registers, load host registers */
3361-
"mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
3362-
"mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
3363-
"mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
3364-
"mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
3365-
"mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
3366-
"mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
3367-
#ifdef CONFIG_X86_64
3368-
"mov %%r8, %c[r8](%[svm]) \n\t"
3369-
"mov %%r9, %c[r9](%[svm]) \n\t"
3370-
"mov %%r10, %c[r10](%[svm]) \n\t"
3371-
"mov %%r11, %c[r11](%[svm]) \n\t"
3372-
"mov %%r12, %c[r12](%[svm]) \n\t"
3373-
"mov %%r13, %c[r13](%[svm]) \n\t"
3374-
"mov %%r14, %c[r14](%[svm]) \n\t"
3375-
"mov %%r15, %c[r15](%[svm]) \n\t"
3376-
/*
3377-
* Clear host registers marked as clobbered to prevent
3378-
* speculative use.
3379-
*/
3380-
"xor %%r8d, %%r8d \n\t"
3381-
"xor %%r9d, %%r9d \n\t"
3382-
"xor %%r10d, %%r10d \n\t"
3383-
"xor %%r11d, %%r11d \n\t"
3384-
"xor %%r12d, %%r12d \n\t"
3385-
"xor %%r13d, %%r13d \n\t"
3386-
"xor %%r14d, %%r14d \n\t"
3387-
"xor %%r15d, %%r15d \n\t"
3388-
#endif
3389-
"xor %%ebx, %%ebx \n\t"
3390-
"xor %%ecx, %%ecx \n\t"
3391-
"xor %%edx, %%edx \n\t"
3392-
"xor %%esi, %%esi \n\t"
3393-
"xor %%edi, %%edi \n\t"
3394-
"pop %%" _ASM_BP
3395-
:
3396-
: [svm]"a"(svm),
3397-
[vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3398-
[rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3399-
[rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3400-
[rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3401-
[rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3402-
[rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3403-
[rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3404-
#ifdef CONFIG_X86_64
3405-
, [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3406-
[r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3407-
[r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3408-
[r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3409-
[r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3410-
[r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3411-
[r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3412-
[r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3413-
#endif
3414-
: "cc", "memory"
3415-
#ifdef CONFIG_X86_64
3416-
, "rbx", "rcx", "rdx", "rsi", "rdi"
3417-
, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3418-
#else
3419-
, "ebx", "ecx", "edx", "esi", "edi"
3420-
#endif
3421-
);
3335+
__svm_vcpu_run(svm->vmcb_pa, (unsigned long *)&svm->vcpu.arch.regs);
34223336

34233337
/* Eliminate branch target predictions from guest mode */
34243338
vmexit_fill_RSB();

arch/x86/kvm/svm/vmenter.S

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/asm.h>
4+
#include <asm/bitsperlong.h>
5+
#include <asm/kvm_vcpu_regs.h>
6+
7+
#define WORD_SIZE (BITS_PER_LONG / 8)
8+
9+
/* Intentionally omit RAX as it's context switched by hardware */
10+
#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE
11+
#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE
12+
#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE
13+
/* Intentionally omit RSP as it's context switched by hardware */
14+
#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE
15+
#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE
16+
#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE
17+
18+
#ifdef CONFIG_X86_64
19+
#define VCPU_R8 __VCPU_REGS_R8 * WORD_SIZE
20+
#define VCPU_R9 __VCPU_REGS_R9 * WORD_SIZE
21+
#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE
22+
#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE
23+
#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE
24+
#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE
25+
#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE
26+
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
27+
#endif
28+
29+
.text
30+
31+
/**
32+
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
33+
* @vmcb_pa: unsigned long
34+
* @regs: unsigned long * (to guest registers)
35+
*/
36+
SYM_FUNC_START(__svm_vcpu_run)
37+
push %_ASM_BP
38+
mov %_ASM_SP, %_ASM_BP
39+
#ifdef CONFIG_X86_64
40+
push %r15
41+
push %r14
42+
push %r13
43+
push %r12
44+
#else
45+
push %edi
46+
push %esi
47+
#endif
48+
push %_ASM_BX
49+
50+
/* Save @regs. */
51+
push %_ASM_ARG2
52+
53+
/* Save @vmcb. */
54+
push %_ASM_ARG1
55+
56+
/* Move @regs to RAX. */
57+
mov %_ASM_ARG2, %_ASM_AX
58+
59+
/* Load guest registers. */
60+
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
61+
mov VCPU_RDX(%_ASM_AX), %_ASM_DX
62+
mov VCPU_RBX(%_ASM_AX), %_ASM_BX
63+
mov VCPU_RBP(%_ASM_AX), %_ASM_BP
64+
mov VCPU_RSI(%_ASM_AX), %_ASM_SI
65+
mov VCPU_RDI(%_ASM_AX), %_ASM_DI
66+
#ifdef CONFIG_X86_64
67+
mov VCPU_R8 (%_ASM_AX), %r8
68+
mov VCPU_R9 (%_ASM_AX), %r9
69+
mov VCPU_R10(%_ASM_AX), %r10
70+
mov VCPU_R11(%_ASM_AX), %r11
71+
mov VCPU_R12(%_ASM_AX), %r12
72+
mov VCPU_R13(%_ASM_AX), %r13
73+
mov VCPU_R14(%_ASM_AX), %r14
74+
mov VCPU_R15(%_ASM_AX), %r15
75+
#endif
76+
77+
/* "POP" @vmcb to RAX. */
78+
pop %_ASM_AX
79+
80+
/* Enter guest mode */
81+
1: vmload %_ASM_AX
82+
jmp 3f
83+
2: cmpb $0, kvm_rebooting
84+
jne 3f
85+
ud2
86+
_ASM_EXTABLE(1b, 2b)
87+
88+
3: vmrun %_ASM_AX
89+
jmp 5f
90+
4: cmpb $0, kvm_rebooting
91+
jne 5f
92+
ud2
93+
_ASM_EXTABLE(3b, 4b)
94+
95+
5: vmsave %_ASM_AX
96+
jmp 7f
97+
6: cmpb $0, kvm_rebooting
98+
jne 7f
99+
ud2
100+
_ASM_EXTABLE(5b, 6b)
101+
7:
102+
/* "POP" @regs to RAX. */
103+
pop %_ASM_AX
104+
105+
/* Save all guest registers. */
106+
mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
107+
mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
108+
mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
109+
mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
110+
mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
111+
mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
112+
#ifdef CONFIG_X86_64
113+
mov %r8, VCPU_R8 (%_ASM_AX)
114+
mov %r9, VCPU_R9 (%_ASM_AX)
115+
mov %r10, VCPU_R10(%_ASM_AX)
116+
mov %r11, VCPU_R11(%_ASM_AX)
117+
mov %r12, VCPU_R12(%_ASM_AX)
118+
mov %r13, VCPU_R13(%_ASM_AX)
119+
mov %r14, VCPU_R14(%_ASM_AX)
120+
mov %r15, VCPU_R15(%_ASM_AX)
121+
#endif
122+
123+
/*
124+
* Clear all general purpose registers except RSP and RAX to prevent
125+
* speculative use of the guest's values, even those that are reloaded
126+
* via the stack. In theory, an L1 cache miss when restoring registers
127+
* could lead to speculative execution with the guest's values.
128+
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
129+
* free. RSP and RAX are exempt as they are restored by hardware
130+
* during VM-Exit.
131+
*/
132+
xor %ecx, %ecx
133+
xor %edx, %edx
134+
xor %ebx, %ebx
135+
xor %ebp, %ebp
136+
xor %esi, %esi
137+
xor %edi, %edi
138+
#ifdef CONFIG_X86_64
139+
xor %r8d, %r8d
140+
xor %r9d, %r9d
141+
xor %r10d, %r10d
142+
xor %r11d, %r11d
143+
xor %r12d, %r12d
144+
xor %r13d, %r13d
145+
xor %r14d, %r14d
146+
xor %r15d, %r15d
147+
#endif
148+
149+
pop %_ASM_BX
150+
151+
#ifdef CONFIG_X86_64
152+
pop %r12
153+
pop %r13
154+
pop %r14
155+
pop %r15
156+
#else
157+
pop %esi
158+
pop %edi
159+
#endif
160+
pop %_ASM_BP
161+
ret
162+
SYM_FUNC_END(__svm_vcpu_run)

0 commit comments

Comments
 (0)