
Commit 405386b

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 - Allow again loading KVM on 32-bit non-PAE builds

 - Fixes for host SMIs on AMD

 - Fixes for guest SMIs on AMD

 - Fixes for selftests on s390 and ARM

 - Fix memory leak

 - Enforce no-instrumentation area on vmentry when hardware breakpoints
   are in use

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: selftests: smm_test: Test SMM enter from L2
  KVM: nSVM: Restore nested control upon leaving SMM
  KVM: nSVM: Fix L1 state corruption upon return from SMM
  KVM: nSVM: Introduce svm_copy_vmrun_state()
  KVM: nSVM: Check that VM_HSAVE_PA MSR was set before VMRUN
  KVM: nSVM: Check the value written to MSR_VM_HSAVE_PA
  KVM: SVM: Fix sev_pin_memory() error checks in SEV migration utilities
  KVM: SVM: Return -EFAULT if copy_to_user() for SEV mig packet header fails
  KVM: SVM: add module param to control the #SMI interception
  KVM: SVM: remove INIT intercept handler
  KVM: SVM: #SMI interception must not skip the instruction
  KVM: VMX: Remove vmx_msr_index from vmx.h
  KVM: X86: Disable hardware breakpoints unconditionally before kvm_x86->run()
  KVM: selftests: Address extra memslot parameters in vm_vaddr_alloc
  kvm: debugfs: fix memory leak in kvm_create_vm_debugfs
  KVM: x86/pmu: Clear anythread deprecated bit when 0xa leaf is unsupported on the SVM
  KVM: mmio: Fix use-after-free Read in kvm_vm_ioctl_unregister_coalesced_mmio
  KVM: SVM: Revert clearing of C-bit on GPA in #NPF handler
  KVM: x86/mmu: Do not apply HPA (memory encryption) mask to GPAs
  KVM: x86: Use kernel's x86_phys_bits to handle reduced MAXPHYADDR
  ...
2 parents f3523a2 + d951b22 commit 405386b

21 files changed: +255 / -64 lines changed

arch/x86/kvm/cpuid.c

Lines changed: 25 additions & 5 deletions
@@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 
 		edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
 		edx.split.bit_width_fixed = cap.bit_width_fixed;
-		edx.split.anythread_deprecated = 1;
+		if (cap.version)
+			edx.split.anythread_deprecated = 1;
 		edx.split.reserved1 = 0;
 		edx.split.reserved2 = 0;

@@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
 		unsigned phys_as = entry->eax & 0xff;
 
-		if (!g_phys_as)
+		/*
+		 * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+		 * the guest operates in the same PA space as the host, i.e.
+		 * reductions in MAXPHYADDR for memory encryption affect shadow
+		 * paging, too.
+		 *
+		 * If TDP is enabled but an explicit guest MAXPHYADDR is not
+		 * provided, use the raw bare metal MAXPHYADDR as reductions to
+		 * the HPAs do not affect GPAs.
+		 */
+		if (!tdp_enabled)
+			g_phys_as = boot_cpu_data.x86_phys_bits;
+		else if (!g_phys_as)
 			g_phys_as = phys_as;
+
 		entry->eax = g_phys_as | (virt_as << 8);
 		entry->edx = 0;
 		cpuid_entry_override(entry, CPUID_8000_0008_EBX);

@@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 	case 0x8000001a:
 	case 0x8000001e:
 		break;
-	/* Support memory encryption cpuid if host supports it */
 	case 0x8000001F:
-		if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
+		if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
 			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
-		else
+		} else {
 			cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+
+			/*
+			 * Enumerate '0' for "PA bits reduction", the adjusted
+			 * MAXPHYADDR is enumerated directly (see 0x80000008).
+			 */
+			entry->ebx &= ~GENMASK(11, 6);
+		}
 		break;
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
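
To see the effect from inside a guest: CPUID leaf 0x80000008 reports the physical address width in EAX[7:0], and leaf 0x8000001F reports the SEV "PA bits reduction" in EBX[11:6], which is exactly the field the GENMASK(11, 6) line clears. A rough userspace sketch (not kernel code) that reads both fields on an x86 machine:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Leaf 0x80000008: EAX[7:0] = MAXPHYADDR, EAX[15:8] = virtual bits. */
	if (__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
		printf("MAXPHYADDR %u bits, virtual %u bits\n",
		       eax & 0xff, (eax >> 8) & 0xff);

	/* Leaf 0x8000001F: EBX[11:6] = SEV physical address bit reduction.
	 * With this patch a KVM guest should see 0 here; the adjusted
	 * MAXPHYADDR is already reflected in leaf 0x80000008. */
	if (__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		printf("PA bit reduction %u\n", (ebx >> 6) & 0x3f);

	return 0;
}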

arch/x86/kvm/mmu/mmu.c

Lines changed: 2 additions & 0 deletions
@@ -53,6 +53,8 @@
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+#include "paging.h"
+
 extern bool itlb_multihit_kvm_mitigation;
 
 int __read_mostly nx_huge_pages = -1;

arch/x86/kvm/mmu/paging.h

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Shadow paging constants/helpers that don't need to be #undef'd. */
+#ifndef __KVM_X86_PAGING_H
+#define __KVM_X86_PAGING_H
+
+#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_LVL_ADDR_MASK(level) \
+	(GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+	(GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+					* PT64_LEVEL_BITS))) - 1))
+#endif /* __KVM_X86_PAGING_H */
+
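
To make the masks concrete: with 4 KiB pages (PAGE_SHIFT == 12) and 9 page-table index bits per level (PT64_LEVEL_BITS == 9 on x86-64), PT64_LVL_ADDR_MASK(2) keeps bits 51:21 of a guest physical address (the 2 MiB-aligned portion) and PT64_LVL_OFFSET_MASK(2) keeps bits 20:12. A standalone sketch that evaluates the same expressions, with those two architectural constants hard-coded for illustration:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
#define PAGE_SHIFT	12
#define PT64_LEVEL_BITS	9	/* 9 page-table index bits per level on x86-64 */

#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(uint64_t)(PAGE_SIZE - 1))
#define PT64_LVL_ADDR_MASK(level) \
	(GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
						* PT64_LEVEL_BITS))) - 1))
#define PT64_LVL_OFFSET_MASK(level) \
	(GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
						* PT64_LEVEL_BITS))) - 1))

int main(void)
{
	for (int level = 1; level <= 4; level++)
		printf("level %d: addr mask 0x%016llx  offset mask 0x%016llx\n",
		       level,
		       (unsigned long long)PT64_LVL_ADDR_MASK(level),
		       (unsigned long long)PT64_LVL_OFFSET_MASK(level));
	return 0;
}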

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
 	#define pt_element_t u64
 	#define guest_walker guest_walker64
 	#define FNAME(name) paging##64_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)

@@ -57,7 +57,7 @@
 	#define pt_element_t u64
 	#define guest_walker guest_walkerEPT
 	#define FNAME(name) ept_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)

arch/x86/kvm/mmu/spte.h

Lines changed: 0 additions & 6 deletions
@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
 #else
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #endif
-#define PT64_LVL_ADDR_MASK(level) \
-	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
-						* PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
-	(PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
-					* PT64_LEVEL_BITS))) - 1))
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
 	| shadow_x_mask | shadow_nx_mask | shadow_me_mask)

arch/x86/kvm/svm/nested.c

Lines changed: 33 additions & 20 deletions
@@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm)
 
 	for (i = 0; i < MAX_INTERCEPT; i++)
 		c->intercepts[i] |= g->intercepts[i];
+
+	/* If SMI is not intercepted, ignore guest SMI intercept as well */
+	if (!intercept_smi)
+		vmcb_clr_intercept(c, INTERCEPT_SMI);
 }
 
 static void copy_vmcb_control_area(struct vmcb_control_area *dst,

@@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-					    struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+				     struct vmcb_control_area *control)
 {
 	copy_vmcb_control_area(&svm->nested.ctl, control);
 

@@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	struct kvm_host_map map;
 	u64 vmcb12_gpa;
 
+	if (!svm->nested.hsave_msr) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
 	if (is_smm(vcpu)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;

@@ -692,6 +701,27 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+			  struct vmcb_save_area *to_save)
+{
+	to_save->es = from_save->es;
+	to_save->cs = from_save->cs;
+	to_save->ss = from_save->ss;
+	to_save->ds = from_save->ds;
+	to_save->gdtr = from_save->gdtr;
+	to_save->idtr = from_save->idtr;
+	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+	to_save->efer = from_save->efer;
+	to_save->cr0 = from_save->cr0;
+	to_save->cr3 = from_save->cr3;
+	to_save->cr4 = from_save->cr4;
+	to_save->rax = from_save->rax;
+	to_save->rsp = from_save->rsp;
+	to_save->rip = from_save->rip;
+	to_save->cpl = 0;
+}
+
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;

@@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
-	svm->vmcb01.ptr->save.es = save->es;
-	svm->vmcb01.ptr->save.cs = save->cs;
-	svm->vmcb01.ptr->save.ss = save->ss;
-	svm->vmcb01.ptr->save.ds = save->ds;
-	svm->vmcb01.ptr->save.gdtr = save->gdtr;
-	svm->vmcb01.ptr->save.idtr = save->idtr;
-	svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
-	svm->vmcb01.ptr->save.efer = save->efer;
-	svm->vmcb01.ptr->save.cr0 = save->cr0;
-	svm->vmcb01.ptr->save.cr3 = save->cr3;
-	svm->vmcb01.ptr->save.cr4 = save->cr4;
-	svm->vmcb01.ptr->save.rax = save->rax;
-	svm->vmcb01.ptr->save.rsp = save->rsp;
-	svm->vmcb01.ptr->save.rip = save->rip;
-	svm->vmcb01.ptr->save.cpl = 0;
-
+	svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
 	nested_load_control_from_vmcb12(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
 	nested_vmcb02_prepare_control(svm);
-
 	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
 	ret = 0;
 out_free:
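
The new check in nested_svm_vmrun() makes architectural behaviour explicit: VMRUN needs a valid host save area, so an L1 hypervisor is expected to program a page-aligned physical address into MSR_VM_HSAVE_PA before its first VMRUN, and KVM now injects #GP if that never happened. A heavily hedged sketch of the guest (L1) side, with a hypothetical hsave_area allocation standing in for whatever the L1 hypervisor actually uses:

/* Hypothetical L1-hypervisor setup; names here are illustrative only. */
#include <linux/mm.h>
#include <asm/msr.h>

static void *hsave_area;	/* one page-aligned page owned by the L1 hypervisor */

static void l1_svm_setup(void)
{
	/*
	 * Hardware (and now KVM's nested_svm_vmrun()) expects a valid,
	 * page-aligned host save area to be programmed before the first VMRUN.
	 */
	wrmsrl(MSR_VM_HSAVE_PA, __pa(hsave_area));
}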

arch/x86/kvm/svm/sev.c

Lines changed: 8 additions & 6 deletions
@@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	/* Pin guest memory */
 	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
 				    PAGE_SIZE, &n, 0);
-	if (!guest_page)
-		return -EFAULT;
+	if (IS_ERR(guest_page))
+		return PTR_ERR(guest_page);
 
 	/* allocate memory for header and transport buffer */
 	ret = -ENOMEM;

@@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	}
 
 	/* Copy packet header to userspace. */
-	ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
-			   params.hdr_len);
+	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+			 params.hdr_len))
+		ret = -EFAULT;
 
 e_free_trans_data:
 	kfree(trans_data);

@@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	data.trans_len = params.trans_len;
 
 	/* Pin guest memory */
-	ret = -EFAULT;
 	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
 				    PAGE_SIZE, &n, 0);
-	if (!guest_page)
+	if (IS_ERR(guest_page)) {
+		ret = PTR_ERR(guest_page);
 		goto e_free_trans;
+	}
 
 	/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
 	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
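
Both sev_pin_memory() fixes come down to the kernel's ERR_PTR convention: the function reports failure with an encoded error pointer, never NULL, so the old `!guest_page` checks silently accepted errors; likewise, copy_to_user() returns the number of bytes not copied, so its return value must be converted to -EFAULT rather than stored in ret. A generic sketch of the corrected idiom (pin_something() is a made-up callee for illustration):

#include <linux/err.h>
#include <linux/uaccess.h>

struct page **pin_something(unsigned long uaddr, unsigned long *npages);

static int example_caller(unsigned long uaddr, void __user *dst,
			  const void *buf, size_t len)
{
	unsigned long n;
	struct page **pages = pin_something(uaddr, &n);

	/* ERR_PTR convention: failure is an encoded errno, not NULL. */
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/* copy_to_user() returns the number of bytes NOT copied, so convert
	 * a non-zero result to -EFAULT instead of returning it directly. */
	if (copy_to_user(dst, buf, len))
		return -EFAULT;

	return 0;
}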

arch/x86/kvm/svm/svm.c

Lines changed: 70 additions & 7 deletions
@@ -198,6 +198,11 @@ module_param(avic, bool, 0444);
 
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
 static bool svm_gp_erratum_intercept = true;
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";

@@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 
 	svm_set_intercept(svm, INTERCEPT_INTR);
 	svm_set_intercept(svm, INTERCEPT_NMI);
-	svm_set_intercept(svm, INTERCEPT_SMI);
+
+	if (intercept_smi)
+		svm_set_intercept(svm, INTERCEPT_SMI);
+
 	svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
 	svm_set_intercept(svm, INTERCEPT_RDPMC);
 	svm_set_intercept(svm, INTERCEPT_CPUID);

@@ -1923,7 +1931,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
 	trace_kvm_page_fault(fault_address, error_code);

@@ -2106,6 +2114,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int smi_interception(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static int intr_interception(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.irq_exits;

@@ -2941,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm_disable_lbrv(vcpu);
 		break;
 	case MSR_VM_HSAVE_PA:
-		svm->nested.hsave_msr = data;
+		/*
+		 * Old kernels did not validate the value written to
+		 * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+		 * value to allow live migrating buggy or malicious guests
+		 * originating from those kernels.
+		 */
+		if (!msr->host_initiated && !page_address_valid(vcpu, data))
+			return 1;
+
+		svm->nested.hsave_msr = data & PAGE_MASK;
 		break;
 	case MSR_VM_CR:
 		return svm_set_vm_cr(vcpu, data);

@@ -3080,8 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
 	[SVM_EXIT_INTR] = intr_interception,
 	[SVM_EXIT_NMI] = nmi_interception,
-	[SVM_EXIT_SMI] = kvm_emulate_as_nop,
-	[SVM_EXIT_INIT] = kvm_emulate_as_nop,
+	[SVM_EXIT_SMI] = smi_interception,
 	[SVM_EXIT_VINTR] = interrupt_window_interception,
 	[SVM_EXIT_RDPMC] = kvm_emulate_rdpmc,
 	[SVM_EXIT_CPUID] = kvm_emulate_cpuid,

@@ -4288,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_host_map map_save;
 	int ret;
 
 	if (is_guest_mode(vcpu)) {

@@ -4303,20 +4325,44 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 		ret = nested_svm_vmexit(svm);
 		if (ret)
 			return ret;
+
+		/*
+		 * KVM uses VMCB01 to store L1 host state while L2 runs but
+		 * VMCB01 is going to be used during SMM and thus the state will
+		 * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+		 * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+		 * format of the area is identical to guest save area offsetted
+		 * by 0x400 (matches the offset of 'struct vmcb_save_area'
+		 * within 'struct vmcb'). Note: HSAVE area may also be used by
+		 * L1 hypervisor to save additional host context (e.g. KVM does
+		 * that, see svm_prepare_guest_switch()) which must be
+		 * preserved.
+		 */
+		if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+				 &map_save) == -EINVAL)
+			return 1;
+
+		BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+		svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+				     map_save.hva + 0x400);
+
+		kvm_vcpu_unmap(vcpu, &map_save, true);
 	}
 	return 0;
 }
 
 static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct kvm_host_map map;
+	struct kvm_host_map map, map_save;
 	int ret = 0;
 
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
 		u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
 		u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
 		u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+		struct vmcb *vmcb12;
 
 		if (guest) {
 			if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))

@@ -4332,8 +4378,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 			if (svm_allocate_nested(svm))
 				return 1;
 
-			ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+			vmcb12 = map.hva;
+
+			nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+			ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
 			kvm_vcpu_unmap(vcpu, &map, true);
+
+			/*
+			 * Restore L1 host state from L1 HSAVE area as VMCB01 was
+			 * used during SMM (see svm_enter_smm())
+			 */
+			if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+					 &map_save) == -EINVAL)
+				return 1;
+
+			svm_copy_vmrun_state(map_save.hva + 0x400,
+					     &svm->vmcb01.ptr->save);
+
+			kvm_vcpu_unmap(vcpu, &map_save, true);
 		}
 	}
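
Two notes on the svm.c changes above. First, intercept_smi is a 0444 module parameter, so it is chosen at load time (e.g. modprobe kvm_amd intercept_smi=0) and then visible read-only under /sys/module/kvm_amd/parameters/. Second, the MSR_VM_HSAVE_PA handler now rejects guest writes that are not page aligned or that exceed the guest's MAXPHYADDR, while still accepting host-initiated writes so migration of older or buggy guests keeps working. A standalone sketch of that shape of check, with the page size hard-coded for illustration (page_address_valid() is the helper the real code uses):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

/*
 * Illustration only: roughly the check the patch adds for guest writes
 * to MSR_VM_HSAVE_PA -- the value must be page aligned and must fit
 * within the guest's physical address width.
 */
static bool hsave_pa_acceptable(uint64_t pa, unsigned int guest_maxphyaddr,
				bool host_initiated)
{
	if (host_initiated)
		return true;	/* keep migration of older/buggy guests working */
	if (pa & (PAGE_SIZE - 1))
		return false;	/* not page aligned */
	if (guest_maxphyaddr < 64 && (pa >> guest_maxphyaddr))
		return false;	/* beyond the guest's MAXPHYADDR */
	return true;
}

The SMM enter/leave hunks additionally rely on the architectural layout of the VMCB: the save area sits at offset 0x400 (hence the BUILD_BUG_ON), which is why the L1 host state can be parked in the guest's HSAVE page while VMCB01 is borrowed for SMM and restored on the way back.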
