
Commit f526d6a

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "x86:

   - fix lock initialization race in gfn-to-pfn cache (+selftests)

   - fix two refcounting errors

   - emulator fixes

   - mask off reserved bits in CPUID

   - fix bug with disabling SGX

  RISC-V:

   - update MAINTAINERS"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/xen: Fix eventfd error handling in kvm_xen_eventfd_assign()
  KVM: x86: smm: number of GPRs in the SMRAM image depends on the image format
  KVM: x86: emulator: update the emulation mode after CR0 write
  KVM: x86: emulator: update the emulation mode after rsm
  KVM: x86: emulator: introduce emulator_recalc_and_set_mode
  KVM: x86: emulator: em_sysexit should update ctxt->mode
  KVM: selftests: Mark "guest_saw_irq" as volatile in xen_shinfo_test
  KVM: selftests: Add tests in xen_shinfo_test to detect lock races
  KVM: Reject attempts to consume or refresh inactive gfn_to_pfn_cache
  KVM: Initialize gfn_to_pfn_cache locks in dedicated helper
  KVM: VMX: fully disable SGX if SECONDARY_EXEC_ENCLS_EXITING unavailable
  KVM: x86: Exempt pending triple fault from event injection sanity check
  MAINTAINERS: git://github -> https://github.com for kvm-riscv
  KVM: debugfs: Return retval of simple_attr_open() if it fails
  KVM: x86: Reduce refcount if single_open() fails in kvm_mmu_rmaps_stat_open()
  KVM: x86: Mask off reserved bits in CPUID.8000001FH
  KVM: x86: Mask off reserved bits in CPUID.8000001AH
  KVM: x86: Mask off reserved bits in CPUID.80000008H
  KVM: x86: Mask off reserved bits in CPUID.80000006H
  KVM: x86: Mask off reserved bits in CPUID.80000001H
2 parents: d79dcde + 7353633

11 files changed (+362, -103 lines)


MAINTAINERS

Lines changed: 1 addition & 1 deletion

@@ -11248,7 +11248,7 @@ L: [email protected]
 S: Maintained
-T: git git://github.com/kvm-riscv/linux.git
+T: git https://github.com/kvm-riscv/linux.git
 F: arch/riscv/include/asm/kvm*
 F: arch/riscv/include/uapi/asm/kvm*
 F: arch/riscv/kvm/

arch/x86/kvm/cpuid.c

Lines changed: 9 additions & 2 deletions

@@ -1133,11 +1133,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                 entry->eax = max(entry->eax, 0x80000021);
                 break;
         case 0x80000001:
+                entry->ebx &= ~GENMASK(27, 16);
                 cpuid_entry_override(entry, CPUID_8000_0001_EDX);
                 cpuid_entry_override(entry, CPUID_8000_0001_ECX);
                 break;
         case 0x80000006:
-                /* L2 cache and TLB: pass through host info. */
+                /* Drop reserved bits, pass host L2 cache and TLB info. */
+                entry->edx &= ~GENMASK(17, 16);
                 break;
         case 0x80000007: /* Advanced power management */
                 /* invariant TSC is CPUID.80000007H:EDX[8] */
@@ -1167,6 +1169,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                         g_phys_as = phys_as;

                 entry->eax = g_phys_as | (virt_as << 8);
+                entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
                 entry->edx = 0;
                 cpuid_entry_override(entry, CPUID_8000_0008_EBX);
                 break;
@@ -1186,14 +1189,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                 entry->ecx = entry->edx = 0;
                 break;
         case 0x8000001a:
+                entry->eax &= GENMASK(2, 0);
+                entry->ebx = entry->ecx = entry->edx = 0;
+                break;
         case 0x8000001e:
                 break;
         case 0x8000001F:
                 if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
                         entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
                 } else {
                         cpuid_entry_override(entry, CPUID_8000_001F_EAX);
-
+                        /* Clear NumVMPL since KVM does not support VMPL. */
+                        entry->ebx &= ~GENMASK(31, 12);
                         /*
                          * Enumerate '0' for "PA bits reduction", the adjusted
                          * MAXPHYADDR is enumerated directly (see 0x80000008).
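The masking above relies on the kernel's GENMASK(h, l) helper, which builds a mask with bits h down to l set. A minimal userspace sketch of the same bit manipulation, with GENMASK() re-defined locally and a made-up raw EBX value, just to illustrate what "mask off reserved bits" means here:

#include <stdio.h>

/* Local stand-in for the kernel's GENMASK(): set bits h..l inclusive. */
#define BITS_PER_LONG (8 * sizeof(unsigned long))
#define GENMASK(h, l) \
        (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h))))

int main(void)
{
        /* Hypothetical raw CPUID.80000001H:EBX value read from the host. */
        unsigned int ebx = 0xdeadbeef;

        /* Clear reserved bits 27:16 before reporting the leaf, as the patch does. */
        ebx &= ~GENMASK(27, 16);

        printf("EBX after masking reserved bits: 0x%08x\n", ebx);
        return 0;
}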

arch/x86/kvm/debugfs.c

Lines changed: 6 additions & 1 deletion

@@ -158,11 +158,16 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v)
 static int kvm_mmu_rmaps_stat_open(struct inode *inode, struct file *file)
 {
         struct kvm *kvm = inode->i_private;
+        int r;

         if (!kvm_get_kvm_safe(kvm))
                 return -ENOENT;

-        return single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+        r = single_open(file, kvm_mmu_rmaps_stat_show, kvm);
+        if (r < 0)
+                kvm_put_kvm(kvm);
+
+        return r;
 }

 static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file)
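The debugfs fix is the standard acquire/undo-on-failure shape: take a reference before handing the kvm object to single_open(), and drop that reference again if single_open() fails so the count stays balanced. A toy userspace sketch of the same pattern (the counter, get_ref()/put_ref() and try_open() are invented for illustration; they are not kernel APIs):

#include <stdio.h>

/* Toy stand-ins for kvm_get_kvm_safe()/kvm_put_kvm(); not kernel APIs. */
static int refcount = 1;

static int get_ref(void)
{
        if (refcount <= 0)
                return 0;       /* object already being torn down */
        refcount++;
        return 1;
}

static void put_ref(void)
{
        refcount--;
}

/* Stand-in for single_open(): returns 0 on success, negative on failure. */
static int try_open(int should_fail)
{
        return should_fail ? -1 : 0;
}

static int open_with_ref(int should_fail)
{
        int r;

        if (!get_ref())
                return -2;      /* nothing to undo, like the -ENOENT path */

        r = try_open(should_fail);
        if (r < 0)
                put_ref();      /* undo the reference the failed open would leak */

        return r;
}

int main(void)
{
        int r;

        r = open_with_ref(0);
        printf("success: r=%d, refcount=%d (reference held by the open file)\n", r, refcount);

        r = open_with_ref(1);
        printf("failure: r=%d, refcount=%d (reference dropped again)\n", r, refcount);
        return 0;
}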

arch/x86/kvm/emulate.c

Lines changed: 76 additions & 32 deletions

@@ -791,8 +791,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
                           ctxt->mode, linear);
 }

-static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
-                             enum x86emul_mode mode)
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
         ulong linear;
         int rc;
@@ -802,41 +801,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,

         if (ctxt->op_bytes != sizeof(unsigned long))
                 addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
-        rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+        rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
         if (rc == X86EMUL_CONTINUE)
                 ctxt->_eip = addr.ea;
         return rc;
 }

+static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt)
+{
+        u64 efer;
+        struct desc_struct cs;
+        u16 selector;
+        u32 base3;
+
+        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+
+        if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) {
+                /* Real mode. cpu must not have long mode active */
+                if (efer & EFER_LMA)
+                        return X86EMUL_UNHANDLEABLE;
+                ctxt->mode = X86EMUL_MODE_REAL;
+                return X86EMUL_CONTINUE;
+        }
+
+        if (ctxt->eflags & X86_EFLAGS_VM) {
+                /* Protected/VM86 mode. cpu must not have long mode active */
+                if (efer & EFER_LMA)
+                        return X86EMUL_UNHANDLEABLE;
+                ctxt->mode = X86EMUL_MODE_VM86;
+                return X86EMUL_CONTINUE;
+        }
+
+        if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS))
+                return X86EMUL_UNHANDLEABLE;
+
+        if (efer & EFER_LMA) {
+                if (cs.l) {
+                        /* Proper long mode */
+                        ctxt->mode = X86EMUL_MODE_PROT64;
+                } else if (cs.d) {
+                        /* 32 bit compatibility mode*/
+                        ctxt->mode = X86EMUL_MODE_PROT32;
+                } else {
+                        ctxt->mode = X86EMUL_MODE_PROT16;
+                }
+        } else {
+                /* Legacy 32 bit / 16 bit mode */
+                ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+        }
+
+        return X86EMUL_CONTINUE;
+}
+
 static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
-        return assign_eip(ctxt, dst, ctxt->mode);
+        return assign_eip(ctxt, dst);
 }

-static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-                          const struct desc_struct *cs_desc)
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst)
 {
-        enum x86emul_mode mode = ctxt->mode;
-        int rc;
+        int rc = emulator_recalc_and_set_mode(ctxt);

-#ifdef CONFIG_X86_64
-        if (ctxt->mode >= X86EMUL_MODE_PROT16) {
-                if (cs_desc->l) {
-                        u64 efer = 0;
+        if (rc != X86EMUL_CONTINUE)
+                return rc;

-                        ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
-                        if (efer & EFER_LMA)
-                                mode = X86EMUL_MODE_PROT64;
-                } else
-                        mode = X86EMUL_MODE_PROT32; /* temporary value */
-        }
-#endif
-        if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
-                mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-        rc = assign_eip(ctxt, dst, mode);
-        if (rc == X86EMUL_CONTINUE)
-                ctxt->mode = mode;
-        return rc;
+        return assign_eip(ctxt, dst);
 }

 static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -2172,7 +2201,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return rc;

-        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+        rc = assign_eip_far(ctxt, ctxt->src.val);
         /* Error handling is not implemented. */
         if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_UNHANDLEABLE;
@@ -2250,7 +2279,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
                                &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-        rc = assign_eip_far(ctxt, eip, &new_desc);
+        rc = assign_eip_far(ctxt, eip);
         /* Error handling is not implemented. */
         if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_UNHANDLEABLE;
@@ -2432,7 +2461,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
         ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
         ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0);

-        for (i = 0; i < NR_EMULATOR_GPRS; i++)
+        for (i = 0; i < 8; i++)
                 *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);

         val = GET_SMSTATE(u32, smstate, 0x7fcc);
@@ -2489,7 +2518,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
         u16 selector;
         int i, r;

-        for (i = 0; i < NR_EMULATOR_GPRS; i++)
+        for (i = 0; i < 16; i++)
                 *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);

         ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78);
@@ -2633,7 +2662,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
          * those side effects need to be explicitly handled for both success
          * and shutdown.
          */
-        return X86EMUL_CONTINUE;
+        return emulator_recalc_and_set_mode(ctxt);

 emulate_shutdown:
         ctxt->ops->triple_fault(ctxt);
@@ -2876,6 +2905,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
         ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

         ctxt->_eip = rdx;
+        ctxt->mode = usermode;
         *reg_write(ctxt, VCPU_REGS_RSP) = rcx;

         return X86EMUL_CONTINUE;
@@ -3469,7 +3499,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return rc;

-        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
+        rc = assign_eip_far(ctxt, ctxt->src.val);
         if (rc != X86EMUL_CONTINUE)
                 goto fail;

@@ -3611,11 +3641,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)

 static int em_cr_write(struct x86_emulate_ctxt *ctxt)
 {
-        if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val))
+        int cr_num = ctxt->modrm_reg;
+        int r;
+
+        if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val))
                 return emulate_gp(ctxt, 0);

         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
+
+        if (cr_num == 0) {
+                /*
+                 * CR0 write might have updated CR0.PE and/or CR0.PG
+                 * which can affect the cpu's execution mode.
+                 */
+                r = emulator_recalc_and_set_mode(ctxt);
+                if (r != X86EMUL_CONTINUE)
+                        return r;
+        }
+
         return X86EMUL_CONTINUE;
 }
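emulator_recalc_and_set_mode() above derives the emulation mode from CR0.PE, EFLAGS.VM, EFER.LMA and the CS descriptor. A simplified, self-contained sketch of that decision table (the struct and enum are illustrative stand-ins, not the kernel's types; the combinations the emulator treats as unhandleable map to MODE_INVALID here):

#include <stdbool.h>
#include <stdio.h>

enum mode { MODE_REAL, MODE_VM86, MODE_PROT16, MODE_PROT32, MODE_PROT64, MODE_INVALID };

/* Illustrative snapshot of the state the emulator consults. */
struct cpu_state {
        bool cr0_pe;     /* CR0.PE   - protected mode enabled        */
        bool eflags_vm;  /* EFLAGS.VM - virtual-8086 mode            */
        bool efer_lma;   /* EFER.LMA - long mode active              */
        bool cs_l;       /* CS.L - 64-bit code segment               */
        bool cs_d;       /* CS.D - default operand size is 32 bits   */
};

static enum mode recalc_mode(const struct cpu_state *s)
{
        if (!s->cr0_pe)
                return s->efer_lma ? MODE_INVALID : MODE_REAL;

        if (s->eflags_vm)
                return s->efer_lma ? MODE_INVALID : MODE_VM86;

        if (s->efer_lma) {
                if (s->cs_l)
                        return MODE_PROT64;                  /* proper long mode */
                return s->cs_d ? MODE_PROT32 : MODE_PROT16;  /* compatibility mode */
        }

        /* Legacy protected mode: operand size follows CS.D. */
        return s->cs_d ? MODE_PROT32 : MODE_PROT16;
}

int main(void)
{
        struct cpu_state s = { .cr0_pe = true, .efer_lma = true, .cs_l = true };

        printf("mode = %d (MODE_PROT64 = %d)\n", recalc_mode(&s), MODE_PROT64);
        return 0;
}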

arch/x86/kvm/vmx/vmx.c

Lines changed: 5 additions & 0 deletions

@@ -8263,6 +8263,11 @@ static __init int hardware_setup(void)
         if (!cpu_has_virtual_nmis())
                 enable_vnmi = 0;

+#ifdef CONFIG_X86_SGX_KVM
+        if (!cpu_has_vmx_encls_vmexit())
+                enable_sgx = false;
+#endif
+
         /*
          * set_apic_access_page_addr() is used to reload apic access
          * page upon invalidation. No need to do anything if not
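The hunk above is the usual hardware_setup() shape: force a feature off when the VMX control it depends on is missing. A hedged stand-alone sketch of that gating pattern (the flags below are invented for illustration, not real VMX capability checks):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative capability and module-parameter-style flags. */
static bool cpu_has_encls_exiting = false;   /* pretend the control is absent */
static bool enable_sgx = true;

static void hardware_setup_sketch(void)
{
        /*
         * Same shape as the vmx.c change: without the exec control needed to
         * intercept ENCLS, SGX cannot be exposed safely, so it is fully
         * disabled up front instead of partially enabled.
         */
        if (!cpu_has_encls_exiting)
                enable_sgx = false;
}

int main(void)
{
        hardware_setup_sketch();
        printf("enable_sgx = %d\n", enable_sgx);
        return 0;
}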

arch/x86/kvm/x86.c

Lines changed: 21 additions & 6 deletions

@@ -2315,11 +2315,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,

         /* we verify if the enable bit is set... */
         if (system_time & 1) {
-                kvm_gfn_to_pfn_cache_init(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
-                                          KVM_HOST_USES_PFN, system_time & ~1ULL,
-                                          sizeof(struct pvclock_vcpu_time_info));
+                kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
+                                 KVM_HOST_USES_PFN, system_time & ~1ULL,
+                                 sizeof(struct pvclock_vcpu_time_info));
         } else {
-                kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+                kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
         }

         return;
@@ -3388,7 +3388,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)

 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
-        kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.pv_time);
+        kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
         vcpu->arch.time = 0;
 }

@@ -10044,7 +10044,20 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
             kvm_x86_ops.nested_ops->has_events(vcpu))
                 *req_immediate_exit = true;

-        WARN_ON(kvm_is_exception_pending(vcpu));
+        /*
+         * KVM must never queue a new exception while injecting an event; KVM
+         * is done emulating and should only propagate the to-be-injected event
+         * to the VMCS/VMCB. Queueing a new exception can put the vCPU into an
+         * infinite loop as KVM will bail from VM-Enter to inject the pending
+         * exception and start the cycle all over.
+         *
+         * Exempt triple faults as they have special handling and won't put the
+         * vCPU into an infinite loop. Triple fault can be queued when running
+         * VMX without unrestricted guest, as that requires KVM to emulate Real
+         * Mode events (see kvm_inject_realmode_interrupt()).
+         */
+        WARN_ON_ONCE(vcpu->arch.exception.pending ||
+                     vcpu->arch.exception_vmexit.pending);
         return 0;

 out:
@@ -11816,6 +11829,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
         vcpu->arch.regs_avail = ~0;
         vcpu->arch.regs_dirty = ~0;

+        kvm_gpc_init(&vcpu->arch.pv_time);
+
         if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
         else
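Taken together, the x86.c hunks split the gfn-to-pfn cache lifecycle into a one-time kvm_gpc_init() at vCPU creation, which is where the lock-initialization race is closed, and repeatable kvm_gpc_activate()/kvm_gpc_deactivate() calls. A toy userspace model of that lifecycle, with invented names and a pthread mutex standing in for the cache's lock; it is only meant to show why the lock setup belongs in a dedicated init helper rather than in the activate path:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model only; toy_gpc and its helpers are not the kernel API. */
struct toy_gpc {
        pthread_mutex_t lock;
        bool active;
};

static void toy_gpc_init(struct toy_gpc *gpc)
{
        pthread_mutex_init(&gpc->lock, NULL);   /* one-time lock setup */
        gpc->active = false;
}

static void toy_gpc_activate(struct toy_gpc *gpc)
{
        pthread_mutex_lock(&gpc->lock);
        gpc->active = true;                     /* map/refresh would happen here */
        pthread_mutex_unlock(&gpc->lock);
}

static void toy_gpc_deactivate(struct toy_gpc *gpc)
{
        pthread_mutex_lock(&gpc->lock);
        gpc->active = false;                    /* unmap would happen here */
        pthread_mutex_unlock(&gpc->lock);
}

int main(void)
{
        struct toy_gpc gpc;

        toy_gpc_init(&gpc);         /* done once, at "vCPU create" time */
        toy_gpc_activate(&gpc);     /* guest enables kvmclock */
        toy_gpc_deactivate(&gpc);   /* guest disables it, or the vCPU is reset */

        printf("active = %d\n", gpc.active);
        pthread_mutex_destroy(&gpc.lock);
        return 0;
}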
