
Commit f0e7012

KVM: x86: Bypass register cache when querying CPL from kvm_sched_out()
When querying guest CPL to determine if a vCPU was preempted while in
kernel mode, bypass the register cache, i.e. always read SS.AR_BYTES from
the VMCS on Intel CPUs.  If the kernel is running with full preemption
enabled, using the register cache in the preemption path can result in
stale and/or uninitialized data being cached in the segment cache.

In particular the following scenario is currently possible:

 - vCPU is just created, and the vCPU thread is preempted before
   SS.AR_BYTES is written in vmx_vcpu_reset().

 - When scheduling out the vCPU task, kvm_arch_vcpu_in_kernel() =>
   vmx_get_cpl() reads and caches '0' for SS.AR_BYTES.

 - vmx_vcpu_reset() => seg_setup() configures SS.AR_BYTES, but doesn't
   invoke vmx_segment_cache_clear() to invalidate the cache.

As a result, KVM retains a stale value in the cache, which can be read,
e.g. via KVM_GET_SREGS.  Usually this is not a problem because the VMX
segment cache is reset on each VM-Exit, but if the userspace VMM (e.g.
KVM selftests) reads and writes system registers just after the vCPU was
created, _without_ modifying SS.AR_BYTES, userspace will write back the
stale '0' value and ultimately will trigger a VM-Entry failure due to an
incorrect SS segment type.

Note, the VM-Enter failure can also be avoided by moving the call to
vmx_segment_cache_clear() until after vmx_vcpu_reset() initializes all
segments.  However, while that change is correct and desirable (and will
come along shortly), it does not address the underlying problem that
accessing KVM's register caches from !task context is generally unsafe.

In addition to fixing the immediate bug, bypassing the cache for this
particular case will allow hardening KVM's register caching logic to
assert that the caches are accessed only when KVM _knows_ it is safe to
do so.

Fixes: de63ad4 ("KVM: X86: implement the logic for spinlock optimization")
Reported-by: Maxim Levitsky <[email protected]>
Closes: https://lore.kernel.org/all/[email protected]
Reviewed-by: Maxim Levitsky <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Sean Christopherson <[email protected]>
1 parent de57249 commit f0e7012
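
For illustration only (not part of this commit): a minimal userspace sketch of
the read-then-write-back pattern described in the commit message, using the raw
KVM_GET_SREGS/KVM_SET_SREGS ioctls rather than an actual selftest. The vcpu_fd
handle and the surrounding VM setup are assumed.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Hypothetical repro sketch: if the vCPU task was preempted before
 * vmx_vcpu_reset() wrote SS.AR_BYTES, the sched-out path may have cached
 * a stale '0' for SS, and this otherwise innocuous round-trip writes the
 * stale value back into guest state.
 */
static void sregs_roundtrip(int vcpu_fd)
{
	struct kvm_sregs sregs;

	/* May return SS state served from KVM's (stale) segment cache. */
	if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs))
		return;

	/* Userspace leaves sregs.ss untouched... */

	/* ...so the stale SS descriptor is written back and VM-Entry later fails. */
	ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
}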

7 files changed: +30 -6 lines changed

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@ KVM_X86_OP(set_msr)
 KVM_X86_OP(get_segment_base)
 KVM_X86_OP(get_segment)
 KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
 KVM_X86_OP(set_segment)
 KVM_X86_OP(get_cs_db_l_bits)
 KVM_X86_OP(is_valid_cr0)

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
@@ -1656,6 +1656,7 @@ struct kvm_x86_ops {
 	void (*get_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	int (*get_cpl)(struct kvm_vcpu *vcpu);
+	int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
 	void (*set_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);

arch/x86/kvm/svm/svm.c

Lines changed: 1 addition & 0 deletions
@@ -5031,6 +5031,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.get_segment = svm_get_segment,
 	.set_segment = svm_set_segment,
 	.get_cpl = svm_get_cpl,
+	.get_cpl_no_cache = svm_get_cpl,
 	.get_cs_db_l_bits = svm_get_cs_db_l_bits,
 	.is_valid_cr0 = svm_is_valid_cr0,
 	.set_cr0 = svm_set_cr0,

arch/x86/kvm/vmx/main.c

Lines changed: 1 addition & 0 deletions
@@ -50,6 +50,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.get_segment = vmx_get_segment,
 	.set_segment = vmx_set_segment,
 	.get_cpl = vmx_get_cpl,
+	.get_cpl_no_cache = vmx_get_cpl_no_cache,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
 	.is_valid_cr0 = vmx_is_valid_cr0,
 	.set_cr0 = vmx_set_cr0,

arch/x86/kvm/vmx/vmx.c

Lines changed: 18 additions & 5 deletions
@@ -3568,16 +3568,29 @@ u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 	return vmx_read_guest_seg_base(to_vmx(vcpu), seg);
 }
 
-int vmx_get_cpl(struct kvm_vcpu *vcpu)
+static int __vmx_get_cpl(struct kvm_vcpu *vcpu, bool no_cache)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	int ar;
 
 	if (unlikely(vmx->rmode.vm86_active))
 		return 0;
-	else {
-		int ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
-		return VMX_AR_DPL(ar);
-	}
+
+	if (no_cache)
+		ar = vmcs_read32(GUEST_SS_AR_BYTES);
+	else
+		ar = vmx_read_guest_seg_ar(vmx, VCPU_SREG_SS);
+	return VMX_AR_DPL(ar);
+}
+
+int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+	return __vmx_get_cpl(vcpu, false);
+}
+
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu)
+{
+	return __vmx_get_cpl(vcpu, true);
 }
 
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
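
For context, not part of this diff: the cached path used by vmx_get_cpl() goes
through the VMX segment cache, which latches the VMCS value on the first read
after invalidation and serves the latched copy on every later read. A simplified
sketch of that behavior (abridged from arch/x86/kvm/vmx/vmx.c; names follow the
kernel, but this is not a verbatim copy):

/*
 * Simplified sketch of the cached AR-bytes read.  A read from the
 * sched-out path can populate this cache before vmx_vcpu_reset() has
 * written the real SS.AR_BYTES value, which is why the preemption path
 * must bypass it.
 */
static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, unsigned int seg)
{
	u32 *p = &vmx->segment_cache.seg[seg].ar;

	/* On a cache miss, latch the current VMCS value... */
	if (!vmx_segment_cache_test_set(vmx, seg, SEG_FIELD_AR))
		*p = vmcs_read32(kvm_vmx_segment_fields[seg].ar_bytes);

	/* ...and always return the latched copy, stale or not. */
	return *p;
}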

arch/x86/kvm/vmx/vmx.h

Lines changed: 1 addition & 0 deletions
@@ -385,6 +385,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
 void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
 			unsigned long fs_base, unsigned long gs_base);
 int vmx_get_cpl(struct kvm_vcpu *vcpu);
+int vmx_get_cpl_no_cache(struct kvm_vcpu *vcpu);
 bool vmx_emulation_required(struct kvm_vcpu *vcpu);
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
 void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);

arch/x86/kvm/x86.c

Lines changed: 7 additions & 1 deletion
@@ -5095,7 +5095,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	int idx;
 
 	if (vcpu->preempted) {
-		vcpu->arch.preempted_in_kernel = kvm_arch_vcpu_in_kernel(vcpu);
+		/*
+		 * Assume protected guests are in-kernel.  Inefficient yielding
+		 * due to false positives is preferable to never yielding due
+		 * to false negatives.
+		 */
+		vcpu->arch.preempted_in_kernel = vcpu->arch.guest_state_protected ||
+						 !kvm_x86_call(get_cpl_no_cache)(vcpu);
 
 		/*
 		 * Take the srcu lock as memslots will be accessed to check the gfn
