Skip to content

Commit 1e21b53

Browse files
committed
Merge branch 'kvm-vmx-ve' into HEAD
Allow a non-zero value for non-present SPTE and removed SPTE, so that TDX can set the "suppress VE" bit.
2 parents 40269c0 + 8131cf5 commit 1e21b53

File tree

13 files changed

+167
-43
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,8 @@ struct kvm_arch {
13131313
*/
13141314
spinlock_t mmu_unsync_pages_lock;
13151315

1316+
u64 shadow_mmio_value;
1317+
13161318
struct iommu_domain *iommu_domain;
13171319
bool iommu_noncoherent;
13181320
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA

arch/x86/include/asm/vmx.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
7272
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
7373
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
74+
#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
7475
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
7576
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
7677
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
@@ -226,6 +227,8 @@ enum vmcs_field {
226227
VMREAD_BITMAP_HIGH = 0x00002027,
227228
VMWRITE_BITMAP = 0x00002028,
228229
VMWRITE_BITMAP_HIGH = 0x00002029,
230+
VE_INFORMATION_ADDRESS = 0x0000202A,
231+
VE_INFORMATION_ADDRESS_HIGH = 0x0000202B,
229232
XSS_EXIT_BITMAP = 0x0000202C,
230233
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
231234
ENCLS_EXITING_BITMAP = 0x0000202E,
@@ -514,6 +517,7 @@ enum vmcs_field {
514517
#define VMX_EPT_IPAT_BIT (1ull << 6)
515518
#define VMX_EPT_ACCESS_BIT (1ull << 8)
516519
#define VMX_EPT_DIRTY_BIT (1ull << 9)
520+
#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
517521
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
518522
VMX_EPT_WRITABLE_MASK | \
519523
VMX_EPT_EXECUTABLE_MASK)
@@ -630,4 +634,13 @@ enum vmx_l1d_flush_state {
630634

631635
extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
632636

637+
struct vmx_ve_information {
638+
u32 exit_reason;
639+
u32 delivery;
640+
u64 exit_qualification;
641+
u64 guest_linear_address;
642+
u64 guest_physical_address;
643+
u16 eptp_index;
644+
};
645+
633646
#endif

arch/x86/kvm/Kconfig

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,19 @@ config KVM_INTEL
9595
To compile this as a module, choose M here: the module
9696
will be called kvm-intel.
9797

98+
config KVM_INTEL_PROVE_VE
99+
bool "Check that guests do not receive #VE exceptions"
100+
default KVM_PROVE_MMU || DEBUG_KERNEL
101+
depends on KVM_INTEL
102+
help
103+
104+
Checks that KVM's page table management code will not incorrectly
105+
let guests receive a virtualization exception. Virtualization
106+
exceptions will be trapped by the hypervisor rather than injected
107+
in the guest.
108+
109+
If unsure, say N.
110+
98111
config X86_SGX_KVM
99112
bool "Software Guard eXtensions (SGX) Virtualization"
100113
depends on X86_SGX && KVM_INTEL

arch/x86/kvm/mmu/mmu.c

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -567,9 +567,9 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
567567

568568
if (!is_shadow_present_pte(old_spte) ||
569569
!spte_has_volatile_bits(old_spte))
570-
__update_clear_spte_fast(sptep, 0ull);
570+
__update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
571571
else
572-
old_spte = __update_clear_spte_slow(sptep, 0ull);
572+
old_spte = __update_clear_spte_slow(sptep, SHADOW_NONPRESENT_VALUE);
573573

574574
if (!is_shadow_present_pte(old_spte))
575575
return old_spte;
@@ -603,7 +603,7 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
603603
*/
604604
static void mmu_spte_clear_no_track(u64 *sptep)
605605
{
606-
__update_clear_spte_fast(sptep, 0ull);
606+
__update_clear_spte_fast(sptep, SHADOW_NONPRESENT_VALUE);
607607
}
608608

609609
static u64 mmu_spte_get_lockless(u64 *sptep)
@@ -1897,7 +1897,8 @@ static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
18971897

18981898
static int kvm_sync_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i)
18991899
{
1900-
if (!sp->spt[i])
1900+
/* sp->spt[i] has initial value of shadow page table allocation */
1901+
if (sp->spt[i] == SHADOW_NONPRESENT_VALUE)
19011902
return 0;
19021903

19031904
return vcpu->arch.mmu->sync_spte(vcpu, sp, i);
@@ -2461,7 +2462,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
24612462
return kvm_mmu_prepare_zap_page(kvm, child,
24622463
invalid_list);
24632464
}
2464-
} else if (is_mmio_spte(pte)) {
2465+
} else if (is_mmio_spte(kvm, pte)) {
24652466
mmu_spte_clear_no_track(spte);
24662467
}
24672468
return 0;
@@ -4143,7 +4144,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
41434144
if (WARN_ON_ONCE(reserved))
41444145
return -EINVAL;
41454146

4146-
if (is_mmio_spte(spte)) {
4147+
if (is_mmio_spte(vcpu->kvm, spte)) {
41474148
gfn_t gfn = get_mmio_spte_gfn(spte);
41484149
unsigned int access = get_mmio_spte_access(spte);
41494150

@@ -4759,7 +4760,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
47594760
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
47604761
unsigned int access)
47614762
{
4762-
if (unlikely(is_mmio_spte(*sptep))) {
4763+
if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) {
47634764
if (gfn != get_mmio_spte_gfn(*sptep)) {
47644765
mmu_spte_clear_no_track(sptep);
47654766
return true;
@@ -6120,7 +6121,10 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
61206121
vcpu->arch.mmu_page_header_cache.kmem_cache = mmu_page_header_cache;
61216122
vcpu->arch.mmu_page_header_cache.gfp_zero = __GFP_ZERO;
61226123

6123-
vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
6124+
vcpu->arch.mmu_shadow_page_cache.init_value =
6125+
SHADOW_NONPRESENT_VALUE;
6126+
if (!vcpu->arch.mmu_shadow_page_cache.init_value)
6127+
vcpu->arch.mmu_shadow_page_cache.gfp_zero = __GFP_ZERO;
61246128

61256129
vcpu->arch.mmu = &vcpu->arch.root_mmu;
61266130
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
@@ -6263,6 +6267,7 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
62636267

62646268
void kvm_mmu_init_vm(struct kvm *kvm)
62656269
{
6270+
kvm->arch.shadow_mmio_value = shadow_mmio_value;
62666271
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
62676272
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
62686273
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,7 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
911911
gpa_t pte_gpa;
912912
gfn_t gfn;
913913

914-
if (WARN_ON_ONCE(!sp->spt[i]))
914+
if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
915915
return 0;
916916

917917
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
@@ -933,13 +933,13 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
933933
return 0;
934934

935935
/*
936-
* Drop the SPTE if the new protections would result in a RWX=0
937-
* SPTE or if the gfn is changing. The RWX=0 case only affects
938-
* EPT with execute-only support, i.e. EPT without an effective
939-
* "present" bit, as all other paging modes will create a
940-
* read-only SPTE if pte_access is zero.
936+
* Drop the SPTE if the new protections result in no effective
937+
* "present" bit or if the gfn is changing. The former case
938+
* only affects EPT with execute-only support with pte_access==0;
939+
* all other paging modes will create a read-only SPTE if
940+
* pte_access is zero.
941941
*/
942-
if ((!pte_access && !shadow_present_mask) ||
942+
if ((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE ||
943943
gfn != kvm_mmu_page_get_gfn(sp, i)) {
944944
drop_spte(vcpu->kvm, &sp->spt[i]);
945945
return 1;

arch/x86/kvm/mmu/spte.c

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
7474
u64 spte = generation_mmio_spte_mask(gen);
7575
u64 gpa = gfn << PAGE_SHIFT;
7676

77-
WARN_ON_ONCE(!shadow_mmio_value);
77+
WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value);
7878

7979
access &= shadow_mmio_access_mask;
80-
spte |= shadow_mmio_value | access;
80+
spte |= vcpu->kvm->arch.shadow_mmio_value | access;
8181
spte |= gpa | shadow_nonpresent_or_rsvd_mask;
8282
spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
8383
<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
@@ -144,19 +144,19 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
144144
u64 spte = SPTE_MMU_PRESENT_MASK;
145145
bool wrprot = false;
146146

147-
WARN_ON_ONCE(!pte_access && !shadow_present_mask);
147+
/*
148+
* For the EPT case, shadow_present_mask has no RWX bits set if
149+
* exec-only page table entries are supported. In that case,
150+
* ACC_USER_MASK and shadow_user_mask are used to represent
151+
* read access. See FNAME(gpte_access) in paging_tmpl.h.
152+
*/
153+
WARN_ON_ONCE((pte_access | shadow_present_mask) == SHADOW_NONPRESENT_VALUE);
148154

149155
if (sp->role.ad_disabled)
150156
spte |= SPTE_TDP_AD_DISABLED;
151157
else if (kvm_mmu_page_ad_need_write_protect(sp))
152158
spte |= SPTE_TDP_AD_WRPROT_ONLY;
153159

154-
/*
155-
* For the EPT case, shadow_present_mask is 0 if hardware
156-
* supports exec-only page table entries. In that case,
157-
* ACC_USER_MASK and shadow_user_mask are used to represent
158-
* read access. See FNAME(gpte_access) in paging_tmpl.h.
159-
*/
160160
spte |= shadow_present_mask;
161161
if (!prefetch)
162162
spte |= spte_shadow_accessed_mask(spte);
@@ -413,7 +413,9 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
413413
shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull;
414414
shadow_nx_mask = 0ull;
415415
shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
416-
shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
416+
/* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
417+
shadow_present_mask =
418+
(has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
417419
/*
418420
* EPT overrides the host MTRRs, and so KVM must program the desired
419421
* memtype directly into the SPTEs. Note, this mask is just the mask
@@ -430,7 +432,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
430432
* of an EPT paging-structure entry is 110b (write/execute).
431433
*/
432434
kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
433-
VMX_EPT_RWX_MASK, 0);
435+
VMX_EPT_RWX_MASK | VMX_EPT_SUPPRESS_VE_BIT, 0);
434436
}
435437
EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);
436438

arch/x86/kvm/mmu/spte.h

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,22 @@ static_assert(MMIO_SPTE_GEN_LOW_BITS == 8 && MMIO_SPTE_GEN_HIGH_BITS == 11);
149149

150150
#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
151151

152+
/*
153+
* Non-present SPTE value needs to set bit 63 for TDX, in order to suppress
154+
* #VE and get EPT violations on non-present PTEs. We can use the
155+
* same value also without TDX for both VMX and SVM:
156+
*
157+
* For SVM NPT, for non-present spte (bit 0 = 0), other bits are ignored.
158+
* For VMX EPT, bit 63 is ignored if #VE is disabled. (EPT_VIOLATION_VE=0)
159+
* bit 63 is #VE suppress if #VE is enabled. (EPT_VIOLATION_VE=1)
160+
*/
161+
#ifdef CONFIG_X86_64
162+
#define SHADOW_NONPRESENT_VALUE BIT_ULL(63)
163+
static_assert(!(SHADOW_NONPRESENT_VALUE & SPTE_MMU_PRESENT_MASK));
164+
#else
165+
#define SHADOW_NONPRESENT_VALUE 0ULL
166+
#endif
167+
152168
extern u64 __read_mostly shadow_host_writable_mask;
153169
extern u64 __read_mostly shadow_mmu_writable_mask;
154170
extern u64 __read_mostly shadow_nx_mask;
@@ -190,11 +206,11 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
190206
*
191207
* Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
192208
* both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
193-
* vulnerability. Use only low bits to avoid 64-bit immediates.
209+
* vulnerability.
194210
*
195211
* Only used by the TDP MMU.
196212
*/
197-
#define REMOVED_SPTE 0x5a0ULL
213+
#define REMOVED_SPTE (SHADOW_NONPRESENT_VALUE | 0x5a0ULL)
198214

199215
/* Removed SPTEs must not be misconstrued as shadow present PTEs. */
200216
static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
@@ -249,9 +265,9 @@ static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
249265
return spte_to_child_sp(root);
250266
}
251267

252-
static inline bool is_mmio_spte(u64 spte)
268+
static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
253269
{
254-
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
270+
return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
255271
likely(enable_mmio_caching);
256272
}
257273

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -495,8 +495,8 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
495495
* impact the guest since both the former and current SPTEs
496496
* are nonpresent.
497497
*/
498-
if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
499-
!is_mmio_spte(new_spte) &&
498+
if (WARN_ON_ONCE(!is_mmio_spte(kvm, old_spte) &&
499+
!is_mmio_spte(kvm, new_spte) &&
500500
!is_removed_spte(new_spte)))
501501
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
502502
"should not be replaced with another,\n"
@@ -603,7 +603,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
603603
* here since the SPTE is going from non-present to non-present. Use
604604
* the raw write helper to avoid an unnecessary check on volatile bits.
605605
*/
606-
__kvm_tdp_mmu_write_spte(iter->sptep, 0);
606+
__kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
607607

608608
return 0;
609609
}
@@ -740,8 +740,8 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
740740
continue;
741741

742742
if (!shared)
743-
tdp_mmu_iter_set_spte(kvm, &iter, 0);
744-
else if (tdp_mmu_set_spte_atomic(kvm, &iter, 0))
743+
tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
744+
else if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE))
745745
goto retry;
746746
}
747747
}
@@ -808,8 +808,8 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
808808
if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
809809
return false;
810810

811-
tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
812-
sp->gfn, sp->role.level + 1);
811+
tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
812+
SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
813813

814814
return true;
815815
}
@@ -843,7 +843,7 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
843843
!is_last_spte(iter.old_spte, iter.level))
844844
continue;
845845

846-
tdp_mmu_iter_set_spte(kvm, &iter, 0);
846+
tdp_mmu_iter_set_spte(kvm, &iter, SHADOW_NONPRESENT_VALUE);
847847

848848
/*
849849
* Zappings SPTEs in invalid roots doesn't require a TLB flush,
@@ -1028,7 +1028,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
10281028
}
10291029

10301030
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
1031-
if (unlikely(is_mmio_spte(new_spte))) {
1031+
if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
10321032
vcpu->stat.pf_mmio_spte_created++;
10331033
trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
10341034
new_spte);

arch/x86/kvm/vmx/vmcs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ static inline bool is_nm_fault(u32 intr_info)
140140
return is_exception_n(intr_info, NM_VECTOR);
141141
}
142142

143+
static inline bool is_ve_fault(u32 intr_info)
144+
{
145+
return is_exception_n(intr_info, VE_VECTOR);
146+
}
147+
143148
/* Undocumented: icebp/int1 */
144149
static inline bool is_icebp(u32 intr_info)
145150
{

0 commit comments

Comments (0)