Commit 989a84c

KVM: x86/mmu: Trigger unprotect logic only on write-protection page faults
Trigger KVM's various "unprotect gfn" paths if and only if the page fault
was a write to a write-protected gfn. To do so, add a new page fault return
code, RET_PF_WRITE_PROTECTED, to explicitly and precisely track such page
faults.

If a page fault requires emulation for any MMIO (or any reason besides
write-protection), trying to unprotect the gfn is pointless and risks
putting the vCPU into an infinite loop. E.g. KVM will put the vCPU into an
infinite loop if the vCPU manages to trigger MMIO on a page table walk.

Fixes: 1472775 ("kvm: svm: Add support for additional SVM NPF error codes")
Reviewed-by: Yuan Yao <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Sean Christopherson <[email protected]>
1 parent 4ececec commit 989a84c
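In short: the per-MMU fault handlers now report *that* a write hit a
write-protected gfn (RET_PF_WRITE_PROTECTED), and a single common helper
decides *what* to do about it. The sketch below condenses that decision
logic from the mmu.c hunks further down; it is illustrative only, and the
wrapper name handle_wrprot_result() is invented for this summary:

	/* Illustrative condensation of kvm_mmu_write_protect_fault() below. */
	static int handle_wrprot_result(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
					u64 error_code, int *emulation_type)
	{
		bool direct = vcpu->arch.mmu->root_role.direct;

		/* Guest wrote its own page tables under nested paging: unprotect, retry. */
		if (direct && is_write_to_guest_page_table(error_code)) {
			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
			return RET_PF_RETRY;
		}

		/* Otherwise emulate; allow unprotect-and-retry only when it can't loop. */
		if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
			*emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
		return RET_PF_EMULATE;
	}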

File tree

5 files changed (+50, -37 lines)

arch/x86/kvm/mmu/mmu.c

Lines changed: 43 additions & 32 deletions
@@ -2896,10 +2896,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 		trace_kvm_mmu_set_spte(level, gfn, sptep);
 	}
 
-	if (wrprot) {
-		if (write_fault)
-			ret = RET_PF_EMULATE;
-	}
+	if (wrprot && write_fault)
+		ret = RET_PF_WRITE_PROTECTED;
 
 	if (flush)
 		kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
@@ -4531,7 +4529,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 		return RET_PF_RETRY;
 
 	if (page_fault_handle_page_track(vcpu, fault))
-		return RET_PF_EMULATE;
+		return RET_PF_WRITE_PROTECTED;
 
 	r = fast_page_fault(vcpu, fault);
 	if (r != RET_PF_INVALID)
@@ -4624,7 +4622,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
 	int r;
 
 	if (page_fault_handle_page_track(vcpu, fault))
-		return RET_PF_EMULATE;
+		return RET_PF_WRITE_PROTECTED;
 
 	r = fast_page_fault(vcpu, fault);
 	if (r != RET_PF_INVALID)
@@ -4703,6 +4701,7 @@ static int kvm_tdp_map_page(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code,
 	switch (r) {
 	case RET_PF_FIXED:
 	case RET_PF_SPURIOUS:
+	case RET_PF_WRITE_PROTECTED:
 		return 0;
 
 	case RET_PF_EMULATE:
@@ -5952,6 +5951,40 @@ static bool is_write_to_guest_page_table(u64 error_code)
 	return (error_code & mask) == mask;
 }
 
+static int kvm_mmu_write_protect_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+				       u64 error_code, int *emulation_type)
+{
+	bool direct = vcpu->arch.mmu->root_role.direct;
+
+	/*
+	 * Before emulating the instruction, check if the error code
+	 * was due to a RO violation while translating the guest page.
+	 * This can occur when using nested virtualization with nested
+	 * paging in both guests. If true, we simply unprotect the page
+	 * and resume the guest.
+	 */
+	if (direct && is_write_to_guest_page_table(error_code)) {
+		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
+		return RET_PF_RETRY;
+	}
+
+	/*
+	 * The gfn is write-protected, but if emulation fails we can still
+	 * optimistically try to just unprotect the page and let the processor
+	 * re-execute the instruction that caused the page fault. Do not allow
+	 * retrying MMIO emulation, as it's not only pointless but could also
+	 * cause us to enter an infinite loop because the processor will keep
+	 * faulting on the non-existent MMIO address. Retrying an instruction
+	 * from a nested guest is also pointless and dangerous as we are only
+	 * explicitly shadowing L1's page tables, i.e. unprotecting something
+	 * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+	 */
+	if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
+		*emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
+
+	return RET_PF_EMULATE;
+}
+
 int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 			void *insn, int insn_len)
 {
@@ -5997,6 +6030,10 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 	if (r < 0)
 		return r;
 
+	if (r == RET_PF_WRITE_PROTECTED)
+		r = kvm_mmu_write_protect_fault(vcpu, cr2_or_gpa, error_code,
+						&emulation_type);
+
 	if (r == RET_PF_FIXED)
 		vcpu->stat.pf_fixed++;
 	else if (r == RET_PF_EMULATE)
@@ -6007,32 +6044,6 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 	if (r != RET_PF_EMULATE)
 		return 1;
 
-	/*
-	 * Before emulating the instruction, check if the error code
-	 * was due to a RO violation while translating the guest page.
-	 * This can occur when using nested virtualization with nested
-	 * paging in both guests. If true, we simply unprotect the page
-	 * and resume the guest.
-	 */
-	if (vcpu->arch.mmu->root_role.direct &&
-	    is_write_to_guest_page_table(error_code)) {
-		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
-		return 1;
-	}
-
-	/*
-	 * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
-	 * optimistically try to just unprotect the page and let the processor
-	 * re-execute the instruction that caused the page fault. Do not allow
-	 * retrying MMIO emulation, as it's not only pointless but could also
-	 * cause us to enter an infinite loop because the processor will keep
-	 * faulting on the non-existent MMIO address. Retrying an instruction
-	 * from a nested guest is also pointless and dangerous as we are only
-	 * explicitly shadowing L1's page tables, i.e. unprotecting something
-	 * for L1 isn't going to magically fix whatever issue cause L2 to fail.
-	 */
-	if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
-		emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
 emulate:
 	return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
 				       insn_len);

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 3 additions & 0 deletions
@@ -258,6 +258,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
  * RET_PF_CONTINUE: So far, so good, keep handling the page fault.
  * RET_PF_RETRY: let CPU fault again on the address.
  * RET_PF_EMULATE: mmio page fault, emulate the instruction directly.
+ * RET_PF_WRITE_PROTECTED: the gfn is write-protected, either unprotect the
+ *			   gfn and retry, or emulate the instruction directly.
  * RET_PF_INVALID: the spte is invalid, let the real page fault path update it.
  * RET_PF_FIXED: The faulting entry has been fixed.
  * RET_PF_SPURIOUS: The faulting entry was already fixed, e.g. by another vCPU.
@@ -274,6 +276,7 @@ enum {
 	RET_PF_CONTINUE = 0,
 	RET_PF_RETRY,
 	RET_PF_EMULATE,
+	RET_PF_WRITE_PROTECTED,
 	RET_PF_INVALID,
 	RET_PF_FIXED,
 	RET_PF_SPURIOUS,
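Every caller that dispatches on these return codes has to handle the new
value explicitly; the kvm_tdp_map_page() hunk in mmu.c treats it as success,
while kvm_mmu_page_fault() routes it to kvm_mmu_write_protect_fault(). A
minimal caller-side sketch of that dispatch shape (the error values here are
placeholders, not the kernel's actual choices):

	/* Sketch of a caller mapping fault results to an errno-style result. */
	switch (r) {
	case RET_PF_FIXED:
	case RET_PF_SPURIOUS:
	case RET_PF_WRITE_PROTECTED:	/* mapped; unprotect/emulate happens elsewhere */
		return 0;
	case RET_PF_EMULATE:
		return -EINVAL;		/* placeholder: this caller can't emulate */
	default:
		return -EIO;		/* placeholder: unexpected result */
	}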

arch/x86/kvm/mmu/mmutrace.h

Lines changed: 1 addition & 0 deletions
@@ -57,6 +57,7 @@
 TRACE_DEFINE_ENUM(RET_PF_CONTINUE);
 TRACE_DEFINE_ENUM(RET_PF_RETRY);
 TRACE_DEFINE_ENUM(RET_PF_EMULATE);
+TRACE_DEFINE_ENUM(RET_PF_WRITE_PROTECTED);
 TRACE_DEFINE_ENUM(RET_PF_INVALID);
 TRACE_DEFINE_ENUM(RET_PF_FIXED);
 TRACE_DEFINE_ENUM(RET_PF_SPURIOUS);

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 1 addition & 1 deletion
@@ -806,7 +806,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 	if (page_fault_handle_page_track(vcpu, fault)) {
 		shadow_page_table_clear_flood(vcpu, fault->addr);
-		return RET_PF_EMULATE;
+		return RET_PF_WRITE_PROTECTED;
 	}
 
 	r = mmu_topup_memory_caches(vcpu, true);

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 2 additions & 4 deletions
@@ -1046,10 +1046,8 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 	 * protected, emulation is needed. If the emulation was skipped,
 	 * the vCPU would have the same fault again.
 	 */
-	if (wrprot) {
-		if (fault->write)
-			ret = RET_PF_EMULATE;
-	}
+	if (wrprot && fault->write)
+		ret = RET_PF_WRITE_PROTECTED;
 
 	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
 	if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
