
Commit 65855ed

Lai Jiangshan authored and Paolo Bonzini committed
KVM: X86: Synchronize the shadow pagetable before linking it

If a gpte is changed from non-present to present, the guest doesn't need to flush the TLB per the SDM, so the host must synchronize the sp before linking it. Otherwise the guest might use a wrong mapping.

For example: the guest first changes a level-1 pagetable, and then links its parent to a new place where the original gpte is non-present. Finally the guest can access the remapped area without flushing the TLB. The guest's behavior should be allowed per the SDM, but the host KVM MMU makes it wrong.

Fixes: 4731d4c ("KVM: MMU: out of sync shadow core")
Signed-off-by: Lai Jiangshan <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
1 parent f816029 commit 65855ed
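To make the scenario concrete, here is an illustrative guest-side sketch of the sequence described above (a minimal sketch; the helper names, variables, and flags are hypothetical, not taken from the patch or from Linux):

/* Hypothetical guest code, illustrative only. */

/* 1. Rewrite a level-1 page table that the host shadows lazily: the
 *    corresponding shadow page is marked "unsync" and may go stale. */
pte_t *l1 = level1_table_at(gfn_X);               /* hypothetical helper */
l1[idx] = make_pte(new_pfn, PTE_PRESENT | PTE_RW);

/* 2. Link that table under a level-2 entry that was previously
 *    non-present.  Per the SDM, a non-present -> present change in a
 *    paging-structure entry requires no TLB flush. */
l2[idx2] = make_pde(gfn_X, PDE_PRESENT | PDE_RW);

/* 3. Access the remapped range immediately.  Before this patch, the
 *    host could link the still-unsync shadow page at the fault for
 *    step 2 and only queue a deferred KVM_REQ_MMU_SYNC, so this read
 *    could be translated through the stale shadow entries. */
x = *(volatile unsigned long *)remapped_va;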

2 files changed, 31 insertions(+), 9 deletions(-)


arch/x86/kvm/mmu/mmu.c

Lines changed: 10 additions & 7 deletions
@@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 	} while (!sp->unsync_children);
 }

-static void mmu_sync_children(struct kvm_vcpu *vcpu,
-			      struct kvm_mmu_page *parent)
+static int mmu_sync_children(struct kvm_vcpu *vcpu,
+			     struct kvm_mmu_page *parent, bool can_yield)
 {
 	int i;
 	struct kvm_mmu_page *sp;
@@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 		}
 		if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
 			kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+			if (!can_yield) {
+				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+				return -EINTR;
+			}
+
 			cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
 			flush = false;
 		}
 	}

 	kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+	return 0;
 }

 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 	}

-	if (sp->unsync_children)
-		kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
 	__clear_sp_write_flooding_count(sp);

 trace_get_page:
@@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		write_lock(&vcpu->kvm->mmu_lock);
 		kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);

-		mmu_sync_children(vcpu, sp);
+		mmu_sync_children(vcpu, sp, true);

 		kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 		write_unlock(&vcpu->kvm->mmu_lock);
@@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		if (IS_VALID_PAE_ROOT(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = to_shadow_page(root);
-			mmu_sync_children(vcpu, sp);
+			mmu_sync_children(vcpu, sp, true);
 		}
 	}
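Taken together, the mmu.c hunks give mmu_sync_children() two calling conventions, selected by the new can_yield flag. A hedged summary in code form (a context-free fragment restating the hunks above, not additional kernel code):

/* Root-sync path (kvm_mmu_sync_roots): yielding is safe, so the sync
 * always runs to completion and the return value can be ignored. */
mmu_sync_children(vcpu, sp, true);

/* Page-fault path (FNAME(fetch), below): must not yield mid-fault.
 * Under lock contention the function flushes what it has synced so
 * far, queues KVM_REQ_MMU_SYNC, and returns -EINTR, letting the
 * caller bail out with RET_PF_RETRY. */
if (sp->unsync_children && mmu_sync_children(vcpu, sp, false))
	return RET_PF_RETRY;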

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 21 additions & 2 deletions
@@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
 			access = gw->pt_access[it.level - 2];
-			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-					      false, access);
+			sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
+					      it.level-1, false, access);
+			/*
+			 * We must synchronize the pagetable before linking it
+			 * because the guest doesn't need to flush tlb when
+			 * the gpte is changed from non-present to present.
+			 * Otherwise, the guest may use the wrong mapping.
+			 *
+			 * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+			 * synchronized it transiently via kvm_sync_page().
+			 *
+			 * For higher level pagetable, we synchronize it via
+			 * the slower mmu_sync_children().  If it needs to
+			 * break, some progress has been made; return
+			 * RET_PF_RETRY and retry on the next #PF.
+			 * KVM_REQ_MMU_SYNC is not necessary but it
+			 * expedites the process.
+			 */
+			if (sp->unsync_children &&
+			    mmu_sync_children(vcpu, sp, false))
+				return RET_PF_RETRY;
 		}

 		/*
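Why is bailing out with RET_PF_RETRY safe here? Each attempt synchronizes a batch of children before giving up, so progress is monotonic and the refault loop terminates. A toy user-space model of that convergence argument (purely illustrative; the batch size, counts, and names are made up):

#include <stdio.h>

enum { BATCH = 4, SIM_EINTR = -1 };

static int unsync_children = 10;	/* pending unsync shadow pages */

/* Models mmu_sync_children(..., can_yield = false): syncs up to BATCH
 * children, then simulates lock contention and bails with -EINTR. */
static int sync_children_model(void)
{
	int done = 0;

	while (unsync_children > 0) {
		unsync_children--;		/* sync one child */
		if (++done == BATCH && unsync_children > 0)
			return SIM_EINTR;	/* contended: bail out */
	}
	return 0;
}

int main(void)
{
	int refaults = 0;

	/* Each simulated RET_PF_RETRY sends the guest back to refault;
	 * every attempt makes forward progress, so the loop is bounded. */
	while (sync_children_model() == SIM_EINTR)
		refaults++;

	printf("synced after %d refault(s)\n", refaults);
	return 0;
}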
