
Commit 0d5c7fc

Nikolay Kuratov authored and gregkh committed
KVM: x86/mmu: Ensure that kvm_release_pfn_clean() takes exact pfn from kvm_faultin_pfn()
Since 5.16 and prior to 6.13 KVM can't be used with FSDAX guest memory
(PMD pages). To reproduce the issue you need to reserve guest memory with
the `memmap=` cmdline parameter, then create and mount a filesystem in DAX
mode (tested with both XFS and ext4); see the doc link below. ndctl command
for the test:

    ndctl create-namespace -v -e namespace1.0 --map=dev --mode=fsdax -a 2M

Then pass the memory object to qemu like:

    -m 8G -object memory-backend-file,id=ram0,size=8G,\
    mem-path=/mnt/pmem/guestmem,share=on,prealloc=on,dump=off,align=2097152 \
    -numa node,memdev=ram0,cpus=0-1

QEMU fails to run the guest with the error "kvm run failed Bad address",
and there are two warnings in dmesg:

    WARN_ON_ONCE(!page_count(page)) in kvm_is_zone_device_page() and
    WARN_ON_ONCE(folio_ref_count(folio) <= 0) in try_grab_folio() (v6.6.63)

It looks like in the past the assumption was made that the pfn won't change
from faultin_pfn() to release_pfn_clean(); e.g. see
commit 4cd071d ("KVM: x86/mmu: Move calls to thp_adjust() down a level").
But the kvm_page_fault structure made the pfn part of mutable state, so now
release_pfn_clean() can take a hugepage-adjusted pfn. That works for all
cases (/dev/shm, hugetlb, devdax) except fsdax. Apparently in fsdax mode the
faultin pfn and the adjusted pfn may refer to different folios, so we're
getting a get_page/put_page imbalance.

To solve this, preserve the faultin pfn in a separate local variable and
pass it to kvm_release_pfn_clean().

The patch was tested for all mentioned guest memory backends with
tdp_mmu={0,1}.

There is no bug upstream, as it was solved fundamentally by
commit 8dd861c ("KVM: x86/mmu: Put refcounted pages instead of blindly
releasing pfns") and the related patch series.

Link: https://nvdimm.docs.kernel.org/2mib_fs_dax.html
Fixes: 2f6305d ("KVM: MMU: change kvm_tdp_mmu_map() arguments to kvm_page_fault")
Co-developed-by: Sean Christopherson <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Reviewed-by: Sean Christopherson <[email protected]>
Signed-off-by: Nikolay Kuratov <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 67b5ed3 commit 0d5c7fc
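
For readers unfamiliar with the hugepage adjustment the message refers to, here is a small
self-contained sketch (plain userspace C, not kernel code; the 2M/4K rounding mirrors what
the mapping path does to fault->pfn, while every other name here is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* A 2M huge page spans 512 base pfns of 4K each. */
#define PFNS_PER_2M_HPAGE 512ULL

int main(void)
{
        /* pfn handed back by the faultin path; the page reference is taken on this pfn */
        uint64_t faultin_pfn = 0x123456;

        /* the hugepage adjustment rounds the mutable fault pfn down to the 2M head */
        uint64_t adjusted_pfn = faultin_pfn & ~(PFNS_PER_2M_HPAGE - 1);

        /*
         * Per the commit message, releasing the adjusted pfn happened to work for
         * /dev/shm, hugetlb and devdax, but with fsdax the two pfns may sit in
         * different folios, so the put lands on a folio that never saw the get.
         */
        printf("faultin pfn:  %#llx\n", (unsigned long long)faultin_pfn);
        printf("adjusted pfn: %#llx\n", (unsigned long long)adjusted_pfn);
        return 0;
}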

2 files changed: 12 additions, 3 deletions

arch/x86/kvm/mmu/mmu.c

Lines changed: 8 additions & 2 deletions
@@ -4363,6 +4363,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 
 static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
+        kvm_pfn_t orig_pfn;
         int r;
 
         /* Dummy roots are used only for shadowing bad guest roots. */
@@ -4384,6 +4385,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
         if (r != RET_PF_CONTINUE)
                 return r;
 
+        orig_pfn = fault->pfn;
+
         r = RET_PF_RETRY;
         write_lock(&vcpu->kvm->mmu_lock);
 
@@ -4398,7 +4401,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 out_unlock:
         write_unlock(&vcpu->kvm->mmu_lock);
-        kvm_release_pfn_clean(fault->pfn);
+        kvm_release_pfn_clean(orig_pfn);
         return r;
 }
 
@@ -4447,6 +4450,7 @@ EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
 static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
                                   struct kvm_page_fault *fault)
 {
+        kvm_pfn_t orig_pfn;
         int r;
 
         if (page_fault_handle_page_track(vcpu, fault))
@@ -4464,6 +4468,8 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
         if (r != RET_PF_CONTINUE)
                 return r;
 
+        orig_pfn = fault->pfn;
+
         r = RET_PF_RETRY;
         read_lock(&vcpu->kvm->mmu_lock);
 
@@ -4474,7 +4480,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
 
 out_unlock:
         read_unlock(&vcpu->kvm->mmu_lock);
-        kvm_release_pfn_clean(fault->pfn);
+        kvm_release_pfn_clean(orig_pfn);
         return r;
 }
 #endif

arch/x86/kvm/mmu/paging_tmpl.h

Lines changed: 4 additions & 1 deletion
@@ -777,6 +777,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
         struct guest_walker walker;
+        kvm_pfn_t orig_pfn;
         int r;
 
         WARN_ON_ONCE(fault->is_tdp);
@@ -835,6 +836,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
                 walker.pte_access &= ~ACC_EXEC_MASK;
         }
 
+        orig_pfn = fault->pfn;
+
         r = RET_PF_RETRY;
         write_lock(&vcpu->kvm->mmu_lock);
 
@@ -848,7 +851,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 
 out_unlock:
         write_unlock(&vcpu->kvm->mmu_lock);
-        kvm_release_pfn_clean(fault->pfn);
+        kvm_release_pfn_clean(orig_pfn);
         return r;
 }
 
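
The same three-line change repeats in direct_page_fault(), kvm_tdp_mmu_page_fault() and
FNAME(page_fault) above. Below is a condensed, standalone sketch of that shape with
stubbed-out helpers; only the save/release ordering reflects the patch, everything else
is invented for illustration:

#include <stdint.h>

typedef uint64_t kvm_pfn_t;

/* Stand-ins for the kernel pieces touched by the hunks above. */
struct fault_stub { kvm_pfn_t pfn; };

static int faultin_stub(struct fault_stub *f)
{
        f->pfn = 0x123456;              /* reference conceptually taken here */
        return 0;
}

static void map_stub(struct fault_stub *f)
{
        f->pfn &= ~511ULL;              /* hugepage adjustment may rewrite f->pfn */
}

static void release_stub(kvm_pfn_t pfn)
{
        (void)pfn;                      /* drop the reference taken at faultin time */
}

static int page_fault_pattern(struct fault_stub *fault)
{
        kvm_pfn_t orig_pfn;
        int r;

        r = faultin_stub(fault);
        if (r)
                return r;

        orig_pfn = fault->pfn;          /* saved before the mapping path can adjust it */

        /* mmu_lock is held around this step in the real handlers */
        map_stub(fault);

        release_stub(orig_pfn);         /* release exactly what the faultin path pinned */
        return r;
}

int main(void)
{
        struct fault_stub fault = { 0 };
        return page_fault_pattern(&fault);
}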
