Skip to content

Commit 6f38f8c

Browse files
Szy0127 authored and sean-jc committed
KVM: SVM: Flush cache only on CPUs running SEV guest
On AMD CPUs without ensuring cache consistency, each memory page reclamation in an SEV guest triggers a call to do WBNOINVD/WBINVD on all CPUs, thereby affecting the performance of other programs on the host. Typically, an AMD server may have 128 cores or more, while the SEV guest might only utilize 8 of these cores. Meanwhile, the host can use qemu-affinity to bind these 8 vCPUs to specific physical CPUs. Therefore, keeping a record of the physical core numbers each time a vCPU runs can help avoid flushing the cache for all CPUs every time. Take care to allocate the cpumask used to track which CPUs have run a vCPU when copying or moving an "encryption context", as nothing guarantees memory in a mirror VM is a strict subset of the ASID owner, and the destination VM for intrahost migration needs to maintain its own set of CPUs. E.g. for intrahost migration, if a CPU was used for the source VM but not the destination VM, then it can only have cached memory that was accessible to the source VM. And a CPU that was run in the source and is also used by the destination is no different than a CPU that was run in the destination only. Note, KVM is guaranteed to flush caches prior to sev_vm_destroy(), thanks to kvm_arch_guest_memory_reclaimed() for SEV and SEV-ES, and kvm_arch_gmem_invalidate() for SEV-SNP. I.e. it's safe to free the cpumask prior to unregistering encrypted regions and freeing the ASID. Opportunistically clean up sev_vm_destroy()'s comment regarding what is (implicitly, what isn't) skipped for mirror VMs. Cc: Srikanth Aithal <[email protected]> Reviewed-by: Tom Lendacky <[email protected]> Signed-off-by: Zheyun Shen <[email protected]> Link: https://lore.kernel.org/r/[email protected] Link: https://lore.kernel.org/all/[email protected] Co-developed-by: Sean Christopherson <[email protected]> Signed-off-by: Sean Christopherson <[email protected]>
1 parent a77896e commit 6f38f8c

File tree

2 files changed

+63
-9
lines changed

2 files changed

+63
-9
lines changed

arch/x86/kvm/svm/sev.c

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
447447
init_args.probe = false;
448448
ret = sev_platform_init(&init_args);
449449
if (ret)
450-
goto e_free;
450+
goto e_free_asid;
451+
452+
if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
453+
ret = -ENOMEM;
454+
goto e_free_asid;
455+
}
451456

452457
/* This needs to happen after SEV/SNP firmware initialization. */
453458
if (vm_type == KVM_X86_SNP_VM) {
@@ -465,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
465470
return 0;
466471

467472
e_free:
473+
free_cpumask_var(sev->have_run_cpus);
474+
e_free_asid:
468475
argp->error = init_args.error;
469476
sev_asid_free(sev);
470477
sev->asid = 0;
@@ -709,16 +716,31 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
709716
}
710717
}
711718

712-
static void sev_writeback_caches(void)
719+
static void sev_writeback_caches(struct kvm *kvm)
713720
{
721+
/*
722+
* Note, the caller is responsible for ensuring correctness if the mask
723+
* can be modified, e.g. if a CPU could be doing VMRUN.
724+
*/
725+
if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
726+
return;
727+
714728
/*
715729
* Ensure that all dirty guest tagged cache entries are written back
716730
* before releasing the pages back to the system for use. CLFLUSH will
717731
* not do this without SME_COHERENT, and flushing many cache lines
718732
* individually is slower than blasting WBINVD for large VMs, so issue
719-
* WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported).
733+
* WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
734+
* on CPUs that have done VMRUN, i.e. may have dirtied data using the
735+
* VM's ASID.
736+
*
737+
* For simplicity, never remove CPUs from the bitmap. Ideally, KVM
738+
* would clear the mask when flushing caches, but doing so requires
739+
* serializing multiple calls and having responding CPUs (to the IPI)
740+
* mark themselves as still running if they are running (or about to
741+
* run) a vCPU for the VM.
720742
*/
721-
wbnoinvd_on_all_cpus();
743+
wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
722744
}
723745

724746
static unsigned long get_num_contig_pages(unsigned long idx,
@@ -2046,6 +2068,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
20462068
if (ret)
20472069
goto out_source_vcpu;
20482070

2071+
/*
2072+
* Allocate a new have_run_cpus for the destination, i.e. don't copy
2073+
* the set of CPUs from the source. If a CPU was used to run a vCPU in
2074+
* the source VM but is never used for the destination VM, then the CPU
2075+
* can only have cached memory that was accessible to the source VM.
2076+
*/
2077+
if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
2078+
ret = -ENOMEM;
2079+
goto out_source_vcpu;
2080+
}
2081+
20492082
sev_migrate_from(kvm, source_kvm);
20502083
kvm_vm_dead(source_kvm);
20512084
cg_cleanup_sev = src_sev;
@@ -2707,7 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
27072740
goto failed;
27082741
}
27092742

2710-
sev_writeback_caches();
2743+
sev_writeback_caches(kvm);
27112744

27122745
__unregister_enc_region_locked(kvm, region);
27132746

@@ -2749,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
27492782
goto e_unlock;
27502783
}
27512784

2785+
mirror_sev = to_kvm_sev_info(kvm);
2786+
if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
2787+
ret = -ENOMEM;
2788+
goto e_unlock;
2789+
}
2790+
27522791
/*
27532792
* The mirror kvm holds an enc_context_owner ref so its asid can't
27542793
* disappear until we're done with it
27552794
*/
27562795
source_sev = to_kvm_sev_info(source_kvm);
27572796
kvm_get_kvm(source_kvm);
2758-
mirror_sev = to_kvm_sev_info(kvm);
27592797
list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);
27602798

27612799
/* Set enc_context_owner and copy its encryption context over */
@@ -2817,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)
28172855

28182856
WARN_ON(!list_empty(&sev->mirror_vms));
28192857

2820-
/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
2858+
free_cpumask_var(sev->have_run_cpus);
2859+
2860+
/*
2861+
* If this is a mirror VM, remove it from the owner's list of a mirrors
2862+
* and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
2863+
* Note, mirror VMs don't support registering encrypted regions.
2864+
*/
28212865
if (is_mirroring_enc_context(kvm)) {
28222866
struct kvm *owner_kvm = sev->enc_context_owner;
28232867

@@ -3106,7 +3150,7 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
31063150
return;
31073151

31083152
do_sev_writeback_caches:
3109-
sev_writeback_caches();
3153+
sev_writeback_caches(vcpu->kvm);
31103154
}
31113155

31123156
void sev_guest_memory_reclaimed(struct kvm *kvm)
@@ -3119,7 +3163,7 @@ void sev_guest_memory_reclaimed(struct kvm *kvm)
31193163
if (!sev_guest(kvm) || sev_snp_guest(kvm))
31203164
return;
31213165

3122-
sev_writeback_caches();
3166+
sev_writeback_caches(kvm);
31233167
}
31243168

31253169
void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3451,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
34513495
if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
34523496
return -EINVAL;
34533497

3498+
/*
3499+
* To optimize cache flushes when memory is reclaimed from an SEV VM,
3500+
* track physical CPUs that enter the guest for SEV VMs and thus can
3501+
* have encrypted, dirty data in the cache, and flush caches only for
3502+
* CPUs that have entered the guest.
3503+
*/
3504+
if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
3505+
cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
3506+
34543507
/* Assign the asid allocated with this SEV guest */
34553508
svm->asid = asid;
34563509

arch/x86/kvm/svm/svm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ struct kvm_sev_info {
113113
void *guest_req_buf; /* Bounce buffer for SNP Guest Request input */
114114
void *guest_resp_buf; /* Bounce buffer for SNP Guest Request output */
115115
struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
116+
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
116117
};
117118

118119
#define SEV_POLICY_NODBG BIT_ULL(0)

0 commit comments

Comments (0)