@@ -117,6 +117,7 @@ static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
	 */
	down_write(&sev_deactivate_lock);

+	/* SNP firmware requires use of WBINVD for ASID recycling. */
	wbinvd_on_all_cpus();

	if (sev_snp_enabled)
@@ -446,7 +447,12 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
	init_args.probe = false;
	ret = sev_platform_init(&init_args);
	if (ret)
-		goto e_free;
+		goto e_free_asid;
+
+	if (!zalloc_cpumask_var(&sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_free_asid;
+	}

	/* This needs to happen after SEV/SNP firmware initialization. */
	if (vm_type == KVM_X86_SNP_VM) {
@@ -464,6 +470,8 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
	return 0;

e_free:
+	free_cpumask_var(sev->have_run_cpus);
+e_free_asid:
	argp->error = init_args.error;
	sev_asid_free(sev);
	sev->asid = 0;
@@ -708,6 +716,33 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
	}
}

+static void sev_writeback_caches(struct kvm *kvm)
+{
+	/*
+	 * Note, the caller is responsible for ensuring correctness if the mask
+	 * can be modified, e.g. if a CPU could be doing VMRUN.
+	 */
+	if (cpumask_empty(to_kvm_sev_info(kvm)->have_run_cpus))
+		return;
+
+	/*
+	 * Ensure that all dirty guest tagged cache entries are written back
+	 * before releasing the pages back to the system for use. CLFLUSH will
+	 * not do this without SME_COHERENT, and flushing many cache lines
+	 * individually is slower than blasting WBINVD for large VMs, so issue
+	 * WBNOINVD (or WBINVD if the "no invalidate" variant is unsupported)
+	 * on CPUs that have done VMRUN, i.e. may have dirtied data using the
+	 * VM's ASID.
+	 *
+	 * For simplicity, never remove CPUs from the bitmap. Ideally, KVM
+	 * would clear the mask when flushing caches, but doing so requires
+	 * serializing multiple calls and having responding CPUs (to the IPI)
+	 * mark themselves as still running if they are running (or about to
+	 * run) a vCPU for the VM.
+	 */
+	wbnoinvd_on_cpus_mask(to_kvm_sev_info(kvm)->have_run_cpus);
+}
+
static unsigned long get_num_contig_pages(unsigned long idx,
					  struct page **inpages, unsigned long npages)
{
@@ -2037,6 +2072,17 @@ int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd)
	if (ret)
		goto out_source_vcpu;

+	/*
+	 * Allocate a new have_run_cpus for the destination, i.e. don't copy
+	 * the set of CPUs from the source. If a CPU was used to run a vCPU in
+	 * the source VM but is never used for the destination VM, then the CPU
+	 * can only have cached memory that was accessible to the source VM.
+	 */
+	if (!zalloc_cpumask_var(&dst_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto out_source_vcpu;
+	}
+
	sev_migrate_from(kvm, source_kvm);
	kvm_vm_dead(source_kvm);
	cg_cleanup_sev = src_sev;
@@ -2694,12 +2740,7 @@ int sev_mem_enc_unregister_region(struct kvm *kvm,
		goto failed;
	}

-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);

	__unregister_enc_region_locked(kvm, region);

@@ -2741,13 +2782,18 @@ int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd)
		goto e_unlock;
	}

+	mirror_sev = to_kvm_sev_info(kvm);
+	if (!zalloc_cpumask_var(&mirror_sev->have_run_cpus, GFP_KERNEL_ACCOUNT)) {
+		ret = -ENOMEM;
+		goto e_unlock;
+	}
+
	/*
	 * The mirror kvm holds an enc_context_owner ref so its asid can't
	 * disappear until we're done with it
	 */
	source_sev = to_kvm_sev_info(source_kvm);
	kvm_get_kvm(source_kvm);
-	mirror_sev = to_kvm_sev_info(kvm);
	list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms);

	/* Set enc_context_owner and copy its encryption context over */
@@ -2809,7 +2855,13 @@ void sev_vm_destroy(struct kvm *kvm)

	WARN_ON(!list_empty(&sev->mirror_vms));

-	/* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
+	free_cpumask_var(sev->have_run_cpus);
+
+	/*
+	 * If this is a mirror VM, remove it from the owner's list of mirrors
+	 * and skip ASID cleanup (the ASID is tied to the lifetime of the owner).
+	 * Note, mirror VMs don't support registering encrypted regions.
+	 */
	if (is_mirroring_enc_context(kvm)) {
		struct kvm *owner_kvm = sev->enc_context_owner;

@@ -2820,12 +2872,6 @@ void sev_vm_destroy(struct kvm *kvm)
		return;
	}

-	/*
-	 * Ensure that all guest tagged cache entries are flushed before
-	 * releasing the pages back to the system for use. CLFLUSH will
-	 * not do this, so issue a WBINVD.
-	 */
-	wbinvd_on_all_cpus();

	/*
	 * if userspace was terminated before unregistering the memory regions
@@ -3095,30 +3141,29 @@ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)

	/*
	 * VM Page Flush takes a host virtual address and a guest ASID. Fall
-	 * back to WBINVD if this faults so as not to make any problems worse
-	 * by leaving stale encrypted data in the cache.
+	 * back to full writeback of caches if this faults so as not to make
+	 * any problems worse by leaving stale encrypted data in the cache.
	 */
	if (WARN_ON_ONCE(wrmsrq_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid)))
-		goto do_wbinvd;
+		goto do_sev_writeback_caches;

	return;

-do_wbinvd:
-	wbinvd_on_all_cpus();
+do_sev_writeback_caches:
+	sev_writeback_caches(vcpu->kvm);
}

void sev_guest_memory_reclaimed(struct kvm *kvm)
{
	/*
	 * With SNP+gmem, private/encrypted memory is unreachable via the
-	 * hva-based mmu notifiers, so these events are only actually
-	 * pertaining to shared pages where there is no need to perform
-	 * the WBINVD to flush associated caches.
+	 * hva-based mmu notifiers, i.e. these events are explicitly scoped to
+	 * shared pages, where there's no need to flush caches.
	 */
	if (!sev_guest(kvm) || sev_snp_guest(kvm))
		return;

-	wbinvd_on_all_cpus();
+	sev_writeback_caches(kvm);
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
@@ -3450,6 +3495,15 @@ int pre_sev_run(struct vcpu_svm *svm, int cpu)
	if (sev_es_guest(kvm) && !VALID_PAGE(svm->vmcb->control.vmsa_pa))
		return -EINVAL;

+	/*
+	 * To optimize cache flushes when memory is reclaimed from an SEV VM,
+	 * track physical CPUs that enter the guest for SEV VMs and thus can
+	 * have encrypted, dirty data in the cache, and flush caches only for
+	 * CPUs that have entered the guest.
+	 */
+	if (!cpumask_test_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus))
+		cpumask_set_cpu(cpu, to_kvm_sev_info(kvm)->have_run_cpus);
+
	/* Assign the asid allocated with this SEV guest */
	svm->asid = asid;

@@ -3882,9 +3936,9 @@ void sev_snp_init_protected_guest_state(struct kvm_vcpu *vcpu)
	 * From this point forward, the VMSA will always be a guest-mapped page
	 * rather than the initial one allocated by KVM in svm->sev_es.vmsa. In
	 * theory, svm->sev_es.vmsa could be free'd and cleaned up here, but
-	 * that involves cleanups like wbinvd_on_all_cpus() which would ideally
-	 * be handled during teardown rather than guest boot. Deferring that
-	 * also allows the existing logic for SEV-ES VMSAs to be re-used with
+	 * that involves cleanups like flushing caches, which would ideally be
+	 * handled during teardown rather than guest boot. Deferring that also
+	 * allows the existing logic for SEV-ES VMSAs to be re-used with
	 * minimal SNP-specific changes.
	 */
	svm->sev_es.snp_has_guest_vmsa = true;
@@ -4875,7 +4929,7 @@ void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)

		/*
		 * SEV-ES avoids host/guest cache coherency issues through
-		 * WBINVD hooks issued via MMU notifiers during run-time, and
+		 * WBNOINVD hooks issued via MMU notifiers during run-time, and
		 * KVM's VM destroy path at shutdown. Those MMU notifier events
		 * don't cover gmem since there is no requirement to map pages
		 * to a HVA in order to use them for a running guest. While the
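The hunks above all implement one pattern: at VMRUN time, record which physical CPUs may hold dirty cache lines tagged with the VM's ASID (the have_run_cpus mask set in pre_sev_run()), then write caches back only on those CPUs via sev_writeback_caches() when memory is reclaimed or the VM is destroyed, instead of broadcasting WBINVD to every CPU. As a rough, hedged illustration of that track-then-target idea, here is a minimal standalone C model; the names (vm_cache_tracker, mark_cpu_ran, writeback_dirty_cpus) and the printf standing in for the real per-CPU writeback are invented for this sketch and are not KVM's APIs.

/*
 * Simplified userspace model of the have_run_cpus optimization: record the
 * CPUs that have entered a given VM, then act only on those CPUs when the
 * VM's memory is reclaimed.  All names here are illustrative, not kernel APIs.
 */
#include <stdio.h>

#define MAX_CPUS 64

struct vm_cache_tracker {
	unsigned long long have_run_cpus;	/* one bit per physical CPU */
};

/* Called on every "VMRUN": test before set to avoid dirtying the mask. */
static void mark_cpu_ran(struct vm_cache_tracker *t, int cpu)
{
	unsigned long long bit = 1ULL << cpu;

	if (!(t->have_run_cpus & bit))
		t->have_run_cpus |= bit;
}

/* Called on reclaim/teardown: visit only CPUs that may hold dirty lines. */
static void writeback_dirty_cpus(const struct vm_cache_tracker *t)
{
	for (int cpu = 0; cpu < MAX_CPUS; cpu++) {
		if (t->have_run_cpus & (1ULL << cpu))
			printf("writeback caches on CPU %d\n", cpu);
	}
}

int main(void)
{
	struct vm_cache_tracker vm = { 0 };

	mark_cpu_ran(&vm, 3);
	mark_cpu_ran(&vm, 3);	/* second entry on the same CPU is a no-op */
	mark_cpu_ran(&vm, 17);

	writeback_dirty_cpus(&vm);	/* touches CPUs 3 and 17 only */
	return 0;
}

As in the patch, the mask is only ever grown; clearing bits on flush would require serializing flushers against CPUs concurrently entering the guest, which the commit deliberately avoids for simplicity.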