@@ -530,6 +530,31 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
 }
 
+static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
+{
+	u64 *sptep = rcu_dereference(iter->sptep);
+
+	/*
+	 * The caller is responsible for ensuring the old SPTE is not a REMOVED
+	 * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
+	 * and pre-checking before inserting a new SPTE is advantageous as it
+	 * avoids unnecessary work.
+	 */
+	WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+
+	/*
+	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
+	 * does not hold the mmu_lock. On failure, i.e. if a different logical
+	 * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
+	 * the current value, so the caller operates on fresh data, e.g. if it
+	 * retries tdp_mmu_set_spte_atomic()
+	 */
+	if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
+		return -EBUSY;
+
+	return 0;
+}
+
 /*
  * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
  * and handle the associated bookkeeping. Do not mark the page dirty
@@ -551,27 +576,13 @@ static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
 					  struct tdp_iter *iter,
 					  u64 new_spte)
 {
-	u64 *sptep = rcu_dereference(iter->sptep);
-
-	/*
-	 * The caller is responsible for ensuring the old SPTE is not a REMOVED
-	 * SPTE. KVM should never attempt to zap or manipulate a REMOVED SPTE,
-	 * and pre-checking before inserting a new SPTE is advantageous as it
-	 * avoids unnecessary work.
-	 */
-	WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));
+	int ret;
 
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
-	/*
-	 * Note, fast_pf_fix_direct_spte() can also modify TDP MMU SPTEs and
-	 * does not hold the mmu_lock. On failure, i.e. if a different logical
-	 * CPU modified the SPTE, try_cmpxchg64() updates iter->old_spte with
-	 * the current value, so the caller operates on fresh data, e.g. if it
-	 * retries tdp_mmu_set_spte_atomic()
-	 */
-	if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
-		return -EBUSY;
+	ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
+	if (ret)
+		return ret;
 
 	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
 			    new_spte, iter->level, true);
@@ -584,13 +595,17 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 {
 	int ret;
 
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
 	/*
-	 * Freeze the SPTE by setting it to a special,
-	 * non-present value. This will stop other threads from
-	 * immediately installing a present entry in its place
-	 * before the TLBs are flushed.
+	 * Freeze the SPTE by setting it to a special, non-present value. This
+	 * will stop other threads from immediately installing a present entry
+	 * in its place before the TLBs are flushed.
+	 *
+	 * Delay processing of the zapped SPTE until after TLBs are flushed and
+	 * the REMOVED_SPTE is replaced (see below).
 	 */
-	ret = tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE);
+	ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
 	if (ret)
 		return ret;
 
@@ -599,12 +614,20 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	/*
 	 * No other thread can overwrite the removed SPTE as they must either
 	 * wait on the MMU lock or use tdp_mmu_set_spte_atomic() which will not
-	 * overwrite the special removed SPTE value. No bookkeeping is needed
-	 * here since the SPTE is going from non-present to non-present. Use
-	 * the raw write helper to avoid an unnecessary check on volatile bits.
+	 * overwrite the special removed SPTE value. Use the raw write helper to
+	 * avoid an unnecessary check on volatile bits.
 	 */
 	__kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);
 
+	/*
+	 * Process the zapped SPTE after flushing TLBs, and after replacing
+	 * REMOVED_SPTE with 0. This minimizes the amount of time vCPUs are
+	 * blocked by the REMOVED_SPTE and reduces contention on the child
+	 * SPTEs.
+	 */
+	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+			    0, iter->level, true);
+
 	return 0;
 }
 
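For readability, here is a minimal sketch of how the affected functions read once the patch is applied, reconstructed only from the hunks above. Anything the diff does not show is an assumption: the continuation of tdp_mmu_zap_spte_atomic()'s parameter list, the trailing return of tdp_mmu_set_spte_atomic(), and the remote TLB flush sitting in the elided context between the third and fourth hunks are filled in or left as placeholder comments, and the long in-code comments are abbreviated.

/* Sketch only: reconstructed from the hunks above, not copied from the tree. */
static inline int __tdp_mmu_set_spte_atomic(struct tdp_iter *iter, u64 new_spte)
{
	u64 *sptep = rcu_dereference(iter->sptep);

	/* The caller must never pass a REMOVED SPTE as the old value. */
	WARN_ON_ONCE(iter->yielded || is_removed_spte(iter->old_spte));

	/* On failure, try_cmpxchg64() refreshes iter->old_spte for the caller. */
	if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte))
		return -EBUSY;

	return 0;
}

static inline int tdp_mmu_set_spte_atomic(struct kvm *kvm,
					  struct tdp_iter *iter,
					  u64 new_spte)
{
	int ret;

	lockdep_assert_held_read(&kvm->mmu_lock);

	ret = __tdp_mmu_set_spte_atomic(iter, new_spte);
	if (ret)
		return ret;

	/* Non-zap path: bookkeeping runs immediately after the exchange. */
	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
			    new_spte, iter->level, true);

	return 0;	/* assumed: lies outside the hunks */
}

static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
					  struct tdp_iter *iter)	/* parameter list continuation assumed */
{
	int ret;

	lockdep_assert_held_read(&kvm->mmu_lock);

	/* Freeze the SPTE so no other thread installs a present entry. */
	ret = __tdp_mmu_set_spte_atomic(iter, REMOVED_SPTE);
	if (ret)
		return ret;

	/* ... remote TLB flush of the zapped range (elided between hunks) ... */

	/* Unfreeze; tdp_mmu_set_spte_atomic() callers never overwrite REMOVED_SPTE. */
	__kvm_tdp_mmu_write_spte(iter->sptep, SHADOW_NONPRESENT_VALUE);

	/* Only now process the zapped SPTE, minimizing time spent frozen. */
	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
			    0, iter->level, true);

	return 0;
}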