@@ -3869,59 +3869,71 @@ bool __weak kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
+	int nr_vcpus, start, i, idx, yielded;
 	struct kvm *kvm = me->kvm;
 	struct kvm_vcpu *vcpu;
-	int last_boosted_vcpu;
-	unsigned long i;
-	int yielded = 0;
 	int try = 3;
-	int pass;
 
-	last_boosted_vcpu = READ_ONCE(kvm->last_boosted_vcpu);
+	nr_vcpus = atomic_read(&kvm->online_vcpus);
+	if (nr_vcpus < 2)
+		return;
+
+	/* Pairs with the smp_wmb() in kvm_vm_ioctl_create_vcpu(). */
+	smp_rmb();
+
 	kvm_vcpu_set_in_spin_loop(me, true);
+
 	/*
-	 * We boost the priority of a VCPU that is runnable but not
-	 * currently running, because it got preempted by something
-	 * else and called schedule in __vcpu_run.  Hopefully that
-	 * VCPU is holding the lock that we need and will release it.
-	 * We approximate round-robin by starting at the last boosted VCPU.
+	 * The current vCPU ("me") is spinning in kernel mode, i.e. is likely
+	 * waiting for a resource to become available.  Attempt to yield to a
+	 * vCPU that is runnable, but not currently running, e.g. because the
+	 * vCPU was preempted by a higher priority task.  With luck, the vCPU
+	 * that was preempted is holding a lock or some other resource that the
+	 * current vCPU is waiting to acquire, and yielding to the other vCPU
+	 * will allow it to make forward progress and release the lock (or kick
+	 * the spinning vCPU, etc).
+	 *
+	 * Since KVM has no insight into what exactly the guest is doing,
+	 * approximate a round-robin selection by iterating over all vCPUs,
+	 * starting at the last boosted vCPU.  I.e. if N=kvm->last_boosted_vcpu,
+	 * iterate over vCPU[N+1]..vCPU[N-1], wrapping as needed.
+	 *
+	 * Note, this is inherently racy, e.g. if multiple vCPUs are spinning,
+	 * they may all try to yield to the same vCPU(s).  But as above, this
+	 * is all best effort due to KVM's lack of visibility into the guest.
 	 */
-	for (pass = 0; pass < 2 && !yielded && try; pass++) {
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			if (!pass && i <= last_boosted_vcpu) {
-				i = last_boosted_vcpu;
-				continue;
-			} else if (pass && i > last_boosted_vcpu)
-				break;
-			if (!READ_ONCE(vcpu->ready))
-				continue;
-			if (vcpu == me)
-				continue;
-			if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
-				continue;
+	start = READ_ONCE(kvm->last_boosted_vcpu) + 1;
+	for (i = 0; i < nr_vcpus; i++) {
+		idx = (start + i) % nr_vcpus;
+		if (idx == me->vcpu_idx)
+			continue;
 
-			/*
-			 * Treat the target vCPU as being in-kernel if it has a
-			 * pending interrupt, as the vCPU trying to yield may
-			 * be spinning waiting on IPI delivery, i.e. the target
-			 * vCPU is in-kernel for the purposes of directed yield.
-			 */
-			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
-			    !kvm_arch_dy_has_pending_interrupt(vcpu) &&
-			    !kvm_arch_vcpu_preempted_in_kernel(vcpu))
-				continue;
-			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
-				continue;
+		vcpu = xa_load(&kvm->vcpu_array, idx);
+		if (!READ_ONCE(vcpu->ready))
+			continue;
+		if (kvm_vcpu_is_blocking(vcpu) && !vcpu_dy_runnable(vcpu))
+			continue;
 
-			yielded = kvm_vcpu_yield_to(vcpu);
-			if (yielded > 0) {
-				WRITE_ONCE(kvm->last_boosted_vcpu, i);
-				break;
-			} else if (yielded < 0) {
-				try--;
-				if (!try)
-					break;
-			}
+		/*
+		 * Treat the target vCPU as being in-kernel if it has a pending
+		 * interrupt, as the vCPU trying to yield may be spinning
+		 * waiting on IPI delivery, i.e. the target vCPU is in-kernel
+		 * for the purposes of directed yield.
+		 */
+		if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
+		    !kvm_arch_dy_has_pending_interrupt(vcpu) &&
+		    !kvm_arch_vcpu_preempted_in_kernel(vcpu))
+			continue;
+
+		if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
+			continue;
+
+		yielded = kvm_vcpu_yield_to(vcpu);
+		if (yielded > 0) {
+			WRITE_ONCE(kvm->last_boosted_vcpu, idx);
+			break;
+		} else if (yielded < 0 && !--try) {
+			break;
 		}
 	}
 	kvm_vcpu_set_in_spin_loop(me, false);
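
The new selection logic is easiest to see with concrete numbers. Below is a
minimal userspace sketch (not kernel code; nr_vcpus, last_boosted and my_idx
are made-up demo values) of how the (start + i) % nr_vcpus walk visits every
other vCPU exactly once, starting just past the last boosted vCPU and wrapping
around the end of the index space:

	#include <stdio.h>

	int main(void)
	{
		int nr_vcpus = 6;	/* assumed number of online vCPUs */
		int last_boosted = 3;	/* stand-in for kvm->last_boosted_vcpu */
		int my_idx = 5;		/* stand-in for me->vcpu_idx */
		int start = last_boosted + 1;
		int i, idx;

		for (i = 0; i < nr_vcpus; i++) {
			idx = (start + i) % nr_vcpus;
			if (idx == my_idx)
				continue;	/* never yield to ourselves */
			printf("candidate vCPU %d\n", idx);
		}
		return 0;
	}

With these values the loop prints 4, 0, 1, 2, 3: indices wrap from
nr_vcpus - 1 back to 0, the spinning vCPU itself is skipped, and the
previously boosted vCPU is only reconsidered last. In the kernel function, the
first candidate that kvm_vcpu_yield_to() accepts ends the walk, and up to
three failed yields (yielded < 0) are tolerated before the try counter gives
up.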