Commit ca5e83e

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 - Fixes for s390 interrupt delivery

 - Fixes for Xen emulator bugs showing up as debug kernel WARNs

 - Fix another issue with SEV/ES string I/O VMGEXITs

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Take srcu lock in post_kvm_run_save()
  KVM: SEV-ES: fix another issue with string I/O VMGEXITs
  KVM: x86/xen: Fix kvm_xen_has_interrupt() sleeping in kvm_vcpu_block()
  KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock
  KVM: s390: preserve deliverable_mask in __airqs_kick_single_vcpu
  KVM: s390: clear kicked_mask before sleeping again
2 parents: 180eca5 + f3d1436

File tree

6 files changed: +61 -25 lines

  arch/s390/kvm/interrupt.c
  arch/s390/kvm/kvm-s390.c
  arch/x86/include/asm/kvm_host.h
  arch/x86/kvm/svm/sev.c
  arch/x86/kvm/x86.c
  arch/x86/kvm/xen.c

arch/s390/kvm/interrupt.c

Lines changed: 3 additions & 2 deletions

@@ -3053,13 +3053,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask)
 	int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
 	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
 	struct kvm_vcpu *vcpu;
+	u8 vcpu_isc_mask;
 
 	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 		if (psw_ioint_disabled(vcpu))
 			continue;
-		deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
-		if (deliverable_mask) {
+		vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+		if (deliverable_mask & vcpu_isc_mask) {
			/* lately kicked but not yet running */
			if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
				return;
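
The bug fixed here: the old code AND-ed each vCPU's interrupt subclass (ISC) mask into deliverable_mask itself, so after the first idle vCPU had been examined, every later vCPU was checked against an already-narrowed (possibly empty) mask, and an airq deliverable only to a later vCPU could fail to kick anyone. The new code leaves deliverable_mask untouched and tests the per-vCPU intersection. A minimal userspace sketch of the difference (illustrative only, not kernel code; the two-vCPU setup and mask values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t deliverable_mask = 0x02;           /* ISC pending for the guest */
	uint8_t vcpu_isc_mask[2] = { 0x01, 0x02 }; /* ISCs each idle vCPU accepts */

	/* Old pattern: vCPU 0 narrows the shared mask to 0, so vCPU 1,
	 * which could actually take the interrupt, is never kicked. */
	uint8_t m = deliverable_mask;
	for (int i = 0; i < 2; i++) {
		m &= vcpu_isc_mask[i];
		printf("buggy: vcpu %d -> %s\n", i, m ? "kick" : "skip");
	}

	/* Fixed pattern: test the intersection without modifying the mask. */
	for (int i = 0; i < 2; i++) {
		uint8_t hit = deliverable_mask & vcpu_isc_mask[i];
		printf("fixed: vcpu %d -> %s\n", i, hit ? "kick" : "skip");
	}
	return 0;
}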

arch/s390/kvm/kvm-s390.c

Lines changed: 1 addition & 0 deletions

@@ -3363,6 +3363,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
 	return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 
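
kicked_mask suppresses duplicate kicks: __airqs_kick_single_vcpu() above only kicks a vCPU whose bit it can newly set. Clearing the bit in kvm_arch_vcpu_runnable(), i.e. whenever the vCPU re-checks for work, re-arms the mechanism; without it, a vCPU that was kicked once, found nothing to do and went back to sleep could not be kicked again. A minimal userspace sketch of that test-and-set / clear pairing (illustrative only; the kernel uses the atomic test_and_set_bit()/clear_bit() helpers, this sketch ignores atomicity):

#include <stdbool.h>
#include <stdio.h>

static unsigned long kicked_mask;		/* one bit per vCPU */

/* Deliver a kick only if one is not already in flight for this vCPU. */
static bool kick_vcpu(int idx)
{
	unsigned long bit = 1UL << idx;

	if (kicked_mask & bit)
		return false;			/* already kicked, skip */
	kicked_mask |= bit;
	return true;				/* caller wakes the vCPU */
}

/* Called when the vCPU re-checks for work: consume the kick so that the
 * next interrupt can kick it again. */
static void vcpu_checked_for_work(int idx)
{
	kicked_mask &= ~(1UL << idx);
}

int main(void)
{
	printf("%d\n", kick_vcpu(0));	/* 1: first kick goes through */
	printf("%d\n", kick_vcpu(0));	/* 0: suppressed, kick still pending */
	vcpu_checked_for_work(0);	/* without this, vCPU 0 stays unkickable */
	printf("%d\n", kick_vcpu(0));	/* 1: can be kicked again */
	return 0;
}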

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 1 deletion

@@ -1098,7 +1098,7 @@ struct kvm_arch {
 	u64 cur_tsc_generation;
 	int nr_vcpus_matched_tsc;
 
-	spinlock_t pvclock_gtod_sync_lock;
+	raw_spinlock_t pvclock_gtod_sync_lock;
 	bool use_master_clock;
 	u64 master_kernel_ns;
 	u64 master_cycle_now;
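
This declaration change pairs with the lock/unlock and init call conversions in arch/x86/kvm/x86.c below. The distinction matters on PREEMPT_RT kernels, where an ordinary spinlock_t becomes a sleeping lock: a lock that must be usable in non-preemptible, non-sleeping contexts has to stay a true spinning lock, which is what raw_spinlock_t provides. A generic sketch of the raw-spinlock pattern (illustrative kernel-style fragment, not part of this commit; names are made up):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_lock);	/* stays a spinning lock on RT */

static void example_update(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&example_lock, flags);
	/* touch the protected state; sleeping is forbidden in here */
	raw_spin_unlock_irqrestore(&example_lock, flags);
}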

arch/x86/kvm/svm/sev.c

Lines changed: 12 additions & 3 deletions

@@ -2591,11 +2591,20 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
 
 int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
 {
-	if (!setup_vmgexit_scratch(svm, in, svm->vmcb->control.exit_info_2))
+	int count;
+	int bytes;
+
+	if (svm->vmcb->control.exit_info_2 > INT_MAX)
+		return -EINVAL;
+
+	count = svm->vmcb->control.exit_info_2;
+	if (unlikely(check_mul_overflow(count, size, &bytes)))
+		return -EINVAL;
+
+	if (!setup_vmgexit_scratch(svm, in, bytes))
 		return -EINVAL;
 
-	return kvm_sev_es_string_io(&svm->vcpu, size, port,
-				    svm->ghcb_sa, svm->ghcb_sa_len / size, in);
+	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
 }
 
 void sev_es_init_vmcb(struct vcpu_svm *svm)
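
exit_info_2 is a guest-controlled element repeat count, so the fix bounds it to INT_MAX and checks that count * size cannot overflow before sizing the scratch buffer or passing the count on. A userspace sketch of the same guard (the kernel's check_mul_overflow() corresponds to the compiler's overflow-checking builtin used here; the helper name is made up):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Return 1 and fill *bytes if the guest-supplied count is sane, else 0. */
static int scratch_size_ok(uint64_t exit_info_2, int size, int *bytes)
{
	int count;

	if (exit_info_2 > INT_MAX)	/* reject absurd repeat counts */
		return 0;

	count = (int)exit_info_2;
	if (__builtin_mul_overflow(count, size, bytes))
		return 0;		/* count * size overflows int */

	return 1;
}

int main(void)
{
	int bytes;

	printf("%d\n", scratch_size_ok(100, 4, &bytes));		/* 1, bytes = 400 */
	printf("%d\n", scratch_size_ok((uint64_t)INT_MAX, 4, &bytes));	/* 0: multiply overflows */
	printf("%d\n", scratch_size_ok(1ULL << 40, 1, &bytes));		/* 0: count itself too large */
	return 0;
}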

arch/x86/kvm/x86.c

Lines changed: 22 additions & 14 deletions

@@ -2542,15 +2542,15 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	kvm_vcpu_write_tsc_offset(vcpu, offset);
 	raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-	spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
 	if (!matched) {
 		kvm->arch.nr_vcpus_matched_tsc = 0;
 	} else if (!already_matched) {
 		kvm->arch.nr_vcpus_matched_tsc++;
 	}
 
 	kvm_track_tsc_matching(vcpu);
-	spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,

@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 	kvm_make_mclock_inprogress_request(kvm);
 
 	/* no guest entries from this point */
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	pvclock_update_vm_gtod_copy(kvm);
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	unsigned long flags;
 	u64 ret;
 
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	if (!ka->use_master_clock) {
-		spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 		return get_kvmclock_base_ns() + ka->kvmclock_offset;
 	}
 
 	hv_clock.tsc_timestamp = ka->master_cycle_now;
 	hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
 	get_cpu();

@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	 * If the host uses TSC clock, then passthrough TSC as stable
 	 * to the guest.
 	 */
-	spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 	use_master_clock = ka->use_master_clock;
 	if (use_master_clock) {
 		host_tsc = ka->master_cycle_now;
 		kernel_ns = ka->master_kernel_ns;
 	}
-	spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+	raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);

@@ -6100,13 +6100,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	 * is slightly ahead) here we risk going negative on unsigned
 	 * 'system_time' when 'user_ns.clock' is very small.
 	 */
-	spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+	raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
 	if (kvm->arch.use_master_clock)
 		now_ns = ka->master_kernel_ns;
 	else
 		now_ns = get_kvmclock_base_ns();
 	ka->kvmclock_offset = user_ns.clock - now_ns;
-	spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+	raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
 
 	kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
 	break;

@@ -8156,9 +8156,9 @@ static void kvm_hyperv_tsc_notifier(void)
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		struct kvm_arch *ka = &kvm->arch;
 
-		spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
 		pvclock_update_vm_gtod_copy(kvm);
-		spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
+		raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
 		kvm_for_each_vcpu(cpu, vcpu, kvm)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

@@ -8800,9 +8800,17 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
+
+	/*
+	 * The call to kvm_ready_for_interrupt_injection() may end up in
+	 * kvm_xen_has_interrupt() which may require the srcu lock to be
+	 * held, to protect against changes in the vcpu_info address.
+	 */
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_run->ready_for_interrupt_injection =
 		pic_in_kernel(vcpu->kvm) ||
 		kvm_vcpu_ready_for_interrupt_injection(vcpu);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	if (is_smm(vcpu))
 		kvm_run->flags |= KVM_RUN_X86_SMM;

@@ -11199,7 +11207,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
-	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+	raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
 	kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
 	pvclock_update_vm_gtod_copy(kvm);
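
The @@ -8800 hunk is the "Take srcu lock in post_kvm_run_save()" fix: kvm_vcpu_ready_for_interrupt_injection() can now reach kvm_xen_has_interrupt(), which reads state protected by the VM's SRCU (the vcpu_info address, as the added comment says), so the read side must be bracketed by srcu_read_lock()/srcu_read_unlock(). The general SRCU read-side pattern looks like this (generic sketch, not code from this commit):

#include <linux/srcu.h>

static void example_srcu_reader(struct srcu_struct *sp)
{
	int idx;

	idx = srcu_read_lock(sp);
	/*
	 * Pointers published under sp may be dereferenced here; updaters
	 * wait for all such readers via synchronize_srcu() before freeing.
	 */
	srcu_read_unlock(sp, idx);
}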

arch/x86/kvm/xen.c

Lines changed: 22 additions & 5 deletions

@@ -190,6 +190,7 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
+	int err;
 	u8 rc = 0;
 
 	/*

@@ -216,13 +217,29 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 	if (likely(slots->generation == ghc->generation &&
 		   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
 		/* Fast path */
-		__get_user(rc, (u8 __user *)ghc->hva + offset);
-	} else {
-		/* Slow path */
-		kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
-					     sizeof(rc));
+		pagefault_disable();
+		err = __get_user(rc, (u8 __user *)ghc->hva + offset);
+		pagefault_enable();
+		if (!err)
+			return rc;
 	}
 
+	/* Slow path */
+
+	/*
+	 * This function gets called from kvm_vcpu_block() after setting the
+	 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+	 * from a HLT. So we really mustn't sleep. If the page ended up absent
+	 * at that point, just return 1 in order to trigger an immediate wake,
+	 * and we'll end up getting called again from a context where we *can*
+	 * fault in the page and wait for it.
+	 */
+	if (in_atomic() || !task_is_running(current))
+		return 1;
+
+	kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
+				     sizeof(rc));
+
 	return rc;
 }
 
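
The fast path now runs __get_user() with page faults disabled, so if the guest's vcpu_info page happens to be absent the access fails instead of sleeping; the slow path then either returns 1 to force an immediate wakeup (when the caller must not sleep) or falls back to kvm_read_guest_offset_cached(). A minimal sketch of the non-sleeping user access pattern (kernel-style fragment, helper name made up, not part of this commit):

#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Read one byte from userspace without ever sleeping: with page faults
 * disabled, a missing page makes __get_user() fail instead of blocking,
 * and the caller decides how to fall back.
 */
static int read_user_byte_nosleep(const u8 __user *ptr, u8 *val)
{
	int err;

	pagefault_disable();
	err = __get_user(*val, ptr);
	pagefault_enable();

	return err;	/* 0 on success, -EFAULT if the page was absent */
}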
