Skip to content

Commit ec30dcf

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Radim Krčmář: "PPC: - Close a hole which could possibly lead to the host timebase getting out of sync. - Three fixes relating to PTEs and TLB entries for radix guests. - Fix a bug which could lead to an interrupt never getting delivered to the guest, if it is pending for a guest vCPU when the vCPU gets offlined. s390: - Fix false negatives in VSIE validity check (Cc stable) x86: - Fix time drift of VMX preemption timer when a guest uses LAPIC timer in periodic mode (Cc stable) - Unconditionally expose CPUID.IA32_ARCH_CAPABILITIES to allow migration from hosts that don't need retpoline mitigation (Cc stable) - Fix guest crashes on reboot by properly coupling CR4.OSXSAVE and CPUID.OSXSAVE (Cc stable) - Report correct RIP after Hyper-V hypercall #UD (introduced in -rc6)" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: fix #UD address of failed Hyper-V hypercalls kvm: x86: IA32_ARCH_CAPABILITIES is always supported KVM: x86: Update cpuid properly when CR4.OSXAVE or CR4.PKE is changed x86/kvm: fix LAPIC timer drift when guest uses periodic mode KVM: s390: vsie: fix < 8k check for the itdba KVM: PPC: Book 3S HV: Do ptesync in radix guest exit path KVM: PPC: Book3S HV: XIVE: Resend re-routed interrupts on CPU priority change KVM: PPC: Book3S HV: Make radix clear pte when unmapping KVM: PPC: Book3S HV: Make radix use correct tlbie sequence in kvmppc_radix_tlbie_page KVM: PPC: Book3S HV: Snapshot timebase offset on guest entry
2 parents bc2dbc5 + 696ca77 commit ec30dcf

File tree

11 files changed

+198
-75
lines changed

11 files changed

+198
-75
lines changed

arch/powerpc/include/asm/kvm_book3s.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ struct kvmppc_vcore {
9696
struct kvm_vcpu *runner;
9797
struct kvm *kvm;
9898
u64 tb_offset; /* guest timebase - host timebase */
99+
u64 tb_offset_applied; /* timebase offset currently in force */
99100
ulong lpcr;
100101
u32 arch_compat;
101102
ulong pcr;

arch/powerpc/kernel/asm-offsets.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,7 @@ int main(void)
562562
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
563563
OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
564564
OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
565+
OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
565566
OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
566567
OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
567568
OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);

arch/powerpc/kvm/book3s_64_mmu_radix.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
162162
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG))
163163
asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
164164
: : "r" (addr), "r" (kvm->arch.lpid) : "memory");
165-
asm volatile("ptesync": : :"memory");
165+
asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
166166
}
167167

168168
static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
@@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
173173
/* RIC=1 PRS=0 R=1 IS=2 */
174174
asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
175175
: : "r" (rb), "r" (kvm->arch.lpid) : "memory");
176-
asm volatile("ptesync": : :"memory");
176+
asm volatile("eieio ; tlbsync ; ptesync": : :"memory");
177177
}
178178

179179
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
@@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
584584

585585
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
586586
if (ptep && pte_present(*ptep)) {
587-
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
587+
old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0,
588588
gpa, shift);
589589
kvmppc_radix_tlbie_page(kvm, gpa, shift);
590590
if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {

arch/powerpc/kvm/book3s_hv.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc)
24412441
vc->in_guest = 0;
24422442
vc->napping_threads = 0;
24432443
vc->conferring_threads = 0;
2444+
vc->tb_offset_applied = 0;
24442445
}
24452446

24462447
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 52 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
692692
22: ld r8,VCORE_TB_OFFSET(r5)
693693
cmpdi r8,0
694694
beq 37f
695+
std r8, VCORE_TB_OFFSET_APPL(r5)
695696
mftb r6 /* current host timebase */
696697
add r8,r8,r6
697698
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -940,18 +941,6 @@ FTR_SECTION_ELSE
940941
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
941942
8:
942943

943-
/*
944-
* Set the decrementer to the guest decrementer.
945-
*/
946-
ld r8,VCPU_DEC_EXPIRES(r4)
947-
/* r8 is a host timebase value here, convert to guest TB */
948-
ld r5,HSTATE_KVM_VCORE(r13)
949-
ld r6,VCORE_TB_OFFSET(r5)
950-
add r8,r8,r6
951-
mftb r7
952-
subf r3,r7,r8
953-
mtspr SPRN_DEC,r3
954-
955944
ld r5, VCPU_SPRG0(r4)
956945
ld r6, VCPU_SPRG1(r4)
957946
ld r7, VCPU_SPRG2(r4)
@@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
1005994
mtspr SPRN_LPCR,r8
1006995
isync
1007996

997+
/*
998+
* Set the decrementer to the guest decrementer.
999+
*/
1000+
ld r8,VCPU_DEC_EXPIRES(r4)
1001+
/* r8 is a host timebase value here, convert to guest TB */
1002+
ld r5,HSTATE_KVM_VCORE(r13)
1003+
ld r6,VCORE_TB_OFFSET_APPL(r5)
1004+
add r8,r8,r6
1005+
mftb r7
1006+
subf r3,r7,r8
1007+
mtspr SPRN_DEC,r3
1008+
10081009
/* Check if HDEC expires soon */
10091010
mfspr r3, SPRN_HDEC
10101011
EXTEND_HDEC(r3)
@@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
15971598

15981599
guest_bypass:
15991600
stw r12, STACK_SLOT_TRAP(r1)
1600-
mr r3, r12
1601+
1602+
/* Save DEC */
1603+
/* Do this before kvmhv_commence_exit so we know TB is guest TB */
1604+
ld r3, HSTATE_KVM_VCORE(r13)
1605+
mfspr r5,SPRN_DEC
1606+
mftb r6
1607+
/* On P9, if the guest has large decr enabled, don't sign extend */
1608+
BEGIN_FTR_SECTION
1609+
ld r4, VCORE_LPCR(r3)
1610+
andis. r4, r4, LPCR_LD@h
1611+
bne 16f
1612+
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1613+
extsw r5,r5
1614+
16: add r5,r5,r6
1615+
/* r5 is a guest timebase value here, convert to host TB */
1616+
ld r4,VCORE_TB_OFFSET_APPL(r3)
1617+
subf r5,r4,r5
1618+
std r5,VCPU_DEC_EXPIRES(r9)
1619+
16011620
/* Increment exit count, poke other threads to exit */
1621+
mr r3, r12
16021622
bl kvmhv_commence_exit
16031623
nop
16041624
ld r9, HSTATE_KVM_VCPU(r13)
@@ -1639,23 +1659,6 @@ guest_bypass:
16391659
mtspr SPRN_PURR,r3
16401660
mtspr SPRN_SPURR,r4
16411661

1642-
/* Save DEC */
1643-
ld r3, HSTATE_KVM_VCORE(r13)
1644-
mfspr r5,SPRN_DEC
1645-
mftb r6
1646-
/* On P9, if the guest has large decr enabled, don't sign extend */
1647-
BEGIN_FTR_SECTION
1648-
ld r4, VCORE_LPCR(r3)
1649-
andis. r4, r4, LPCR_LD@h
1650-
bne 16f
1651-
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1652-
extsw r5,r5
1653-
16: add r5,r5,r6
1654-
/* r5 is a guest timebase value here, convert to host TB */
1655-
ld r4,VCORE_TB_OFFSET(r3)
1656-
subf r5,r4,r5
1657-
std r5,VCPU_DEC_EXPIRES(r9)
1658-
16591662
BEGIN_FTR_SECTION
16601663
b 8f
16611664
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
@@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
19051908
cmpwi cr2, r0, 0
19061909
beq cr2, 4f
19071910

1911+
/*
1912+
* Radix: do eieio; tlbsync; ptesync sequence in case we
1913+
* interrupted the guest between a tlbie and a ptesync.
1914+
*/
1915+
eieio
1916+
tlbsync
1917+
ptesync
1918+
19081919
/* Radix: Handle the case where the guest used an illegal PID */
19091920
LOAD_REG_ADDR(r4, mmu_base_pid)
19101921
lwz r3, VCPU_GUEST_PID(r9)
@@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
20172028

20182029
27:
20192030
/* Subtract timebase offset from timebase */
2020-
ld r8,VCORE_TB_OFFSET(r5)
2031+
ld r8, VCORE_TB_OFFSET_APPL(r5)
20212032
cmpdi r8,0
20222033
beq 17f
2034+
li r0, 0
2035+
std r0, VCORE_TB_OFFSET_APPL(r5)
20232036
mftb r6 /* current guest timebase */
20242037
subf r8,r8,r6
20252038
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
27002713
add r3, r3, r5
27012714
ld r4, HSTATE_KVM_VCPU(r13)
27022715
ld r5, HSTATE_KVM_VCORE(r13)
2703-
ld r6, VCORE_TB_OFFSET(r5)
2716+
ld r6, VCORE_TB_OFFSET_APPL(r5)
27042717
subf r3, r6, r3 /* convert to host TB value */
27052718
std r3, VCPU_DEC_EXPIRES(r4)
27062719

@@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
27992812
/* Restore guest decrementer */
28002813
ld r3, VCPU_DEC_EXPIRES(r4)
28012814
ld r5, HSTATE_KVM_VCORE(r13)
2802-
ld r6, VCORE_TB_OFFSET(r5)
2815+
ld r6, VCORE_TB_OFFSET_APPL(r5)
28032816
add r3, r3, r6 /* convert host TB to guest TB value */
28042817
mftb r7
28052818
subf r3, r7, r3
@@ -3606,12 +3619,9 @@ kvmppc_fix_pmao:
36063619
*/
36073620
kvmhv_start_timing:
36083621
ld r5, HSTATE_KVM_VCORE(r13)
3609-
lbz r6, VCORE_IN_GUEST(r5)
3610-
cmpwi r6, 0
3611-
beq 5f /* if in guest, need to */
3612-
ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
3613-
5: mftb r5
3614-
subf r5, r6, r5
3622+
ld r6, VCORE_TB_OFFSET_APPL(r5)
3623+
mftb r5
3624+
subf r5, r6, r5 /* subtract current timebase offset */
36153625
std r3, VCPU_CUR_ACTIVITY(r4)
36163626
std r5, VCPU_ACTIVITY_START(r4)
36173627
blr
@@ -3622,15 +3632,12 @@ kvmhv_start_timing:
36223632
*/
36233633
kvmhv_accumulate_time:
36243634
ld r5, HSTATE_KVM_VCORE(r13)
3625-
lbz r8, VCORE_IN_GUEST(r5)
3626-
cmpwi r8, 0
3627-
beq 4f /* if in guest, need to */
3628-
ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */
3629-
4: ld r5, VCPU_CUR_ACTIVITY(r4)
3635+
ld r8, VCORE_TB_OFFSET_APPL(r5)
3636+
ld r5, VCPU_CUR_ACTIVITY(r4)
36303637
ld r6, VCPU_ACTIVITY_START(r4)
36313638
std r3, VCPU_CUR_ACTIVITY(r4)
36323639
mftb r7
3633-
subf r7, r8, r7
3640+
subf r7, r8, r7 /* subtract current timebase offset */
36343641
std r7, VCPU_ACTIVITY_START(r4)
36353642
cmpdi r5, 0
36363643
beqlr

arch/powerpc/kvm/book3s_xive_template.c

Lines changed: 101 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
#define XGLUE(a,b) a##b
1212
#define GLUE(a,b) XGLUE(a,b)
1313

14+
/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
15+
#define XICS_DUMMY 1
16+
1417
static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
1518
{
1619
u8 cppr;
@@ -205,6 +208,10 @@ static u32 GLUE(X_PFX,scan_interrupts)(struct kvmppc_xive_vcpu *xc,
205208
goto skip_ipi;
206209
}
207210

211+
/* If it's the dummy interrupt, continue searching */
212+
if (hirq == XICS_DUMMY)
213+
goto skip_ipi;
214+
208215
/* If fetching, update queue pointers */
209216
if (scan_type == scan_fetch) {
210217
q->idx = idx;
@@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc)
385392
__x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING);
386393
}
387394

395+
static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive,
396+
struct kvmppc_xive_vcpu *xc)
397+
{
398+
unsigned int prio;
399+
400+
/* For each priority that is now masked */
401+
for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
402+
struct xive_q *q = &xc->queues[prio];
403+
struct kvmppc_xive_irq_state *state;
404+
struct kvmppc_xive_src_block *sb;
405+
u32 idx, toggle, entry, irq, hw_num;
406+
struct xive_irq_data *xd;
407+
__be32 *qpage;
408+
u16 src;
409+
410+
idx = q->idx;
411+
toggle = q->toggle;
412+
qpage = READ_ONCE(q->qpage);
413+
if (!qpage)
414+
continue;
415+
416+
/* For each interrupt in the queue */
417+
for (;;) {
418+
entry = be32_to_cpup(qpage + idx);
419+
420+
/* No more ? */
421+
if ((entry >> 31) == toggle)
422+
break;
423+
irq = entry & 0x7fffffff;
424+
425+
/* Skip dummies and IPIs */
426+
if (irq == XICS_DUMMY || irq == XICS_IPI)
427+
goto next;
428+
sb = kvmppc_xive_find_source(xive, irq, &src);
429+
if (!sb)
430+
goto next;
431+
state = &sb->irq_state[src];
432+
433+
/* Has it been rerouted ? */
434+
if (xc->server_num == state->act_server)
435+
goto next;
436+
437+
/*
438+
* Allright, it *has* been re-routed, kill it from
439+
* the queue.
440+
*/
441+
qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
442+
443+
/* Find the HW interrupt */
444+
kvmppc_xive_select_irq(state, &hw_num, &xd);
445+
446+
/* If it's not an LSI, set PQ to 11 the EOI will force a resend */
447+
if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
448+
GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11);
449+
450+
/* EOI the source */
451+
GLUE(X_PFX,source_eoi)(hw_num, xd);
452+
453+
next:
454+
idx = (idx + 1) & q->msk;
455+
if (idx == 0)
456+
toggle ^= 1;
457+
}
458+
}
459+
}
460+
388461
X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
389462
{
390463
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
464+
struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
391465
u8 old_cppr;
392466

393467
pr_devel("H_CPPR(cppr=%ld)\n", cppr);
@@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr)
407481
*/
408482
smp_mb();
409483

410-
/*
411-
* We are masking less, we need to look for pending things
412-
* to deliver and set VP pending bits accordingly to trigger
413-
* a new interrupt otherwise we might miss MFRR changes for
414-
* which we have optimized out sending an IPI signal.
415-
*/
416-
if (cppr > old_cppr)
484+
if (cppr > old_cppr) {
485+
/*
486+
* We are masking less, we need to look for pending things
487+
* to deliver and set VP pending bits accordingly to trigger
488+
* a new interrupt otherwise we might miss MFRR changes for
489+
* which we have optimized out sending an IPI signal.
490+
*/
417491
GLUE(X_PFX,push_pending_to_hw)(xc);
492+
} else {
493+
/*
494+
* We are masking more, we need to check the queue for any
495+
* interrupt that has been routed to another CPU, take
496+
* it out (replace it with the dummy) and retrigger it.
497+
*
498+
* This is necessary since those interrupts may otherwise
499+
* never be processed, at least not until this CPU restores
500+
* its CPPR.
501+
*
502+
* This is in theory racy vs. HW adding new interrupts to
503+
* the queue. In practice this works because the interesting
504+
* cases are when the guest has done a set_xive() to move the
505+
* interrupt away, which flushes the xive, followed by the
506+
* target CPU doing a H_CPPR. So any new interrupt coming into
507+
* the queue must still be routed to us and isn't a source
508+
* of concern.
509+
*/
510+
GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc);
511+
}
418512

419513
/* Apply new CPPR */
420514
xc->hw_cppr = cppr;

arch/s390/kvm/vsie.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
578578

579579
gpa = READ_ONCE(scb_o->itdba) & ~0xffUL;
580580
if (gpa && (scb_s->ecb & ECB_TE)) {
581-
if (!(gpa & ~0x1fffU)) {
581+
if (!(gpa & ~0x1fffUL)) {
582582
rc = set_validity_icpt(scb_s, 0x0080U);
583583
goto unpin;
584584
}

arch/x86/kvm/cpuid.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
495495
entry->ecx &= ~F(PKU);
496496
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
497497
cpuid_mask(&entry->edx, CPUID_7_EDX);
498+
/*
499+
* We emulate ARCH_CAPABILITIES in software even
500+
* if the host doesn't support it.
501+
*/
502+
entry->edx |= F(ARCH_CAPABILITIES);
498503
} else {
499504
entry->ebx = 0;
500505
entry->ecx = 0;

0 commit comments

Comments (0)