Commit ebd38b2
Merge tag 'kvm-x86-misc-6.16' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.16:

 - Unify virtualization of IBRS on nested VM-Exit, and cross-vCPU IBPB, between SVM and VMX.
 - Advertise support to userspace for WRMSRNS and PREFETCHI.
 - Rescan I/O APIC routes after handling EOI that needed to be intercepted due to the old/previous routing, but not the new/current routing.
 - Add a module param to control and enumerate support for device posted interrupts.
 - Misc cleanups.
2 parents: cd1be30 + 37d8bad

19 files changed, 120 insertions(+), 79 deletions(-)

arch/x86/include/asm/cpufeatures.h

Lines changed: 2 additions & 0 deletions

@@ -336,6 +336,7 @@
 #define X86_FEATURE_AMD_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
 #define X86_FEATURE_AMD_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_AMD_STIBP_ALWAYS_ON	(13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_IBRS_SAME_MODE	(13*32+19) /* Indirect Branch Restricted Speculation same mode protection */
 #define X86_FEATURE_AMD_PPIN		(13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
 #define X86_FEATURE_AMD_SSBD		(13*32+24) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_VIRT_SSBD		(13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */

@@ -457,6 +458,7 @@
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* SMM_CTL MSR is not present */
 
+#define X86_FEATURE_PREFETCHI		(20*32+20) /* Prefetch Data/Instruction to Cache Level */
 #define X86_FEATURE_SBPB		(20*32+27) /* Selective Branch Prediction Barrier */
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* CPU is not affected by SRSO */
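Each X86_FEATURE_* value above encodes (word * 32 + bit): a word index into the kernel's cpufeatures bitmaps plus a bit position within that word. A minimal user-space sketch of that decoding, using only the constants visible in the hunks (that word 13 and word 20 correspond to the CPUID 0x80000008 EBX and 0x80000021 EAX leaves is how the surrounding header is organized, stated here as an assumption, not shown in this diff):

#include <stdio.h>

/* Values copied from the hunks above: word*32 + bit. */
#define X86_FEATURE_AMD_IBRS_SAME_MODE	(13*32 + 19)
#define X86_FEATURE_PREFETCHI		(20*32 + 20)

int main(void)
{
	unsigned int features[] = {
		X86_FEATURE_AMD_IBRS_SAME_MODE,
		X86_FEATURE_PREFETCHI,
	};

	for (unsigned int i = 0; i < sizeof(features) / sizeof(features[0]); i++)
		printf("feature %u -> word %u, bit %u\n",
		       features[i], features[i] / 32, features[i] % 32);
	return 0;
}

Running it prints word 13/bit 19 and word 20/bit 20, matching the positions the new defines claim in their respective leaves.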

arch/x86/include/asm/kvm_host.h

Lines changed: 3 additions & 1 deletion

@@ -1034,6 +1034,7 @@ struct kvm_vcpu_arch {
 
 	int pending_ioapic_eoi;
 	int pending_external_vector;
+	int highest_stale_pending_ioapic_eoi;
 
 	/* be preempted when it's in kernel-mode(cpl=0) */
 	bool preempted_in_kernel;

@@ -1941,6 +1942,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern bool __read_mostly enable_apicv;
+extern bool __read_mostly enable_device_posted_irqs;
 extern struct kvm_x86_ops kvm_x86_ops;
 
 #define kvm_x86_call(func) static_call(kvm_x86_##func)

@@ -2444,7 +2446,7 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 
 static inline bool kvm_arch_has_irq_bypass(void)
 {
-	return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
+	return enable_device_posted_irqs;
 }
 
 #endif /* _ASM_X86_KVM_HOST_H */
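The kvm_arch_has_irq_bypass() hunk replaces a condition recomputed on every call with a single cached boolean. How enable_device_posted_irqs is initialized is not among the hunks shown; the sketch below models the refactor pattern under the assumption (suggested by the old condition and the merge description's new module param) that it folds the param, enable_apicv, and the IRQ-posting capability together once at setup. All names here are hypothetical stand-ins, not kernel code:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for enable_apicv and irq_remapping_cap(). */
static bool apicv_enabled = true;
static bool irq_posting_capable = true;

/* Analogue of enable_device_posted_irqs: computed once, read many times. */
static bool device_posted_irqs_enabled;

static void hardware_setup_model(bool module_param_allows)
{
	device_posted_irqs_enabled = module_param_allows &&
				     apicv_enabled && irq_posting_capable;
}

static bool arch_has_irq_bypass_model(void)
{
	return device_posted_irqs_enabled;	/* no recomputation per call */
}

int main(void)
{
	hardware_setup_model(true);
	printf("irq bypass: %d\n", arch_has_irq_bypass_model());
	return 0;
}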

arch/x86/include/asm/msr.h

Lines changed: 2 additions & 2 deletions

@@ -300,7 +300,7 @@ do { \
 #endif /* !CONFIG_PARAVIRT_XXL */
 
 /* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
-#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+#define ASM_WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
 
 /* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
 static __always_inline void wrmsrns(u32 msr, u64 val)

@@ -309,7 +309,7 @@ static __always_inline void wrmsrns(u32 msr, u64 val)
 	 * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
 	 * DS prefix to avoid a trailing NOP.
 	 */
-	asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+	asm volatile("1: " ALTERNATIVE("ds wrmsr", ASM_WRMSRNS, X86_FEATURE_WRMSRNS)
		     "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
		     : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
 }
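Context for the DS-prefix comment above: WRMSR encodes as 0f 30 (2 bytes) and WRMSRNS as 0f 01 c6 (3 bytes, per the _ASM_BYTES definition), so prefixing WRMSR with the one-byte DS segment override (3e) makes both ALTERNATIVE arms the same length and avoids a trailing NOP. A trivial user-space check of those byte counts (the 0f 30 and 3e encodings are standard x86, not taken from this diff):

#include <stdio.h>

int main(void)
{
	/* "ds wrmsr": DS override prefix (0x3e) + WRMSR (0x0f 0x30). */
	const unsigned char ds_wrmsr[] = { 0x3e, 0x0f, 0x30 };
	/* ASM_WRMSRNS bytes, as defined in the hunk above. */
	const unsigned char wrmsrns[] = { 0x0f, 0x01, 0xc6 };

	printf("ds wrmsr: %zu bytes, wrmsrns: %zu bytes\n",
	       sizeof(ds_wrmsr), sizeof(wrmsrns));
	return 0;
}

The rename itself (WRMSRNS -> ASM_WRMSRNS) appears to free the plain WRMSRNS identifier for other users; the motivating conflict is not visible in the hunks shown here.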

arch/x86/kvm/cpuid.c

Lines changed: 7 additions & 1 deletion

@@ -978,6 +978,7 @@ void kvm_set_cpu_caps(void)
 		F(FZRM),
 		F(FSRS),
 		F(FSRC),
+		F(WRMSRNS),
 		F(AMX_FP16),
 		F(AVX_IFMA),
 		F(LAM),

@@ -1093,6 +1094,7 @@ void kvm_set_cpu_caps(void)
 		F(AMD_SSB_NO),
 		F(AMD_STIBP),
 		F(AMD_STIBP_ALWAYS_ON),
+		F(AMD_IBRS_SAME_MODE),
 		F(AMD_PSFD),
 		F(AMD_IBPB_RET),
 	);

@@ -1150,6 +1152,7 @@ void kvm_set_cpu_caps(void)
 
 	kvm_cpu_cap_init(CPUID_8000_0021_EAX,
 		F(NO_NESTED_DATA_BP),
+		F(WRMSR_XX_BASE_NS),
 		/*
 		 * Synthesize "LFENCE is serializing" into the AMD-defined entry
 		 * in KVM's supported CPUID, i.e. if the feature is reported as

@@ -1163,10 +1166,13 @@ void kvm_set_cpu_caps(void)
 		SYNTHESIZED_F(LFENCE_RDTSC),
 		/* SmmPgCfgLock */
 		F(NULL_SEL_CLR_BASE),
+		/* UpperAddressIgnore */
 		F(AUTOIBRS),
+		F(PREFETCHI),
 		EMULATED_F(NO_SMM_CTL_MSR),
 		/* PrefetchCtlMsr */
-		F(WRMSR_XX_BASE_NS),
+		/* GpOnUserCpuid */
+		/* EPSF */
 		SYNTHESIZED_F(SBPB),
 		SYNTHESIZED_F(IBPB_BRTYPE),
 		SYNTHESIZED_F(SRSO_NO),

arch/x86/kvm/ioapic.c

Lines changed: 2 additions & 5 deletions

@@ -296,11 +296,8 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
 		    index == RTC_GSI) {
 			u16 dm = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode);
 
-			if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						e->fields.dest_id, dm) ||
-			    kvm_apic_pending_eoi(vcpu, e->fields.vector))
-				__set_bit(e->fields.vector,
-					  ioapic_handled_vectors);
+			kvm_scan_ioapic_irq(vcpu, e->fields.dest_id, dm,
+					    e->fields.vector, ioapic_handled_vectors);
 		}
 	}
 	spin_unlock(&ioapic->lock);

arch/x86/kvm/ioapic.h

Lines changed: 2 additions & 0 deletions

@@ -120,4 +120,6 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
 			    ulong *ioapic_handled_vectors);
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 			    ulong *ioapic_handled_vectors);
+void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
+			 u8 vector, unsigned long *ioapic_handled_vectors);
 #endif

arch/x86/kvm/irq_comm.c

Lines changed: 32 additions & 5 deletions

@@ -402,6 +402,33 @@ void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 	kvm_make_scan_ioapic_request(kvm);
 }
 
+void kvm_scan_ioapic_irq(struct kvm_vcpu *vcpu, u32 dest_id, u16 dest_mode,
+			 u8 vector, unsigned long *ioapic_handled_vectors)
+{
+	/*
+	 * Intercept EOI if the vCPU is the target of the new IRQ routing, or
+	 * the vCPU has a pending IRQ from the old routing, i.e. if the vCPU
+	 * may receive a level-triggered IRQ in the future, or has already
+	 * received a level-triggered IRQ. The EOI needs to be intercepted and
+	 * forwarded to I/O APIC emulation so that the IRQ can be de-asserted.
+	 */
+	if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, dest_id, dest_mode)) {
+		__set_bit(vector, ioapic_handled_vectors);
+	} else if (kvm_apic_pending_eoi(vcpu, vector)) {
+		__set_bit(vector, ioapic_handled_vectors);
+
+		/*
+		 * Track the highest pending EOI for which the vCPU is NOT the
+		 * target in the new routing. Only the EOI for the IRQ that is
+		 * in-flight (for the old routing) needs to be intercepted; any
+		 * future IRQs that arrive on this vCPU will be coincidental to
+		 * the level-triggered routing and don't need to be intercepted.
+		 */
+		if ((int)vector > vcpu->arch.highest_stale_pending_ioapic_eoi)
+			vcpu->arch.highest_stale_pending_ioapic_eoi = vector;
+	}
+}
+
 void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 			    ulong *ioapic_handled_vectors)
 {

@@ -424,11 +451,11 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
 
 			kvm_set_msi_irq(vcpu->kvm, entry, &irq);
 
-			if (irq.trig_mode &&
-			    (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT,
-						 irq.dest_id, irq.dest_mode) ||
-			     kvm_apic_pending_eoi(vcpu, irq.vector)))
-				__set_bit(irq.vector, ioapic_handled_vectors);
+			if (!irq.trig_mode)
+				continue;
+
+			kvm_scan_ioapic_irq(vcpu, irq.dest_id, irq.dest_mode,
+					    irq.vector, ioapic_handled_vectors);
 		}
 	}
 	srcu_read_unlock(&kvm->irq_srcu, idx);
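The new helper's control flow is compact but subtle. Below is a self-contained C model of just that decision (not kernel code): both arms set the interception bit, but only the stale-IRQ arm records the vector so the eventual EOI can trigger a route rescan. kvm_apic_match_dest() and kvm_apic_pending_eoi() require real vCPU state, so they are reduced to plain booleans here:

#include <stdbool.h>
#include <stdio.h>

#define NR_VECTORS	256
#define BITS_PER_LONG	(8 * sizeof(unsigned long))

struct model_vcpu {
	unsigned long handled[NR_VECTORS / BITS_PER_LONG];
	int highest_stale_pending_ioapic_eoi;	/* -1: none recorded */
};

static void scan_ioapic_irq_model(struct model_vcpu *v, bool is_dest,
				  bool pending_eoi, unsigned char vector)
{
	/* Either arm intercepts the EOI for this vector. */
	if (is_dest || pending_eoi)
		v->handled[vector / BITS_PER_LONG] |=
			1UL << (vector % BITS_PER_LONG);

	/* Stale arm only: remember the highest vector from the old routing. */
	if (!is_dest && pending_eoi &&
	    (int)vector > v->highest_stale_pending_ioapic_eoi)
		v->highest_stale_pending_ioapic_eoi = vector;
}

int main(void)
{
	struct model_vcpu v = { .highest_stale_pending_ioapic_eoi = -1 };

	scan_ioapic_irq_model(&v, true, false, 0x40);	/* target of new routing */
	scan_ioapic_irq_model(&v, false, true, 0x61);	/* stale, old routing */

	printf("highest stale EOI vector: 0x%x\n",
	       v.highest_stale_pending_ioapic_eoi);
	return 0;
}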

arch/x86/kvm/lapic.c

Lines changed: 8 additions & 0 deletions

@@ -1459,6 +1459,14 @@ static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 	if (!kvm_ioapic_handles_vector(apic, vector))
 		return;
 
+	/*
+	 * If the intercepted EOI is for an IRQ that was pending from the
+	 * previous routing, then re-scan the I/O APIC routes, as EOIs for the
+	 * IRQ likely no longer need to be intercepted.
+	 */
+	if (apic->vcpu->arch.highest_stale_pending_ioapic_eoi == vector)
+		kvm_make_request(KVM_REQ_SCAN_IOAPIC, apic->vcpu);
+
 	/* Request a KVM exit to inform the userspace IOAPIC. */
 	if (irqchip_split(apic->vcpu->kvm)) {
 		apic->vcpu->arch.pending_ioapic_eoi = vector;
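A companion sketch for the trigger side of the mechanism: once the EOI for the recorded stale vector arrives, a rescan request is raised so the interception can be dropped on the next scan. KVM_REQ_SCAN_IOAPIC is reduced to a flag here, and the field is pre-seeded rather than set by a real scan:

#include <stdbool.h>
#include <stdio.h>

static int highest_stale_pending_ioapic_eoi = 0x61;	/* as if set by the scan */
static bool scan_ioapic_requested;	/* KVM_REQ_SCAN_IOAPIC stand-in */

static void ioapic_send_eoi_model(int vector)
{
	if (vector == highest_stale_pending_ioapic_eoi)
		scan_ioapic_requested = true;
}

int main(void)
{
	ioapic_send_eoi_model(0x61);
	printf("rescan requested: %d\n", scan_ioapic_requested);
	return 0;
}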

arch/x86/kvm/svm/nested.c

Lines changed: 2 additions & 0 deletions

@@ -1041,6 +1041,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
+	kvm_nested_vmexit_handle_ibrs(vcpu);
+
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
 	/*

arch/x86/kvm/svm/svm.c

Lines changed: 2 additions & 25 deletions

@@ -231,6 +231,8 @@ module_param(tsc_scaling, int, 0444);
 static bool avic;
 module_param(avic, bool, 0444);
 
+module_param(enable_device_posted_irqs, bool, 0444);
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 

@@ -1484,25 +1486,10 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
 	return err;
 }
 
-static void svm_clear_current_vmcb(struct vmcb *vmcb)
-{
-	int i;
-
-	for_each_online_cpu(i)
-		cmpxchg(per_cpu_ptr(&svm_data.current_vmcb, i), vmcb, NULL);
-}
-
 static void svm_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	/*
-	 * The vmcb page can be recycled, causing a false negative in
-	 * svm_vcpu_load(). So, ensure that no logical CPU has this
-	 * vmcb page recorded as its current vmcb.
-	 */
-	svm_clear_current_vmcb(svm->vmcb);
-
 	svm_leave_nested(vcpu);
 	svm_free_nested(svm);
 

@@ -1616,19 +1603,9 @@ static void svm_prepare_host_switch(struct kvm_vcpu *vcpu)
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	struct vcpu_svm *svm = to_svm(vcpu);
-	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
-
 	if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);
 
-	if (sd->current_vmcb != svm->vmcb) {
-		sd->current_vmcb = svm->vmcb;
-
-		if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT) &&
-		    static_branch_likely(&switch_vcpu_ibpb))
-			indirect_branch_prediction_barrier();
-	}
 	if (kvm_vcpu_apicv_active(vcpu))
 		avic_vcpu_load(vcpu, cpu);
 }
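The two deleted svm.c hunks implemented "issue an IBPB when this CPU switches to a different vCPU's VMCB", plus bookkeeping so a freed-and-recycled VMCB page could not produce a false "same context" match. Per the merge description, cross-vCPU IBPB is now unified between SVM and VMX in common code; that code is not among the hunks shown, so the following is only a model of the removed pattern, with made-up names:

#include <stdio.h>

struct vmcb { int id; };

static struct vmcb *current_ctx;	/* per-CPU in the real code */

/* Stand-in for indirect_branch_prediction_barrier(). */
static void prediction_barrier(void)
{
	printf("IBPB\n");
}

static void vcpu_load_model(struct vmcb *vmcb)
{
	/* Barrier only when switching to a different guest context. */
	if (current_ctx != vmcb) {
		current_ctx = vmcb;
		prediction_barrier();
	}
}

int main(void)
{
	struct vmcb a = { 1 }, b = { 2 };

	vcpu_load_model(&a);	/* IBPB */
	vcpu_load_model(&a);	/* no IBPB: same context */
	vcpu_load_model(&b);	/* IBPB */
	return 0;
}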
