Skip to content

Commit d05ca6b

Browse files
committed
Merge tag 'kvm-x86-misc-6.18' of https://github.com/kvm-x86/linux into HEAD
KVM x86 changes for 6.18

- Don't (re)check L1 intercepts when completing userspace I/O to fix a flaw where a misbehaving userspace (a.k.a. syzkaller) could swizzle L1's intercepts and trigger a variety of WARNs in KVM.
- Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2 guests, as the MSR is supposed to exist for v2 PMUs.
- Allow Centaur CPU leaves (base 0xC000_0000) for Zhaoxin CPUs.
- Clean up KVM's vector hashing code for delivering lowest priority IRQs.
- Clean up the fastpath handler code to only handle IPIs and WRMSRs that are actually "fast", as opposed to handling those that KVM _hopes_ are fast, and in the process of doing so add fastpath support for TSC_DEADLINE writes on AMD CPUs.
- Clean up a pile of PMU code in anticipation of adding support for mediated vPMUs.
- Add support for the immediate forms of RDMSR and WRMSRNS, sans full emulator support (KVM should never need to emulate the MSRs outside of forced emulation and other contrived testing scenarios).
- Clean up the MSR APIs in preparation for CET and FRED virtualization, as well as mediated vPMU support.
- Reject a fully in-kernel IRQCHIP if EOIs are protected, i.e. for TDX VMs, as KVM can't faithfully emulate an I/O APIC for such guests.
- Rework KVM_REQ_MSR_FILTER_CHANGED into a generic RECALC_INTERCEPTS in preparation for mediated vPMU support, as KVM will need to recalculate MSR intercepts in response to PMU refreshes for guests with mediated vPMUs.
- Misc cleanups and minor fixes.
2 parents 10ef74c + 86bcd23 commit d05ca6b

File tree

34 files changed

+711
-516
lines changed

34 files changed

+711
-516
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3075,6 +3075,12 @@ This IOCTL replaces the obsolete KVM_GET_PIT.
30753075
Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
30763076
See KVM_GET_PIT2 for details on struct kvm_pit_state2.
30773077

3078+
.. Tip::
3079+
``KVM_SET_PIT2`` strictly adheres to the spec of Intel 8254 PIT. For example,
3080+
a ``count`` value of 0 in ``struct kvm_pit_channel_state`` is interpreted as
3081+
65536, which is the maximum count value. Refer to `Intel 8254 programmable
3082+
interval timer <https://www.scs.stanford.edu/10wi-cs140/pintos/specs/8254.pdf>`_.
3083+
30783084
This IOCTL replaces the obsolete KVM_SET_PIT.
30793085

30803086

Documentation/virt/kvm/x86/hypercalls.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
137137
Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
138138
or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
139139

140-
6. KVM_HC_SEND_IPI
140+
7. KVM_HC_SEND_IPI
141141
------------------
142142

143143
:Architecture: x86
@@ -158,7 +158,7 @@ corresponds to the APIC ID a2+1, and so on.
158158

159159
Returns the number of CPUs to which the IPIs were delivered successfully.
160160

161-
7. KVM_HC_SCHED_YIELD
161+
8. KVM_HC_SCHED_YIELD
162162
---------------------
163163

164164
:Architecture: x86
@@ -170,7 +170,7 @@ a0: destination APIC ID
170170
:Usage example: When sending a call-function IPI-many to vCPUs, yield if
171171
any of the IPI target vCPUs was preempted.
172172

173-
8. KVM_HC_MAP_GPA_RANGE
173+
9. KVM_HC_MAP_GPA_RANGE
174174
-------------------------
175175
:Architecture: x86
176176
:Status: active

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@
497497
#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
498498
#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
499499
#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
500+
#define X86_FEATURE_MSR_IMM (21*32+15) /* MSR immediate form instructions */
500501

501502
/*
502503
* BUG word(s)

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ KVM_X86_OP(check_emulate_instruction)
138138
KVM_X86_OP(apic_init_signal_blocked)
139139
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
140140
KVM_X86_OP_OPTIONAL(migrate_timers)
141-
KVM_X86_OP(recalc_msr_intercepts)
141+
KVM_X86_OP(recalc_intercepts)
142142
KVM_X86_OP(complete_emulated_msr)
143143
KVM_X86_OP(vcpu_deliver_sipi_vector)
144144
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);

arch/x86/include/asm/kvm_host.h

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@
120120
#define KVM_REQ_TLB_FLUSH_GUEST \
121121
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
122122
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
123-
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
123+
#define KVM_REQ_RECALC_INTERCEPTS KVM_ARCH_REQ(29)
124124
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
125125
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
126126
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
@@ -545,10 +545,10 @@ struct kvm_pmc {
545545
#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
546546
KVM_MAX_NR_AMD_GP_COUNTERS)
547547

548-
#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
549-
#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
550-
#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
551-
KVM_MAX_NR_AMD_FIXED_COUTNERS)
548+
#define KVM_MAX_NR_INTEL_FIXED_COUNTERS 3
549+
#define KVM_MAX_NR_AMD_FIXED_COUNTERS 0
550+
#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
551+
KVM_MAX_NR_AMD_FIXED_COUNTERS)
552552

553553
struct kvm_pmu {
554554
u8 version;
@@ -579,6 +579,9 @@ struct kvm_pmu {
579579
DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
580580
DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
581581

582+
DECLARE_BITMAP(pmc_counting_instructions, X86_PMC_IDX_MAX);
583+
DECLARE_BITMAP(pmc_counting_branches, X86_PMC_IDX_MAX);
584+
582585
u64 ds_area;
583586
u64 pebs_enable;
584587
u64 pebs_enable_rsvd;
@@ -771,6 +774,7 @@ enum kvm_only_cpuid_leafs {
771774
CPUID_7_2_EDX,
772775
CPUID_24_0_EBX,
773776
CPUID_8000_0021_ECX,
777+
CPUID_7_1_ECX,
774778
NR_KVM_CPU_CAPS,
775779

776780
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -926,6 +930,7 @@ struct kvm_vcpu_arch {
926930
bool emulate_regs_need_sync_from_vcpu;
927931
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
928932
unsigned long cui_linear_rip;
933+
int cui_rdmsr_imm_reg;
929934

930935
gpa_t time;
931936
s8 pvclock_tsc_shift;
@@ -1381,6 +1386,7 @@ struct kvm_arch {
13811386
u8 vm_type;
13821387
bool has_private_mem;
13831388
bool has_protected_state;
1389+
bool has_protected_eoi;
13841390
bool pre_fault_allowed;
13851391
struct hlist_head *mmu_page_hash;
13861392
struct list_head active_mmu_pages;
@@ -1921,7 +1927,7 @@ struct kvm_x86_ops {
19211927
int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
19221928

19231929
void (*migrate_timers)(struct kvm_vcpu *vcpu);
1924-
void (*recalc_msr_intercepts)(struct kvm_vcpu *vcpu);
1930+
void (*recalc_intercepts)(struct kvm_vcpu *vcpu);
19251931
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
19261932

19271933
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
@@ -2162,13 +2168,16 @@ void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
21622168

21632169
void kvm_enable_efer_bits(u64);
21642170
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
2165-
int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
2166-
int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
2167-
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
2168-
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
2169-
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
2171+
int kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
2172+
int kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
2173+
int __kvm_emulate_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
2174+
int __kvm_emulate_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
2175+
int kvm_msr_read(struct kvm_vcpu *vcpu, u32 index, u64 *data);
2176+
int kvm_msr_write(struct kvm_vcpu *vcpu, u32 index, u64 data);
21702177
int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
2178+
int kvm_emulate_rdmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
21712179
int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
2180+
int kvm_emulate_wrmsr_imm(struct kvm_vcpu *vcpu, u32 msr, int reg);
21722181
int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
21732182
int kvm_emulate_invd(struct kvm_vcpu *vcpu);
21742183
int kvm_emulate_mwait(struct kvm_vcpu *vcpu);

arch/x86/include/asm/msr-index.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -315,12 +315,15 @@
315315
#define PERF_CAP_PT_IDX 16
316316

317317
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
318-
#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
319-
#define PERF_CAP_ARCH_REG BIT_ULL(7)
320-
#define PERF_CAP_PEBS_FORMAT 0xf00
321-
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
322-
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
323-
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
318+
319+
#define PERF_CAP_LBR_FMT 0x3f
320+
#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
321+
#define PERF_CAP_ARCH_REG BIT_ULL(7)
322+
#define PERF_CAP_PEBS_FORMAT 0xf00
323+
#define PERF_CAP_FW_WRITES BIT_ULL(13)
324+
#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
325+
#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
326+
PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
324327

325328
#define MSR_IA32_RTIT_CTL 0x00000570
326329
#define RTIT_CTL_TRACEEN BIT(0)
@@ -733,6 +736,7 @@
733736
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
734737
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
735738
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
739+
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
736740

737741
/* AMD Hardware Feedback Support MSRs */
738742
#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500

arch/x86/include/uapi/asm/vmx.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@
9494
#define EXIT_REASON_BUS_LOCK 74
9595
#define EXIT_REASON_NOTIFY 75
9696
#define EXIT_REASON_TDCALL 77
97+
#define EXIT_REASON_MSR_READ_IMM 84
98+
#define EXIT_REASON_MSR_WRITE_IMM 85
9799

98100
#define VMX_EXIT_REASONS \
99101
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -158,7 +160,9 @@
158160
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
159161
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
160162
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
161-
{ EXIT_REASON_TDCALL, "TDCALL" }
163+
{ EXIT_REASON_TDCALL, "TDCALL" }, \
164+
{ EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
165+
{ EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
162166

163167
#define VMX_EXIT_REASON_FLAGS \
164168
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }

arch/x86/kernel/cpu/scattered.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = {
2727
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
2828
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
2929
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
30+
{ X86_FEATURE_MSR_IMM, CPUID_ECX, 5, 0x00000007, 1 },
3031
{ X86_FEATURE_APX, CPUID_EDX, 21, 0x00000007, 1 },
3132
{ X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
3233
{ X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 },

arch/x86/kvm/cpuid.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,8 @@ void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
448448
* adjustments to the reserved GPA bits.
449449
*/
450450
kvm_mmu_after_set_cpuid(vcpu);
451+
452+
kvm_make_request(KVM_REQ_RECALC_INTERCEPTS, vcpu);
451453
}
452454

453455
int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
@@ -985,6 +987,10 @@ void kvm_set_cpu_caps(void)
985987
F(LAM),
986988
);
987989

990+
kvm_cpu_cap_init(CPUID_7_1_ECX,
991+
SCATTERED_F(MSR_IMM),
992+
);
993+
988994
kvm_cpu_cap_init(CPUID_7_1_EDX,
989995
F(AVX_VNNI_INT8),
990996
F(AVX_NE_CONVERT),
@@ -1411,9 +1417,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
14111417
goto out;
14121418

14131419
cpuid_entry_override(entry, CPUID_7_1_EAX);
1420+
cpuid_entry_override(entry, CPUID_7_1_ECX);
14141421
cpuid_entry_override(entry, CPUID_7_1_EDX);
14151422
entry->ebx = 0;
1416-
entry->ecx = 0;
14171423
}
14181424
if (max_idx >= 2) {
14191425
entry = do_host_cpuid(array, function, 2);
@@ -1820,7 +1826,8 @@ static int get_cpuid_func(struct kvm_cpuid_array *array, u32 func,
18201826
int r;
18211827

18221828
if (func == CENTAUR_CPUID_SIGNATURE &&
1823-
boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR)
1829+
boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
1830+
boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
18241831
return 0;
18251832

18261833
r = do_cpuid_func(array, func, type);
@@ -2001,7 +2008,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
20012008
if (function == 7 && index == 0) {
20022009
u64 data;
20032010
if ((*ebx & (feature_bit(RTM) | feature_bit(HLE))) &&
2004-
!__kvm_get_msr(vcpu, MSR_IA32_TSX_CTRL, &data, true) &&
2011+
!kvm_msr_read(vcpu, MSR_IA32_TSX_CTRL, &data) &&
20052012
(data & TSX_CTRL_CPUID_CLEAR))
20062013
*ebx &= ~(feature_bit(RTM) | feature_bit(HLE));
20072014
} else if (function == 0x80000007) {

arch/x86/kvm/emulate.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4330,8 +4330,8 @@ static const struct opcode opcode_table[256] = {
43304330
I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
43314331
G(ByteOp, group11), G(0, group11),
43324332
/* 0xC8 - 0xCF */
4333-
I(Stack | SrcImmU16 | Src2ImmByte | IsBranch, em_enter),
4334-
I(Stack | IsBranch, em_leave),
4333+
I(Stack | SrcImmU16 | Src2ImmByte, em_enter),
4334+
I(Stack, em_leave),
43354335
I(ImplicitOps | SrcImmU16 | IsBranch, em_ret_far_imm),
43364336
I(ImplicitOps | IsBranch, em_ret_far),
43374337
D(ImplicitOps | IsBranch), DI(SrcImmByte | IsBranch, intn),
@@ -5107,12 +5107,11 @@ void init_decode_cache(struct x86_emulate_ctxt *ctxt)
51075107
ctxt->mem_read.end = 0;
51085108
}
51095109

5110-
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
5110+
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, bool check_intercepts)
51115111
{
51125112
const struct x86_emulate_ops *ops = ctxt->ops;
51135113
int rc = X86EMUL_CONTINUE;
51145114
int saved_dst_type = ctxt->dst.type;
5115-
bool is_guest_mode = ctxt->ops->is_guest_mode(ctxt);
51165115

51175116
ctxt->mem_read.pos = 0;
51185117

@@ -5160,7 +5159,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
51605159
fetch_possible_mmx_operand(&ctxt->dst);
51615160
}
51625161

5163-
if (unlikely(is_guest_mode) && ctxt->intercept) {
5162+
if (unlikely(check_intercepts) && ctxt->intercept) {
51645163
rc = emulator_check_intercept(ctxt, ctxt->intercept,
51655164
X86_ICPT_PRE_EXCEPT);
51665165
if (rc != X86EMUL_CONTINUE)
@@ -5189,7 +5188,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
51895188
goto done;
51905189
}
51915190

5192-
if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
5191+
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
51935192
rc = emulator_check_intercept(ctxt, ctxt->intercept,
51945193
X86_ICPT_POST_EXCEPT);
51955194
if (rc != X86EMUL_CONTINUE)
@@ -5243,7 +5242,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
52435242

52445243
special_insn:
52455244

5246-
if (unlikely(is_guest_mode) && (ctxt->d & Intercept)) {
5245+
if (unlikely(check_intercepts) && (ctxt->d & Intercept)) {
52475246
rc = emulator_check_intercept(ctxt, ctxt->intercept,
52485247
X86_ICPT_POST_MEMACCESS);
52495248
if (rc != X86EMUL_CONTINUE)

0 commit comments

Comments
 (0)