Skip to content

Commit 039aeb9

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM: - Move the arch-specific code into arch/arm64/kvm - Start the post-32bit cleanup - Cherry-pick a few non-invasive pre-NV patches x86: - Rework of TLB flushing - Rework of event injection, especially with respect to nested virtualization - Nested AMD event injection facelift, building on the rework of generic code and fixing a lot of corner cases - Nested AMD live migration support - Optimization for TSC deadline MSR writes and IPIs - Various cleanups - Asynchronous page fault cleanups (from tglx, common topic branch with tip tree) - Interrupt-based delivery of asynchronous "page ready" events (host side) - Hyper-V MSRs and hypercalls for guest debugging - VMX preemption timer fixes s390: - Cleanups Generic: - switch vCPU thread wakeup from swait to rcuwait The other architectures, and the guest side of the asynchronous page fault work, will come next week" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (256 commits) KVM: selftests: fix rdtsc() for vmx_tsc_adjust_test KVM: check userspace_addr for all memslots KVM: selftests: update hyperv_cpuid with SynDBG tests x86/kvm/hyper-v: Add support for synthetic debugger via hypercalls x86/kvm/hyper-v: enable hypercalls regardless of hypercall page x86/kvm/hyper-v: Add support for synthetic debugger interface x86/hyper-v: Add synthetic debugger definitions KVM: selftests: VMX preemption timer migration test KVM: nVMX: Fix VMX preemption timer migration x86/kvm/hyper-v: Explicitly align hcall param for kvm_hyperv_exit KVM: x86/pmu: Support full width counting KVM: x86/pmu: Tweak kvm_pmu_get_msr to pass 'struct msr_data' in KVM: x86: announce KVM_FEATURE_ASYNC_PF_INT KVM: x86: acknowledgment mechanism for async pf page ready notifications KVM: x86: interrupt based APF 'page ready' event delivery KVM: introduce kvm_read_guest_offset_cached() KVM: rename kvm_arch_can_inject_async_page_present() to kvm_arch_can_dequeue_async_page_present() KVM: x86: extend struct 
kvm_vcpu_pv_apf_data with token info Revert "KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously" KVM: VMX: Replace zero-length array with flexible-array ...
2 parents 6b2591c + 13ffbd8 commit 039aeb9

File tree

155 files changed

+5309
-3048
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

155 files changed

+5309
-3048
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4336,9 +4336,13 @@ Errors:
43364336
#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
43374337
#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
43384338

4339+
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
4340+
43394341
struct kvm_vmx_nested_state_hdr {
4342+
__u32 flags;
43404343
__u64 vmxon_pa;
43414344
__u64 vmcs12_pa;
4345+
__u64 preemption_timer_deadline;
43424346

43434347
struct {
43444348
__u16 flags;
@@ -5068,10 +5072,13 @@ EOI was received.
50685072
struct kvm_hyperv_exit {
50695073
#define KVM_EXIT_HYPERV_SYNIC 1
50705074
#define KVM_EXIT_HYPERV_HCALL 2
5075+
#define KVM_EXIT_HYPERV_SYNDBG 3
50715076
__u32 type;
5077+
__u32 pad1;
50725078
union {
50735079
struct {
50745080
__u32 msr;
5081+
__u32 pad2;
50755082
__u64 control;
50765083
__u64 evt_page;
50775084
__u64 msg_page;
@@ -5081,6 +5088,15 @@ EOI was received.
50815088
__u64 result;
50825089
__u64 params[2];
50835090
} hcall;
5091+
struct {
5092+
__u32 msr;
5093+
__u32 pad2;
5094+
__u64 control;
5095+
__u64 status;
5096+
__u64 send_page;
5097+
__u64 recv_page;
5098+
__u64 pending_page;
5099+
} syndbg;
50845100
} u;
50855101
};
50865102
/* KVM_EXIT_HYPERV */
@@ -5097,6 +5113,12 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
50975113
event/message pages and to enable/disable SynIC messages/events processing
50985114
in userspace.
50995115

5116+
- KVM_EXIT_HYPERV_SYNDBG -- synchronously notify user-space about
5117+
5118+
Hyper-V Synthetic debugger state change. Notification is used to either update
5119+
the pending_page location or to send a control command (send the buffer located
5120+
in send_page or recv a buffer to recv_page).
5121+
51005122
::
51015123

51025124
/* KVM_EXIT_ARM_NISV */
@@ -5779,7 +5801,7 @@ will be initialized to 1 when created. This also improves performance because
57795801
dirty logging can be enabled gradually in small chunks on the first call
57805802
to KVM_CLEAR_DIRTY_LOG. KVM_DIRTY_LOG_INITIALLY_SET depends on
57815803
KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (it is also only available on
5782-
x86 for now).
5804+
x86 and arm64 for now).
57835805

57845806
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
57855807
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
@@ -5804,6 +5826,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
58045826
will allow the transition to secure guest mode. Otherwise KVM will
58055827
veto the transition.
58065828

5829+
7.20 KVM_CAP_HALT_POLL
5830+
----------------------
5831+
5832+
:Architectures: all
5833+
:Target: VM
5834+
:Parameters: args[0] is the maximum poll time in nanoseconds
5835+
:Returns: 0 on success; -1 on error
5836+
5837+
This capability overrides the kvm module parameter halt_poll_ns for the
5838+
target VM.
5839+
5840+
VCPU polling allows a VCPU to poll for wakeup events instead of immediately
5841+
scheduling during guest halts. The maximum time a VCPU can spend polling is
5842+
controlled by the kvm module parameter halt_poll_ns. This capability allows
5843+
the maximum halt time to be specified on a per-VM basis, effectively overriding
5844+
the module parameter for the target VM.
5845+
58075846
8. Other capabilities.
58085847
======================
58095848

Documentation/virt/kvm/cpuid.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
5050
KVM_FEATURE_MMU_OP 2 deprecated
5151

5252
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
53-
5453
0x4b564d00 and 0x4b564d01
54+
5555
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
5656
writing to msr 0x4b564d02
5757

@@ -86,6 +86,12 @@ KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
8686
before using paravirtualized
8787
sched yield.
8888

89+
KVM_FEATURE_ASYNC_PF_INT 14 guest checks this feature bit
90+
before using the second async
91+
pf control msr 0x4b564d06 and
92+
async pf acknowledgment msr
93+
0x4b564d07.
94+
8995
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24 host will warn if no guest-side
9096
per-cpu warps are expected in
9197
kvmclock

Documentation/virt/kvm/msr.rst

Lines changed: 88 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -190,41 +190,72 @@ MSR_KVM_ASYNC_PF_EN:
190190
0x4b564d02
191191

192192
data:
193-
Bits 63-6 hold 64-byte aligned physical address of a
194-
64 byte memory area which must be in guest RAM and must be
195-
zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
196-
when asynchronous page faults are enabled on the vcpu 0 when
197-
disabled. Bit 1 is 1 if asynchronous page faults can be injected
198-
when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
199-
are delivered to L1 as #PF vmexits. Bit 2 can be set only if
200-
KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
201-
202-
First 4 byte of 64 byte memory location will be written to by
203-
the hypervisor at the time of asynchronous page fault (APF)
204-
injection to indicate type of asynchronous page fault. Value
205-
of 1 means that the page referred to by the page fault is not
206-
present. Value 2 means that the page is now available. Disabling
207-
interrupt inhibits APFs. Guest must not enable interrupt
208-
before the reason is read, or it may be overwritten by another
209-
APF. Since APF uses the same exception vector as regular page
210-
fault guest must reset the reason to 0 before it does
211-
something that can generate normal page fault. If during page
212-
fault APF reason is 0 it means that this is regular page
213-
fault.
214-
215-
During delivery of type 1 APF cr2 contains a token that will
216-
be used to notify a guest when missing page becomes
217-
available. When page becomes available type 2 APF is sent with
218-
cr2 set to the token associated with the page. There is special
219-
kind of token 0xffffffff which tells vcpu that it should wake
220-
up all processes waiting for APFs and no individual type 2 APFs
221-
will be sent.
193+
Asynchronous page fault (APF) control MSR.
194+
195+
Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
196+
which must be in guest RAM and must be zeroed. This memory is expected
197+
to hold a copy of the following structure::
198+
199+
struct kvm_vcpu_pv_apf_data {
200+
/* Used for 'page not present' events delivered via #PF */
201+
__u32 flags;
202+
203+
/* Used for 'page ready' events delivered via interrupt notification */
204+
__u32 token;
205+
206+
__u8 pad[56];
207+
__u32 enabled;
208+
};
209+
210+
Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
211+
when asynchronous page faults are enabled on the vcpu, 0 when disabled.
212+
Bit 1 is 1 if asynchronous page faults can be injected when vcpu is in
213+
cpl == 0. Bit 2 is 1 if asynchronous page faults are delivered to L1 as
214+
#PF vmexits. Bit 2 can be set only if KVM_FEATURE_ASYNC_PF_VMEXIT is
215+
present in CPUID. Bit 3 enables interrupt based delivery of 'page ready'
216+
events. Bit 3 can only be set if KVM_FEATURE_ASYNC_PF_INT is present in
217+
CPUID.
218+
219+
'Page not present' events are currently always delivered as synthetic
220+
#PF exception. During delivery of these events APF CR2 register contains
221+
a token that will be used to notify the guest when missing page becomes
222+
available. Also, to make it possible to distinguish between real #PF and
223+
APF, first 4 bytes of 64 byte memory location ('flags') will be written
224+
to by the hypervisor at the time of injection. Only first bit of 'flags'
225+
is currently supported, when set, it indicates that the guest is dealing
226+
with asynchronous 'page not present' event. If during a page fault APF
227+
'flags' is '0' it means that this is regular page fault. Guest is
228+
supposed to clear 'flags' when it is done handling #PF exception so the
229+
next event can be delivered.
230+
231+
Note, since APF 'page not present' events use the same exception vector
232+
as regular page fault, guest must reset 'flags' to '0' before it does
233+
something that can generate normal page fault.
234+
235+
Bytes 4-7 of 64 byte memory location ('token') will be written to by the
236+
hypervisor at the time of APF 'page ready' event injection. The content
237+
of these bytes is a token which was previously delivered as 'page not
238+
present' event. The event indicates the page is now available. Guest is
239+
supposed to write '0' to 'token' when it is done handling 'page ready'
240+
event and to write '1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
241+
writing to the MSR forces KVM to re-scan its queue and deliver the next
242+
pending notification.
243+
244+
Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
245+
ready' APF delivery needs to be written to before enabling APF mechanism
246+
in MSR_KVM_ASYNC_PF_EN or interrupt #0 can get injected. The MSR is
247+
available if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
248+
249+
Note, previously, 'page ready' events were delivered via the same #PF
250+
exception as 'page not present' events but this is now deprecated. If
251+
bit 3 (interrupt based delivery) is not set APF events are not delivered.
222252

223253
If APF is disabled while there are outstanding APFs, they will
224254
not be delivered.
225255

226-
Currently type 2 APF will be always delivered on the same vcpu as
227-
type 1 was, but guest should not rely on that.
256+
Currently 'page ready' APF events will be always delivered on the
257+
same vcpu as 'page not present' event was, but guest should not rely on
258+
that.
228259

229260
MSR_KVM_STEAL_TIME:
230261
0x4b564d03
@@ -319,3 +350,29 @@ data:
319350

320351
KVM guests can request the host not to poll on HLT, for example if
321352
they are performing polling themselves.
353+
354+
MSR_KVM_ASYNC_PF_INT:
355+
0x4b564d06
356+
357+
data:
358+
Second asynchronous page fault (APF) control MSR.
359+
360+
Bits 0-7: APIC vector for delivery of 'page ready' APF events.
361+
Bits 8-63: Reserved
362+
363+
Interrupt vector for asynchronous 'page ready' notifications delivery.
364+
The vector has to be set up before asynchronous page fault mechanism
365+
is enabled in MSR_KVM_ASYNC_PF_EN. The MSR is only available if
366+
KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
367+
368+
MSR_KVM_ASYNC_PF_ACK:
369+
0x4b564d07
370+
371+
data:
372+
Asynchronous page fault (APF) acknowledgment.
373+
374+
When the guest is done processing 'page ready' APF event and 'token'
375+
field in 'struct kvm_vcpu_pv_apf_data' is cleared it is supposed to
376+
write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
377+
and check if there are more notifications pending. The MSR is available
378+
if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.

Documentation/virt/kvm/nested-vmx.rst

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,7 @@ struct shadow_vmcs is ever changed.
116116
natural_width cr4_guest_host_mask;
117117
natural_width cr0_read_shadow;
118118
natural_width cr4_read_shadow;
119-
natural_width cr3_target_value0;
120-
natural_width cr3_target_value1;
121-
natural_width cr3_target_value2;
122-
natural_width cr3_target_value3;
119+
natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. */
123120
natural_width exit_qualification;
124121
natural_width guest_linear_address;
125122
natural_width guest_cr0;

MAINTAINERS

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9368,7 +9368,6 @@ F: arch/arm64/include/asm/kvm*
93689368
F: arch/arm64/include/uapi/asm/kvm*
93699369
F: arch/arm64/kvm/
93709370
F: include/kvm/arm_*
9371-
F: virt/kvm/arm/
93729371

93739372
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
93749373

arch/arm64/include/asm/kvm_asm.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
6464
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
6565
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
6666

67-
extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
67+
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
6868

6969
extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
7070

7171
extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
7272

73+
extern void __kvm_enable_ssbs(void);
74+
7375
extern u64 __vgic_v3_get_ich_vtr_el2(void);
7476
extern u64 __vgic_v3_read_vmcr(void);
7577
extern void __vgic_v3_write_vmcr(u32 vmcr);

arch/arm64/include/asm/kvm_host.h

Lines changed: 6 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
4747
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
4848

49+
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
50+
KVM_DIRTY_LOG_INITIALLY_SET)
51+
4952
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
5053

5154
extern unsigned int kvm_sve_max_vl;
@@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info {
112115
u64 disr_el1; /* Deferred [SError] Status Register */
113116
};
114117

115-
/*
116-
* 0 is reserved as an invalid value.
117-
* Order should be kept in sync with the save/restore code.
118-
*/
119118
enum vcpu_sysreg {
120-
__INVALID_SYSREG__,
119+
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
121120
MPIDR_EL1, /* MultiProcessor Affinity Register */
122121
CSSELR_EL1, /* Cache Size Selection Register */
123122
SCTLR_EL1, /* System Control Register */
@@ -415,6 +414,8 @@ struct kvm_vm_stat {
415414
struct kvm_vcpu_stat {
416415
u64 halt_successful_poll;
417416
u64 halt_attempted_poll;
417+
u64 halt_poll_success_ns;
418+
u64 halt_poll_fail_ns;
418419
u64 halt_poll_invalid;
419420
u64 halt_wakeup;
420421
u64 hvc_exit_stat;
@@ -530,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
530531
cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
531532
}
532533

533-
void __kvm_enable_ssbs(void);
534-
535-
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
536-
unsigned long hyp_stack_ptr,
537-
unsigned long vector_ptr)
538-
{
539-
/*
540-
* Calculate the raw per-cpu offset without a translation from the
541-
* kernel's mapping to the linear mapping, and store it in tpidr_el2
542-
* so that we can use adr_l to access per-cpu variables in EL2.
543-
*/
544-
u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) -
545-
(u64)kvm_ksym_ref(kvm_host_data));
546-
547-
/*
548-
* Call initialization code, and switch to the full blown HYP code.
549-
* If the cpucaps haven't been finalized yet, something has gone very
550-
* wrong, and hyp will crash and burn when it uses any
551-
* cpus_have_const_cap() wrapper.
552-
*/
553-
BUG_ON(!system_capabilities_finalized());
554-
__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
555-
556-
/*
557-
* Disabling SSBD on a non-VHE system requires us to enable SSBS
558-
* at EL2.
559-
*/
560-
if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) &&
561-
arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
562-
kvm_call_hyp(__kvm_enable_ssbs);
563-
}
564-
}
565-
566534
static inline bool kvm_arch_requires_vhe(void)
567535
{
568536
/*
@@ -594,8 +562,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
594562
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
595563
struct kvm_device_attr *attr);
596564

597-
static inline void __cpu_init_stage2(void) {}
598-
599565
/* Guest/host FPSIMD coordination helpers */
600566
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
601567
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);

arch/arm64/include/asm/kvm_hyp.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@
5555

5656
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
5757

58-
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
59-
void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
60-
void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
61-
void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
62-
void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
63-
void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
58+
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
59+
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
60+
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
61+
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
62+
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
63+
void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if);
6464
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
6565

6666
void __timer_enable_traps(struct kvm_vcpu *vcpu);

0 commit comments

Comments
 (0)