Commit 0aa099a

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 - Lots of bug fixes.

 - Fix virtualization of RDPID

 - Virtualization of DR6_BUS_LOCK, which on bare metal is new to this
   release

 - More nested virtualization migration fixes (nSVM and eVMCS)

 - Fix for KVM guest hibernation

 - Fix for warning in SEV-ES SRCU usage

 - Block KVM from loading on AMD machines with 5-level page tables, due
   to the APM not mentioning how host CR4.LA57 exactly impacts the
   guest.

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (48 commits)
  KVM: SVM: Move GHCB unmapping to fix RCU warning
  KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers
  kvm: Cap halt polling at kvm->max_halt_poll_ns
  tools/kvm_stat: Fix documentation typo
  KVM: x86: Prevent deadlock against tk_core.seq
  KVM: x86: Cancel pvclock_gtod_work on module removal
  KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging
  KVM: X86: Expose bus lock debug exception to guest
  KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit
  KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks
  KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed
  KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model
  KVM: x86: Move uret MSR slot management to common x86
  KVM: x86: Export the number of uret MSRs to vendor modules
  KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way
  KVM: VMX: Use common x86's uret MSR list as the one true list
  KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list
  KVM: VMX: Configure list of user return MSRs at module init
  KVM: x86: Add support for RDPID without RDTSCP
  KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in host
  ...
2 parents: 6efb943 + ce7ea0c

25 files changed: +542 additions, -342 deletions

Documentation/virt/kvm/api.rst

Lines changed: 2 additions & 2 deletions
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
 4.126 KVM_X86_SET_MSR_FILTER
 ----------------------------
 
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
 :Architectures: x86
 :Type: vm ioctl
 :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
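For userspace, the rename matters: KVM_CAP_X86_MSR_FILTER is the capability to probe with KVM_CHECK_EXTENSION, while KVM_X86_SET_MSR_FILTER remains the vm ioctl it gates. A minimal sketch of such a probe (hypothetical stand-alone program, not part of this commit):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
        int kvm = open("/dev/kvm", O_RDWR);

        if (kvm < 0)
            return 1;
        /* KVM_CHECK_EXTENSION returns > 0 when the capability is present. */
        if (ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_MSR_FILTER) > 0)
            printf("MSR filtering available via KVM_X86_SET_MSR_FILTER\n");
        return 0;
    }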

arch/x86/include/asm/kvm_host.h

Lines changed: 12 additions & 3 deletions
@@ -113,6 +113,7 @@
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS 4
 
+#define DR6_BUS_LOCK (1 << 11)
 #define DR6_BD (1 << 13)
 #define DR6_BS (1 << 14)
 #define DR6_BT (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
  * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
  */
 #define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE 0x0001e00f
+#define DR6_VOLATILE 0x0001e80f
 #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 
 #define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
 	u32 pkru_mask;
 
 	u64 *pae_root;
-	u64 *lm_root;
+	u64 *pml4_root;
 
 	/*
 	 * check zero bits on shadow page table entries, these
@@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
 	bool direct_map;
 };
 
+extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);
 
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+	return kvm_find_user_return_msr(msr) >= 0;
+}
+
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
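Two notes on the hunks above. With DR6_BUS_LOCK (bit 11) added to DR6_VOLATILE, DR6_FIXED_1 changes from 0xffff0ff0 & ~0x0001e00f = 0xfffe0ff0 to 0xffff0ff0 & ~0x0001e80f = 0xfffe07f0, i.e. bit 11 is no longer forced to 1 and becomes guest-visible state. The uret-MSR rework replaces vendor-chosen indices (kvm_define_user_return_msr) with slots allocated by common x86. A hedged sketch of the new calling pattern, with a hypothetical vendor-module caller and the assumption that kvm_add_user_return_msr() returns the allocated slot or a negative value when the MSR cannot be added:

    static int hypothetical_vendor_hardware_setup(void)
    {
        int slot;

        /* Old flow: kvm_define_user_return_msr(<fixed index>, MSR_TSC_AUX); */
        slot = kvm_add_user_return_msr(MSR_TSC_AUX);	/* common x86 picks the slot */
        if (slot < 0)
            return slot;

        /* Later code looks the slot up instead of hard-coding it. */
        WARN_ON(kvm_find_user_return_msr(MSR_TSC_AUX) != slot);
        WARN_ON(!kvm_is_supported_user_return_msr(MSR_TSC_AUX));
        return 0;
    }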

arch/x86/include/asm/kvm_para.h

Lines changed: 2 additions & 8 deletions
@@ -7,8 +7,6 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
-extern void kvmclock_init(void);
-
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 }
 
 #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
 bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
 	return 0;
 }
 
-static inline void kvm_disable_steal_time(void)
-{
-	return;
-}
-
 static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 	return false;

arch/x86/include/uapi/asm/kvm.h

Lines changed: 2 additions & 0 deletions
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
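The new pad field only names the 2-byte hole the compiler already left between smm.flags (__u16) and flags (__u32), so the UAPI layout does not change but the padding can now be zeroed and checked explicitly. Offsets as I read the x86 ABI (a sketch to verify against your headers, not part of the commit):

    #include <stddef.h>
    #include <linux/kvm.h>

    /* vmxon_pa 0..7, vmcs12_pa 8..15, smm.flags 16..17, pad 18..19,
     * flags 20..23, preemption_timer_deadline 24..31 (8-byte aligned).
     */
    _Static_assert(offsetof(struct kvm_vmx_nested_state_hdr, pad) == 18, "pad fills the hole");
    _Static_assert(offsetof(struct kvm_vmx_nested_state_hdr, flags) == 20, "layout unchanged");
    _Static_assert(sizeof(struct kvm_vmx_nested_state_hdr) == 32, "no size change");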

arch/x86/kernel/kvm.c

Lines changed: 83 additions & 46 deletions
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -37,6 +38,7 @@
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
+#include <asm/reboot.h>
 #include <asm/svm.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
 
 		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
-		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+		pr_info("setup async PF for cpu %d\n", smp_processor_id());
 	}
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
 	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
 	__this_cpu_write(apf_reason.enabled, 0);
 
-	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+	pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
 {
-	/*
-	 * We disable PV EOI before we load a new kernel by kexec,
-	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
-	 * New kernel can re-enable when it boots.
-	 */
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	kvm_disable_steal_time();
-}
+	if (!has_steal_clock)
+		return;
 
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-				unsigned long code, void *unused)
-{
-	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-	return NOTIFY_DONE;
+	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
-static struct notifier_block kvm_pv_reboot_nb = {
-	.notifier_call = kvm_pv_reboot_notify,
-};
-
 static u64 kvm_steal_clock(int cpu)
 {
 	u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
 	return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-	if (!has_steal_clock)
-		return;
-
-	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
 	early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
 	}
 }
 
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+	kvm_disable_steal_time();
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+	if (!shutdown)
+		apf_task_wake_all();
+	kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_init();
+	local_irq_restore(flags);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	kvm_disable_steal_time();
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	apf_task_wake_all();
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_offline(false);
+	local_irq_restore(flags);
+	return 0;
 }
 
-static int kvm_cpu_online(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
 {
-	local_irq_disable();
-	kvm_guest_cpu_init();
-	local_irq_enable();
+	kvm_guest_cpu_offline(false);
+
 	return 0;
 }
 
-static int kvm_cpu_down_prepare(unsigned int cpu)
+static void kvm_resume(void)
 {
-	local_irq_disable();
-	kvm_guest_cpu_offline();
-	local_irq_enable();
-	return 0;
+	kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend	= kvm_suspend,
+	.resume		= kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+				unsigned long code, void *unused)
+{
+	if (code == SYS_RESTART)
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+	return NOTIFY_DONE;
 }
 
+static struct notifier_block kvm_pv_reboot_nb = {
+	.notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+	kvm_guest_cpu_offline(true);
+	native_machine_crash_shutdown(regs);
+}
 #endif
 
 static void __init kvm_guest_init(void)
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+	register_syscore_ops(&kvm_syscore_ops);
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is
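The restructuring above funnels reboot, kexec/crash, CPU hotplug and now suspend/resume through one kvm_guest_cpu_offline() helper, which is what lets guest hibernation unregister kvmclock, steal time, PV EOI and async PF before the image is written. Syscore callbacks run on a single CPU with interrupts already disabled, which is presumably why kvm_suspend()/kvm_resume() skip the local_irq_save() dance the hotplug path needs. A generic sketch of the pattern (stand-alone hypothetical driver, not the KVM code):

    #include <linux/init.h>
    #include <linux/syscore_ops.h>

    static int example_suspend(void)
    {
        /* Quiesce host-written shared state; runs late in suspend, IRQs off. */
        return 0;	/* a nonzero return would abort the suspend */
    }

    static void example_resume(void)
    {
        /* First chance to re-arm MSR-backed features after restore. */
    }

    static struct syscore_ops example_syscore_ops = {
        .suspend = example_suspend,
        .resume  = example_resume,
    };

    static int __init example_init(void)
    {
        register_syscore_ops(&example_syscore_ops);
        return 0;
    }
    core_initcall(example_init);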

arch/x86/kernel/kvmclock.c

Lines changed: 1 addition & 25 deletions
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_shutdown();
 }
 
 static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
 #endif
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-	machine_ops.shutdown = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
-	machine_ops.crash_shutdown = kvm_crash_shutdown;
-#endif
 	kvm_get_preset_lpj();
 
 	/*
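Per the comment removed above, bit 0 of the value written to the system-time MSR is the enable bit; writing any value with that bit clear (here 0) tells the host to stop updating the shared page. kvmclock_disable() is now a plain helper called from kvm_guest_cpu_offline() instead of kvmclock installing its own shutdown hooks. A sketch of the register/unregister pairing in the style of kvmclock.c (example_register_clock is an assumed name, not from this commit):

    /* Register: physical address of the per-cpu pvclock page, bit 0 = enable. */
    static void example_register_clock(struct pvclock_vsyscall_time_info *ti)
    {
        u64 pa = slow_virt_to_phys(&ti->pvti) | 0x01ULL;

        native_write_msr(msr_kvm_system_time, (u32)pa, (u32)(pa >> 32));
    }

    /* Unregister: any value with bit 0 clear stops the host-side writes. */
    void kvmclock_disable(void)
    {
        native_write_msr(msr_kvm_system_time, 0, 0);
    }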
