Skip to content

Commit ecd8ee7

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "Fixes for kvm on x86:

   - new selftests

   - fixes for migration with HyperV re-enlightenment enabled

   - fix RCU/SRCU usage

   - fixes for local_irq_restore misuse false positive"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  documentation/kvm: additional explanations on KVM_SET_BOOT_CPU_ID
  x86/kvm: Fix broken irq restoration in kvm_wait
  KVM: X86: Fix missing local pCPU when executing wbinvd on all dirty pCPUs
  KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish
  selftests: kvm: add set_boot_cpu_id test
  selftests: kvm: add _vm_ioctl
  selftests: kvm: add get_msr_index_features
  selftests: kvm: Add basic Hyper-V clocksources tests
  KVM: x86: hyper-v: Don't touch TSC page values when guest opted for re-enlightenment
  KVM: x86: hyper-v: Track Hyper-V TSC page status
  KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs
  KVM: x86: hyper-v: Limit guest to writing zero to HV_X64_MSR_TSC_EMULATION_STATUS
  KVM: x86/mmu: Store the address space ID in the TDP iterator
  KVM: x86/mmu: Factor out tdp_iter_return_to_root
  KVM: x86/mmu: Fix RCU usage when atomically zapping SPTEs
  KVM: x86/mmu: Fix RCU usage in handle_removed_tdp_mmu_page
2 parents 3149860 + 9ce3746 commit ecd8ee7

File tree

18 files changed

+807
-120
lines changed

18 files changed

+807
-120
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1495,7 +1495,8 @@ Fails if any VCPU has already been created.
14951495

14961496
Define which vcpu is the Bootstrap Processor (BSP). Values are the same
14971497
as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
1498-
is vcpu 0.
1498+
is vcpu 0. This ioctl has to be called before vcpu creation,
1499+
otherwise it will return EBUSY error.
14991500

15001501

15011502
4.42 KVM_GET_XSAVE
@@ -4806,8 +4807,10 @@ If an MSR access is not permitted through the filtering, it generates a
48064807
allows user space to deflect and potentially handle various MSR accesses
48074808
into user space.
48084809

4809-
If a vCPU is in running state while this ioctl is invoked, the vCPU may
4810-
experience inconsistent filtering behavior on MSR accesses.
4810+
Note, invoking this ioctl while a vCPU is running is inherently racy. However,
4811+
KVM does guarantee that vCPUs will see either the previous filter or the new
4812+
filter, e.g. MSRs with identical settings in both the old and new filter will
4813+
have deterministic behavior.
48114814

48124815
4.127 KVM_XEN_HVM_SET_ATTR
48134816
--------------------------

arch/x86/include/asm/kvm_host.h

Lines changed: 26 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -884,12 +884,29 @@ struct kvm_hv_syndbg {
884884
u64 options;
885885
};
886886

887+
/* Current state of Hyper-V TSC page clocksource */
888+
enum hv_tsc_page_status {
889+
/* TSC page was not set up or disabled */
890+
HV_TSC_PAGE_UNSET = 0,
891+
/* TSC page MSR was written by the guest, update pending */
892+
HV_TSC_PAGE_GUEST_CHANGED,
893+
/* TSC page MSR was written by KVM userspace, update pending */
894+
HV_TSC_PAGE_HOST_CHANGED,
895+
/* TSC page was properly set up and is currently active */
896+
HV_TSC_PAGE_SET,
897+
/* TSC page is currently being updated and therefore is inactive */
898+
HV_TSC_PAGE_UPDATING,
899+
/* TSC page was set up with an inaccessible GPA */
900+
HV_TSC_PAGE_BROKEN,
901+
};
902+
887903
/* Hyper-V emulation context */
888904
struct kvm_hv {
889905
struct mutex hv_lock;
890906
u64 hv_guest_os_id;
891907
u64 hv_hypercall;
892908
u64 hv_tsc_page;
909+
enum hv_tsc_page_status hv_tsc_page_status;
893910

894911
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
895912
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
@@ -931,6 +948,12 @@ enum kvm_irqchip_mode {
931948
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
932949
};
933950

951+
struct kvm_x86_msr_filter {
952+
u8 count;
953+
bool default_allow:1;
954+
struct msr_bitmap_range ranges[16];
955+
};
956+
934957
#define APICV_INHIBIT_REASON_DISABLE 0
935958
#define APICV_INHIBIT_REASON_HYPERV 1
936959
#define APICV_INHIBIT_REASON_NESTED 2
@@ -1025,16 +1048,11 @@ struct kvm_arch {
10251048
bool guest_can_read_msr_platform_info;
10261049
bool exception_payload_enabled;
10271050

1051+
bool bus_lock_detection_enabled;
1052+
10281053
/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
10291054
u32 user_space_msr_mask;
1030-
1031-
struct {
1032-
u8 count;
1033-
bool default_allow:1;
1034-
struct msr_bitmap_range ranges[16];
1035-
} msr_filter;
1036-
1037-
bool bus_lock_detection_enabled;
1055+
struct kvm_x86_msr_filter __rcu *msr_filter;
10381056

10391057
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
10401058
struct task_struct *nx_lpage_recovery_thread;

arch/x86/kernel/kvm.c

Lines changed: 10 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu)
836836

837837
static void kvm_wait(u8 *ptr, u8 val)
838838
{
839-
unsigned long flags;
840-
841839
if (in_nmi())
842840
return;
843841

844-
local_irq_save(flags);
845-
846-
if (READ_ONCE(*ptr) != val)
847-
goto out;
848-
849842
/*
850843
* halt until it's our turn and kicked. Note that we do safe halt
851844
* for irq enabled case to avoid hang when lock info is overwritten
852845
* in irq spinlock slowpath and no spurious interrupt occur to save us.
853846
*/
854-
if (arch_irqs_disabled_flags(flags))
855-
halt();
856-
else
857-
safe_halt();
847+
if (irqs_disabled()) {
848+
if (READ_ONCE(*ptr) == val)
849+
halt();
850+
} else {
851+
local_irq_disable();
858852

859-
out:
860-
local_irq_restore(flags);
853+
if (READ_ONCE(*ptr) == val)
854+
safe_halt();
855+
856+
local_irq_enable();
857+
}
861858
}
862859

863860
#ifdef CONFIG_X86_32

arch/x86/kvm/hyperv.c

Lines changed: 81 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
520520
u64 tsc;
521521

522522
/*
523-
* The guest has not set up the TSC page or the clock isn't
524-
* stable, fall back to get_kvmclock_ns.
523+
* Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
524+
* is broken, disabled or being updated.
525525
*/
526-
if (!hv->tsc_ref.tsc_sequence)
526+
if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
527527
return div_u64(get_kvmclock_ns(kvm), 100);
528528

529529
vcpu = kvm_get_vcpu(kvm, 0);
@@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
10771077
return true;
10781078
}
10791079

1080+
/*
1081+
* Don't touch TSC page values if the guest has opted for TSC emulation after
1082+
* migration. KVM doesn't fully support reenlightenment notifications and TSC
1083+
* access emulation and Hyper-V is known to expect the values in TSC page to
1084+
* stay constant before TSC access emulation is disabled from guest side
1085+
* (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
1086+
* frequency and guest visible TSC value across migration (and prevent it when
1087+
* TSC scaling is unsupported).
1088+
*/
1089+
static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
1090+
{
1091+
return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
1092+
hv->hv_tsc_emulation_control;
1093+
}
1094+
10801095
void kvm_hv_setup_tsc_page(struct kvm *kvm,
10811096
struct pvclock_vcpu_time_info *hv_clock)
10821097
{
@@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
10871102
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
10881103
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
10891104

1090-
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1105+
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
1106+
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
10911107
return;
10921108

10931109
mutex_lock(&hv->hv_lock);
@@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
11011117
*/
11021118
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
11031119
&tsc_seq, sizeof(tsc_seq))))
1120+
goto out_err;
1121+
1122+
if (tsc_seq && tsc_page_update_unsafe(hv)) {
1123+
if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
1124+
goto out_err;
1125+
1126+
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
11041127
goto out_unlock;
1128+
}
11051129

11061130
/*
11071131
* While we're computing and writing the parameters, force the
@@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
11101134
hv->tsc_ref.tsc_sequence = 0;
11111135
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
11121136
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1113-
goto out_unlock;
1137+
goto out_err;
11141138

11151139
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
1116-
goto out_unlock;
1140+
goto out_err;
11171141

11181142
/* Ensure sequence is zero before writing the rest of the struct. */
11191143
smp_wmb();
11201144
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
1121-
goto out_unlock;
1145+
goto out_err;
11221146

11231147
/*
11241148
* Now switch to the TSC page mechanism by writing the sequence.
@@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
11311155
smp_wmb();
11321156

11331157
hv->tsc_ref.tsc_sequence = tsc_seq;
1134-
kvm_write_guest(kvm, gfn_to_gpa(gfn),
1135-
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
1158+
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
1159+
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1160+
goto out_err;
1161+
1162+
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
1163+
goto out_unlock;
1164+
1165+
out_err:
1166+
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
1167+
out_unlock:
1168+
mutex_unlock(&hv->hv_lock);
1169+
}
1170+
1171+
void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
1172+
{
1173+
struct kvm_hv *hv = to_kvm_hv(kvm);
1174+
u64 gfn;
1175+
1176+
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
1177+
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
1178+
tsc_page_update_unsafe(hv))
1179+
return;
1180+
1181+
mutex_lock(&hv->hv_lock);
1182+
1183+
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
1184+
goto out_unlock;
1185+
1186+
/* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
1187+
if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
1188+
hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
1189+
1190+
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
1191+
1192+
hv->tsc_ref.tsc_sequence = 0;
1193+
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
1194+
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
1195+
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
1196+
11361197
out_unlock:
11371198
mutex_unlock(&hv->hv_lock);
11381199
}
@@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
11931254
}
11941255
case HV_X64_MSR_REFERENCE_TSC:
11951256
hv->hv_tsc_page = data;
1196-
if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
1257+
if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
1258+
if (!host)
1259+
hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
1260+
else
1261+
hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
11971262
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
1263+
} else {
1264+
hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
1265+
}
11981266
break;
11991267
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
12001268
return kvm_hv_msr_set_crash_data(kvm,
@@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
12291297
hv->hv_tsc_emulation_control = data;
12301298
break;
12311299
case HV_X64_MSR_TSC_EMULATION_STATUS:
1300+
if (data && !host)
1301+
return 1;
1302+
12321303
hv->hv_tsc_emulation_status = data;
12331304
break;
12341305
case HV_X64_MSR_TIME_REF_COUNT:

arch/x86/kvm/hyperv.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
133133

134134
void kvm_hv_setup_tsc_page(struct kvm *kvm,
135135
struct pvclock_vcpu_time_info *hv_clock);
136+
void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
136137

137138
void kvm_hv_init_vm(struct kvm *kvm);
138139
void kvm_hv_destroy_vm(struct kvm *kvm);

arch/x86/kvm/mmu/mmu_internal.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
7878
return to_shadow_page(__pa(sptep));
7979
}
8080

81+
static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
82+
{
83+
return sp->role.smm ? 1 : 0;
84+
}
85+
8186
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
8287
{
8388
/*

arch/x86/kvm/mmu/tdp_iter.c

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
2020
return gfn & -KVM_PAGES_PER_HPAGE(level);
2121
}
2222

23+
/*
24+
* Return the TDP iterator to the root PT and allow it to continue its
25+
* traversal over the paging structure from there.
26+
*/
27+
void tdp_iter_restart(struct tdp_iter *iter)
28+
{
29+
iter->yielded_gfn = iter->next_last_level_gfn;
30+
iter->level = iter->root_level;
31+
32+
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
33+
tdp_iter_refresh_sptep(iter);
34+
35+
iter->valid = true;
36+
}
37+
2338
/*
2439
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
2540
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
@@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
3146
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
3247

3348
iter->next_last_level_gfn = next_last_level_gfn;
34-
iter->yielded_gfn = iter->next_last_level_gfn;
3549
iter->root_level = root_level;
3650
iter->min_level = min_level;
37-
iter->level = root_level;
38-
iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
39-
40-
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
41-
tdp_iter_refresh_sptep(iter);
51+
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt;
52+
iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt));
4253

43-
iter->valid = true;
54+
tdp_iter_restart(iter);
4455
}
4556

4657
/*
@@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter)
159170
iter->valid = false;
160171
}
161172

162-
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
163-
{
164-
return iter->pt_path[iter->root_level - 1];
165-
}
166-

arch/x86/kvm/mmu/tdp_iter.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,8 @@ struct tdp_iter {
3636
int min_level;
3737
/* The iterator's current level within the paging structure */
3838
int level;
39+
/* The address space ID, i.e. SMM vs. regular. */
40+
int as_id;
3941
/* A snapshot of the value at sptep */
4042
u64 old_spte;
4143
/*
@@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level);
6264
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
6365
int min_level, gfn_t next_last_level_gfn);
6466
void tdp_iter_next(struct tdp_iter *iter);
65-
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
67+
void tdp_iter_restart(struct tdp_iter *iter);
6668

6769
#endif /* __KVM_X86_MMU_TDP_ITER_H */

0 commit comments

Comments (0)