Skip to content

Commit 3ebcbd2

Browse files
theli-ua and sean-jc
authored and committed
KVM: x86: Use current rather than snapshotted TSC frequency if it is constant
Don't snapshot tsc_khz into per-cpu cpu_tsc_khz if the host TSC is constant, in which case the actual TSC frequency will never change and thus capturing TSC during initialization is unnecessary; KVM can simply use tsc_khz. This value is snapshotted from kvm_timer_init->kvmclock_cpu_online->tsc_khz_changed(NULL). On CPUs with constant TSC, but not a hardware-specified TSC frequency, snapshotting cpu_tsc_khz and using that to set a VM's target TSC frequency can lead a VM to think its TSC frequency is not what it actually is if refining the TSC completes after KVM snapshots tsc_khz. The actual frequency never changes, only the kernel's calculation of what that frequency is changes. Ideally, KVM would not be able to race with TSC refinement, or would have a hook into tsc_refine_calibration_work() to get an alert when refinement is complete. Avoiding the race altogether isn't practical as refinement takes a relative eternity; it's deliberately put on a work queue outside of the normal boot sequence to avoid unnecessarily delaying boot. Adding a hook is doable, but somewhat gross due to KVM's ability to be built as a module. And if the TSC is constant, which is likely the case for every VMX/SVM-capable CPU produced in the last decade, the race can be hit if and only if userspace is able to create a VM before TSC refinement completes; refinement is slow, but not that slow. For now, punt on a proper fix, as not taking a snapshot can help some use cases and not taking a snapshot is arguably correct irrespective of the race with refinement. Signed-off-by: Anton Romanov <[email protected]> Reviewed-by: Sean Christopherson <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sean Christopherson <[email protected]>
1 parent b80732f commit 3ebcbd2

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

arch/x86/kvm/x86.c

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2974,6 +2974,22 @@ static void kvm_update_masterclock(struct kvm *kvm)
29742974
kvm_end_pvclock_update(kvm);
29752975
}
29762976

2977+
/*
2978+
* Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's
2979+
* per-CPU value (which may be zero if a CPU is going offline). Note, tsc_khz
2980+
* can change during boot even if the TSC is constant, as it's possible for KVM
2981+
* to be loaded before TSC calibration completes. Ideally, KVM would get a
2982+
* notification when calibration completes, but practically speaking calibration
2983+
* will complete before userspace is alive enough to create VMs.
2984+
*/
2985+
static unsigned long get_cpu_tsc_khz(void)
2986+
{
2987+
if (static_cpu_has(X86_FEATURE_CONSTANT_TSC))
2988+
return tsc_khz;
2989+
else
2990+
return __this_cpu_read(cpu_tsc_khz);
2991+
}
2992+
29772993
/* Called within read_seqcount_begin/retry for kvm->pvclock_sc. */
29782994
static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
29792995
{
@@ -2984,7 +3000,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
29843000
get_cpu();
29853001

29863002
data->flags = 0;
2987-
if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
3003+
if (ka->use_master_clock &&
3004+
(static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) {
29883005
#ifdef CONFIG_X86_64
29893006
struct timespec64 ts;
29903007

@@ -2998,7 +3015,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
29983015
data->flags |= KVM_CLOCK_TSC_STABLE;
29993016
hv_clock.tsc_timestamp = ka->master_cycle_now;
30003017
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
3001-
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
3018+
kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
30023019
&hv_clock.tsc_shift,
30033020
&hv_clock.tsc_to_system_mul);
30043021
data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
@@ -3108,7 +3125,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
31083125

31093126
/* Keep irq disabled to prevent changes to the clock */
31103127
local_irq_save(flags);
3111-
tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
3128+
tgt_tsc_khz = get_cpu_tsc_khz();
31123129
if (unlikely(tgt_tsc_khz == 0)) {
31133130
local_irq_restore(flags);
31143131
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -9038,9 +9055,11 @@ static void tsc_khz_changed(void *data)
90389055
struct cpufreq_freqs *freq = data;
90399056
unsigned long khz = 0;
90409057

9058+
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
9059+
90419060
if (data)
90429061
khz = freq->new;
9043-
else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
9062+
else
90449063
khz = cpufreq_quick_get(raw_smp_processor_id());
90459064
if (!khz)
90469065
khz = tsc_khz;
@@ -9061,8 +9080,10 @@ static void kvm_hyperv_tsc_notifier(void)
90619080
hyperv_stop_tsc_emulation();
90629081

90639082
/* TSC frequency always matches when on Hyper-V */
9064-
for_each_present_cpu(cpu)
9065-
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
9083+
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
9084+
for_each_present_cpu(cpu)
9085+
per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
9086+
}
90669087
kvm_caps.max_guest_tsc_khz = tsc_khz;
90679088

90689089
list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -9199,10 +9220,10 @@ static void kvm_timer_init(void)
91999220
}
92009221
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
92019222
CPUFREQ_TRANSITION_NOTIFIER);
9202-
}
92039223

9204-
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
9205-
kvmclock_cpu_online, kvmclock_cpu_down_prep);
9224+
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
9225+
kvmclock_cpu_online, kvmclock_cpu_down_prep);
9226+
}
92069227
}
92079228

92089229
#ifdef CONFIG_X86_64
@@ -9362,10 +9383,11 @@ void kvm_arch_exit(void)
93629383
#endif
93639384
kvm_lapic_exit();
93649385

9365-
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
9386+
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
93669387
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
93679388
CPUFREQ_TRANSITION_NOTIFIER);
9368-
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
9389+
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
9390+
}
93699391
#ifdef CONFIG_X86_64
93709392
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
93719393
irq_work_sync(&pvclock_irq_work);

0 commit comments

Comments
 (0)