Skip to content

Commit fc1dc0d

Browse files
committed
Merge tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 timer updates from Thomas Gleixner:

 - Use the number of packages from the topology information to decide whether the TSC can be trusted, instead of using the number of online nodes, which does not reflect the real topology.

 - Stop PIT timer 0 when it's not in use, so as to stop pointless emulation in the VMM.

 - Fix the PIT stop sequence for timer 0 so it truly stops on both real hardware and buggy VMM emulations.

* tag 'x86-timers-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/tsc: Check for sockets instead of CPUs to make code match comment
  clockevents/drivers/i8253: Fix stop sequence for timer 0
  x86/i8253: Disable PIT timer 0 when not in use
  x86/tsc: Use topology_max_packages() to get package number
2 parents b507535 + e7ff4eb commit fc1dc0d

File tree

5 files changed

+48
-35
lines changed

5 files changed

+48
-35
lines changed

arch/x86/kernel/cpu/mshyperv.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include <linux/interrupt.h>
1717
#include <linux/irq.h>
1818
#include <linux/kexec.h>
19-
#include <linux/i8253.h>
2019
#include <linux/random.h>
2120
#include <asm/processor.h>
2221
#include <asm/hypervisor.h>
@@ -537,16 +536,6 @@ static void __init ms_hyperv_init_platform(void)
537536
if (efi_enabled(EFI_BOOT))
538537
x86_platform.get_nmi_reason = hv_get_nmi_reason;
539538

540-
/*
541-
* Hyper-V VMs have a PIT emulation quirk such that zeroing the
542-
* counter register during PIT shutdown restarts the PIT. So it
543-
* continues to interrupt @18.2 HZ. Setting i8253_clear_counter
544-
* to false tells pit_shutdown() not to zero the counter so that
545-
* the PIT really is shutdown. Generation 2 VMs don't have a PIT,
546-
* and setting this value has no effect.
547-
*/
548-
i8253_clear_counter_on_shutdown = false;
549-
550539
#if IS_ENABLED(CONFIG_HYPERV)
551540
if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) ||
552541
ms_hyperv.paravisor_present)

arch/x86/kernel/i8253.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <linux/timex.h>
99
#include <linux/i8253.h>
1010

11+
#include <asm/hypervisor.h>
1112
#include <asm/apic.h>
1213
#include <asm/hpet.h>
1314
#include <asm/time.h>
@@ -39,9 +40,15 @@ static bool __init use_pit(void)
3940

4041
bool __init pit_timer_init(void)
4142
{
42-
if (!use_pit())
43+
if (!use_pit()) {
44+
/*
45+
* Don't just ignore the PIT. Ensure it's stopped, because
46+
* VMMs otherwise steal CPU time just to pointlessly waggle
47+
* the (masked) IRQ.
48+
*/
49+
clockevent_i8253_disable();
4350
return false;
44-
51+
}
4552
clockevent_i8253_init(true);
4653
global_clock_event = &i8253_clockevent;
4754
return true;

arch/x86/kernel/tsc.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <asm/apic.h>
2929
#include <asm/cpu_device_id.h>
3030
#include <asm/i8259.h>
31+
#include <asm/topology.h>
3132
#include <asm/uv/uv.h>
3233

3334
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
@@ -1253,15 +1254,12 @@ static void __init check_system_tsc_reliable(void)
12531254
* - TSC which does not stop in C-States
12541255
* - the TSC_ADJUST register which allows to detect even minimal
12551256
* modifications
1256-
* - not more than two sockets. As the number of sockets cannot be
1257-
* evaluated at the early boot stage where this has to be
1258-
* invoked, check the number of online memory nodes as a
1259-
* fallback solution which is an reasonable estimate.
1257+
* - not more than four packages
12601258
*/
12611259
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
12621260
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
12631261
boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
1264-
nr_online_nodes <= 4)
1262+
topology_max_packages() <= 4)
12651263
tsc_disable_clocksource_watchdog();
12661264
}
12671265

@@ -1290,7 +1288,7 @@ int unsynchronized_tsc(void)
12901288
*/
12911289
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
12921290
/* assume multi socket systems are not synchronized: */
1293-
if (num_possible_cpus() > 1)
1291+
if (topology_max_packages() > 1)
12941292
return 1;
12951293
}
12961294

drivers/clocksource/i8253.c

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,6 @@
2020
DEFINE_RAW_SPINLOCK(i8253_lock);
2121
EXPORT_SYMBOL(i8253_lock);
2222

23-
/*
24-
* Handle PIT quirk in pit_shutdown() where zeroing the counter register
25-
* restarts the PIT, negating the shutdown. On platforms with the quirk,
26-
* platform specific code can set this to false.
27-
*/
28-
bool i8253_clear_counter_on_shutdown __ro_after_init = true;
29-
3023
#ifdef CONFIG_CLKSRC_I8253
3124
/*
3225
* Since the PIT overflows every tick, its not very useful
@@ -108,21 +101,47 @@ int __init clocksource_i8253_init(void)
108101
#endif
109102

110103
#ifdef CONFIG_CLKEVT_I8253
111-
static int pit_shutdown(struct clock_event_device *evt)
104+
void clockevent_i8253_disable(void)
112105
{
113-
if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
114-
return 0;
115-
116106
raw_spin_lock(&i8253_lock);
117107

108+
/*
109+
* Writing the MODE register should stop the counter, according to
110+
* the datasheet. This appears to work on real hardware (well, on
111+
* modern Intel and AMD boxes; I didn't dig the Pegasos out of the
112+
* shed).
113+
*
114+
* However, some virtual implementations differ, and the MODE change
115+
* doesn't have any effect until either the counter is written (KVM
116+
* in-kernel PIT) or the next interrupt (QEMU). And in those cases,
117+
* it may not stop the *count*, only the interrupts. Although in
118+
* the virt case, that probably doesn't matter, as the value of the
119+
* counter will only be calculated on demand if the guest reads it;
120+
* it's the interrupts which cause steal time.
121+
*
122+
* Hyper-V apparently has a bug where even in mode 0, the IRQ keeps
123+
* firing repeatedly if the counter is running. But it *does* do the
124+
* right thing when the MODE register is written.
125+
*
126+
* So: write the MODE and then load the counter, which ensures that
127+
* the IRQ is stopped on those buggy virt implementations. And then
128+
* write the MODE again, which is the right way to stop it.
129+
*/
118130
outb_p(0x30, PIT_MODE);
131+
outb_p(0, PIT_CH0);
132+
outb_p(0, PIT_CH0);
119133

120-
if (i8253_clear_counter_on_shutdown) {
121-
outb_p(0, PIT_CH0);
122-
outb_p(0, PIT_CH0);
123-
}
134+
outb_p(0x30, PIT_MODE);
124135

125136
raw_spin_unlock(&i8253_lock);
137+
}
138+
139+
static int pit_shutdown(struct clock_event_device *evt)
140+
{
141+
if (!clockevent_state_oneshot(evt) && !clockevent_state_periodic(evt))
142+
return 0;
143+
144+
clockevent_i8253_disable();
126145
return 0;
127146
}
128147

include/linux/i8253.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@
2121
#define PIT_LATCH ((PIT_TICK_RATE + HZ/2) / HZ)
2222

2323
extern raw_spinlock_t i8253_lock;
24-
extern bool i8253_clear_counter_on_shutdown;
2524
extern struct clock_event_device i8253_clockevent;
2625
extern void clockevent_i8253_init(bool oneshot);
26+
extern void clockevent_i8253_disable(void);
2727

2828
extern void setup_pit_timer(void);
2929

0 commit comments

Comments
 (0)