Skip to content

Commit cec5f26

Browse files
Kyung Min Park authored and KAGA-KOKO committed
x86/delay: Introduce TPAUSE delay
TPAUSE instructs the processor to enter an implementation-dependent optimized state. The instruction execution wakes up when the time-stamp counter reaches or exceeds the implicit EDX:EAX 64-bit input value. The instruction execution also wakes up due to the expiration of the operating system time-limit, or due to an external interrupt or an exception such as a debug exception or a machine check exception. TPAUSE offers a choice of two lower power states: (1) the light-weight power/performance optimized state C0.1, and (2) the improved power/performance optimized state C0.2. This way, it can save power with low wake-up latency in comparison to a spinloop-based delay. The selection between the two is governed by the input register. TPAUSE is available on processors with X86_FEATURE_WAITPKG. Co-developed-by: Fenghua Yu <[email protected]> Signed-off-by: Fenghua Yu <[email protected]> Signed-off-by: Kyung Min Park <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Reviewed-by: Tony Luck <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 46f90c7 commit cec5f26

File tree

5 files changed

+57
-0
lines changed

5 files changed

+57
-0
lines changed

arch/x86/Kconfig.assembler

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,7 @@ config AS_SHA256_NI
1515
def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
1616
help
1717
Supported by binutils >= 2.24 and LLVM integrated assembler
18+
config AS_TPAUSE
19+
def_bool $(as-instr,tpause %ecx)
20+
help
21+
Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7

arch/x86/include/asm/delay.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/init.h>
77

88
void __init use_tsc_delay(void);
9+
void __init use_tpause_delay(void);
910
void use_mwaitx_delay(void);
1011

1112
#endif /* _ASM_X86_DELAY_H */

arch/x86/include/asm/mwait.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
2323
#define MWAITX_MAX_WAIT_CYCLES UINT_MAX
2424
#define MWAITX_DISABLE_CSTATES 0xf0
25+
#define TPAUSE_C01_STATE 1
26+
#define TPAUSE_C02_STATE 0
2527

2628
u32 get_umwait_control_msr(void);
2729

@@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
122124
current_clr_polling();
123125
}
124126

127+
/*
128+
* Caller can specify whether to enter C0.1 (low latency, less
129+
* power saving) or C0.2 state (saves more power, but longer wakeup
130+
* latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
131+
* which can force requests for C0.2 to be downgraded to C0.1.
132+
*/
133+
static inline void __tpause(u32 ecx, u32 edx, u32 eax)
134+
{
135+
/* "tpause %ecx, %edx, %eax;" */
136+
#ifdef CONFIG_AS_TPAUSE
137+
asm volatile("tpause %%ecx\n"
138+
:
139+
: "c"(ecx), "d"(edx), "a"(eax));
140+
#else
141+
asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
142+
:
143+
: "c"(ecx), "d"(edx), "a"(eax));
144+
#endif
145+
}
146+
125147
#endif /* _ASM_X86_MWAIT_H */

arch/x86/kernel/time.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,9 @@ static __init void x86_late_time_init(void)
103103
*/
104104
x86_init.irqs.intr_mode_init();
105105
tsc_init();
106+
107+
if (static_cpu_has(X86_FEATURE_WAITPKG))
108+
use_tpause_delay();
106109
}
107110

108111
/*

arch/x86/lib/delay.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,27 @@ static void delay_tsc(u64 cycles)
9696
preempt_enable();
9797
}
9898

99+
/*
100+
* On Intel the TPAUSE instruction waits until any of:
101+
* 1) the TSC counter exceeds the value provided in EDX:EAX
102+
* 2) global timeout in IA32_UMWAIT_CONTROL is exceeded
103+
* 3) an external interrupt occurs
104+
*/
105+
static void delay_halt_tpause(u64 start, u64 cycles)
106+
{
107+
u64 until = start + cycles;
108+
u32 eax, edx;
109+
110+
eax = lower_32_bits(until);
111+
edx = upper_32_bits(until);
112+
113+
/*
114+
* Hard code the deeper (C0.2) sleep state because exit latency is
115+
* small compared to the "microseconds" that usleep() will delay.
116+
*/
117+
__tpause(TPAUSE_C02_STATE, edx, eax);
118+
}
119+
99120
/*
100121
* On some AMD platforms, MWAITX has a configurable 32-bit timer, that
101122
* counts with TSC frequency. The input value is the number of TSC cycles
@@ -156,6 +177,12 @@ void __init use_tsc_delay(void)
156177
delay_fn = delay_tsc;
157178
}
158179

180+
/*
 * Switch the delay implementation over to TPAUSE: delay_halt() becomes
 * the active delay function and delay_halt_tpause() is installed as the
 * halt primitive it uses.
 */
void __init use_tpause_delay(void)
{
	/* Install the halt primitive first, then route delays through it. */
	delay_halt_fn = delay_halt_tpause;
	delay_fn      = delay_halt;
}
185+
159186
void use_mwaitx_delay(void)
160187
{
161188
delay_halt_fn = delay_halt_mwaitx;

0 commit comments

Comments
 (0)