Skip to content

Commit d26e46f

Browse files
ubizjak authored and Ingo Molnar committed
locking/atomic/x86: Introduce arch_try_cmpxchg64_local()
Introduce arch_try_cmpxchg64_local() for 64-bit and 32-bit targets
to improve code using cmpxchg64_local().  On 64-bit targets, the
generated assembly improves from:

  3e28: 31 c0                 xor    %eax,%eax
  3e2a: 4d 0f b1 7d 00        cmpxchg %r15,0x0(%r13)
  3e2f: 48 85 c0              test   %rax,%rax
  3e32: 0f 85 9f 00 00 00     jne    3ed7 <...>

to:

  3e28: 31 c0                 xor    %eax,%eax
  3e2a: 4d 0f b1 7d 00        cmpxchg %r15,0x0(%r13)
  3e2f: 0f 85 9f 00 00 00     jne    3ed4 <...>

where a TEST instruction after CMPXCHG is saved.  The improvements
for 32-bit targets are even more noticeable, because a double-word
compare after CMPXCHG8B gets eliminated.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Waiman Long <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 9109566 commit d26e46f

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

arch/x86/include/asm/cmpxchg_32.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
6464
return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
6565
}
6666

67+
/*
 * CPU-local try-variant of cmpxchg64: same as __try_cmpxchg64() above, but
 * passes an empty lock-prefix argument instead of LOCK_PREFIX, so the
 * operation is not SMP-atomic.  Returns true on success; on failure the
 * shared __arch_try_cmpxchg64() helper is expected to update *oldp
 * (definition not visible here — NOTE(review): confirm against the macro
 * earlier in this header).
 */
static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
	/* Trailing comma is deliberate: empty macro argument == no LOCK prefix. */
	return __arch_try_cmpxchg64(ptr, oldp, new,);
}
71+
6772
#ifdef CONFIG_X86_CMPXCHG64
6873

6974
#define arch_cmpxchg64 __cmpxchg64
@@ -72,6 +77,8 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
7277

7378
#define arch_try_cmpxchg64 __try_cmpxchg64
7479

80+
#define arch_try_cmpxchg64_local __try_cmpxchg64_local
81+
7582
#else
7683

7784
/*
@@ -150,6 +157,33 @@ static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64
150157
}
151158
#define arch_try_cmpxchg64 arch_try_cmpxchg64
152159

160+
/*
 * Emulated CPU-local try_cmpxchg64 for 32-bit kernels that must run on CPUs
 * possibly lacking CMPXCHG8B: the ALTERNATIVE patches in a direct
 * "cmpxchg8b %[ptr]" when X86_FEATURE_CX8 is present, otherwise it calls the
 * cmpxchg8b_emu fallback (which takes the pointer in %esi — hence the extra
 * "S" (_ptr) input operand).  The comparison result is taken straight from
 * ZF via CC_SET(e)/CC_OUT(e), avoiding a separate double-word compare.
 * On failure, *(_oldp) is updated with the value observed in memory, matching
 * try_cmpxchg semantics.  Evaluates to a bool-like result; likely(ret) hints
 * that success is the common case.
 */
#define __arch_try_cmpxchg64_emu_local(_ptr, _oldp, _new)		\
({									\
	union __u64_halves o = { .full = *(_oldp), },			\
			   n = { .full = (_new), };			\
	bool ret;							\
									\
	asm volatile(ALTERNATIVE("call cmpxchg8b_emu",			\
				 "cmpxchg8b %[ptr]", X86_FEATURE_CX8)	\
		     CC_SET(e)						\
		     : CC_OUT(e) (ret),					\
		       [ptr] "+m" (*(_ptr)),				\
		       "+a" (o.low), "+d" (o.high)			\
		     : "b" (n.low), "c" (n.high), "S" (_ptr)		\
		     : "memory");					\
									\
	if (unlikely(!ret))						\
		*(_oldp) = o.full;					\
									\
	likely(ret);							\
})
180+
181+
/*
 * Function wrapper around __arch_try_cmpxchg64_emu_local() so callers get
 * proper type checking of ptr/oldp/new.  Returns true if *ptr matched *oldp
 * and was replaced by new; false otherwise (with *oldp updated by the macro).
 */
static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
	return __arch_try_cmpxchg64_emu_local(ptr, oldp, new);
}
/* Tell generic atomic fallbacks this arch-specific variant exists. */
#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
186+
153187
#endif
154188

155189
#define system_has_cmpxchg64() boot_cpu_has(X86_FEATURE_CX8)

arch/x86/include/asm/cmpxchg_64.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@
2020
arch_try_cmpxchg((ptr), (po), (n)); \
2121
})
2222

23+
/*
 * 64-bit targets: a u64 exchange is just the native word size, so forward to
 * arch_try_cmpxchg_local() after statically asserting the operand really is
 * 8 bytes wide (guards against accidental use on a narrower type).
 */
#define arch_try_cmpxchg64_local(ptr, po, n)				\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
	arch_try_cmpxchg_local((ptr), (po), (n));			\
})
28+
2329
union __u128_halves {
2430
u128 full;
2531
struct {

0 commit comments

Comments (0)