
Commit 186f69b

csky: atomic: Optimize cmpxchg with acquire & release
Optimize cmpxchg with acquire/release fence instructions emitted in the
asm template instead of the previous generic fence-based implementation.
Prevent a fence when cmpxchg's first load != old.

Comments by Rutland:

  8e86f0b ("arm64: atomics: fix use of acquire + release for full
  barrier semantics")

Comments by Boqun:

  FWIW, you probably need to make sure that a barrier instruction inside
  an lr/sc loop is a good thing. IIUC, the execution time of a barrier
  instruction is determined by the status of store buffers and invalidate
  queues (and probably other stuff), so it may increase the execution
  time of the lr/sc loop and make it unlikely to succeed. But this really
  depends on how the arch executes these instructions.

Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <[email protected]>
Signed-off-by: Guo Ren <[email protected]>
Cc: Mark Rutland <[email protected]>
1 parent 8318f7c commit 186f69b
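For contrast, this is the generic arch_cmpxchg() being removed (see the
cmpxchg.h hunk below), with comments added here as annotation; the
comments are not part of the kernel source:

#define arch_cmpxchg(ptr, o, n)					\
({								\
	__typeof__(*(ptr)) __ret;				\
	__smp_release_fence();	/* bar.brwaw; runs even on failure */ \
	__ret = arch_cmpxchg_relaxed(ptr, o, n); /* ldex/stex loop */ \
	__smp_acquire_fence();	/* bar.brarw; runs even on failure */ \
	__ret;							\
})

In the replacement inline-asm versions, the trailing fence sits between
the "bez %1, 1b" retry branch and the "2:" label, so a failed
cmpne/bt 2f comparison branches over it; only the leading RELEASE_FENCE
of the full-barrier variant is still executed unconditionally.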

File tree

  arch/csky/include/asm/barrier.h
  arch/csky/include/asm/cmpxchg.h

2 files changed: 67 additions, 8 deletions

arch/csky/include/asm/barrier.h

Lines changed: 7 additions & 4 deletions
@@ -37,17 +37,21 @@
  * bar.brar
  * bar.bwaw
  */
+#define FULL_FENCE	".long 0x842fc000\n"
+#define ACQUIRE_FENCE	".long 0x8427c000\n"
+#define RELEASE_FENCE	".long 0x842ec000\n"
+
 #define __bar_brw()	asm volatile (".long 0x842cc000\n":::"memory")
 #define __bar_br()	asm volatile (".long 0x8424c000\n":::"memory")
 #define __bar_bw()	asm volatile (".long 0x8428c000\n":::"memory")
 #define __bar_arw()	asm volatile (".long 0x8423c000\n":::"memory")
 #define __bar_ar()	asm volatile (".long 0x8421c000\n":::"memory")
 #define __bar_aw()	asm volatile (".long 0x8422c000\n":::"memory")
-#define __bar_brwarw()	asm volatile (".long 0x842fc000\n":::"memory")
-#define __bar_brarw()	asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_brwarw()	asm volatile (FULL_FENCE:::"memory")
+#define __bar_brarw()	asm volatile (ACQUIRE_FENCE:::"memory")
 #define __bar_bwarw()	asm volatile (".long 0x842bc000\n":::"memory")
 #define __bar_brwar()	asm volatile (".long 0x842dc000\n":::"memory")
-#define __bar_brwaw()	asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brwaw()	asm volatile (RELEASE_FENCE:::"memory")
 #define __bar_brar()	asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_brar()	asm volatile (".long 0x8425c000\n":::"memory")
 #define __bar_bwaw()	asm volatile (".long 0x842ac000\n":::"memory")
@@ -56,7 +60,6 @@
 #define __smp_rmb()	__bar_brar()
 #define __smp_wmb()	__bar_bwaw()

-#define ACQUIRE_FENCE		".long 0x8427c000\n"
 #define __smp_acquire_fence()	__bar_brarw()
 #define __smp_release_fence()	__bar_brwaw()

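The three new string literals are the same raw encodings the __bar_*()
helpers already used (FULL_FENCE is the bar.brwarw encoding,
ACQUIRE_FENCE is bar.brarw, RELEASE_FENCE is bar.brwaw); naming them lets
cmpxchg.h splice them directly into its asm templates. As a hypothetical
illustration (the helper below is mine, not part of the patch), the
acquire literal composes into other inline asm the same way:

/*
 * Hypothetical sketch only: a 32-bit load-acquire built from the new
 * ACQUIRE_FENCE literal.  The fence is the same bar.brarw used by
 * __smp_acquire_fence(), ordering the load above it before later
 * loads and stores.
 */
static inline unsigned int csky_load_acquire_sketch(const unsigned int *p)
{
	unsigned int val = *(const volatile unsigned int *)p;

	asm volatile (ACQUIRE_FENCE : : : "memory");

	return val;
}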
arch/csky/include/asm/cmpxchg.h

Lines changed: 60 additions & 4 deletions
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
 #define arch_cmpxchg_relaxed(ptr, o, n) \
 	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))

-#define arch_cmpxchg(ptr, o, n)					\
+#define __cmpxchg_acquire(ptr, old, new, size)			\
 ({								\
+	__typeof__(ptr) __ptr = (ptr);				\
+	__typeof__(new) __new = (new);				\
+	__typeof__(new) __tmp;					\
+	__typeof__(old) __old = (old);				\
+	__typeof__(*(ptr)) __ret;				\
+	switch (size) {						\
+	case 4:							\
+		asm volatile (					\
+		"1:	ldex.w		%0, (%3) \n"		\
+		"	cmpne		%0, %4   \n"		\
+		"	bt		2f       \n"		\
+		"	mov		%1, %2   \n"		\
+		"	stex.w		%1, (%3) \n"		\
+		"	bez		%1, 1b   \n"		\
+		ACQUIRE_FENCE					\
+		"2:				 \n"		\
+			: "=&r" (__ret), "=&r" (__tmp)		\
+			: "r" (__new), "r"(__ptr), "r"(__old)	\
+			:);					\
+		break;						\
+	default:						\
+		__bad_xchg();					\
+	}							\
+	__ret;							\
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+	(__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))
+
+#define __cmpxchg(ptr, old, new, size)				\
+({								\
+	__typeof__(ptr) __ptr = (ptr);				\
+	__typeof__(new) __new = (new);				\
+	__typeof__(new) __tmp;					\
+	__typeof__(old) __old = (old);				\
 	__typeof__(*(ptr)) __ret;				\
-	__smp_release_fence();					\
-	__ret = arch_cmpxchg_relaxed(ptr, o, n);		\
-	__smp_acquire_fence();					\
+	switch (size) {						\
+	case 4:							\
+		asm volatile (					\
+		RELEASE_FENCE					\
+		"1:	ldex.w		%0, (%3) \n"		\
+		"	cmpne		%0, %4   \n"		\
+		"	bt		2f       \n"		\
+		"	mov		%1, %2   \n"		\
+		"	stex.w		%1, (%3) \n"		\
+		"	bez		%1, 1b   \n"		\
+		FULL_FENCE					\
+		"2:				 \n"		\
+			: "=&r" (__ret), "=&r" (__tmp)		\
+			: "r" (__new), "r"(__ptr), "r"(__old)	\
+			:);					\
+		break;						\
+	default:						\
+		__bad_xchg();					\
+	}							\
 	__ret;							\
 })

+#define arch_cmpxchg(ptr, o, n) \
+	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+
+#define arch_cmpxchg_local(ptr, o, n) \
+	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 #else
 #include <asm-generic/cmpxchg.h>
 #endif
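A hypothetical caller-side sketch (my example, not from the patch) of the
new arch_cmpxchg_acquire(): a successful swap returns through the
ACQUIRE_FENCE placed after stex.w, so the critical section is ordered
after lock acquisition, while a failed compare branches straight to the
"2:" label and executes no fence.

/* Hypothetical usage sketch, not part of this patch. */
static inline int my_trylock(unsigned int *lock)
{
	/* A 0 -> 1 transition means we own the lock, with acquire ordering. */
	return arch_cmpxchg_acquire(lock, 0, 1) == 0;
}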
