Skip to content

Commit 0539084

Browse files
ubizjak authored and Ingo Molnar committed
x86/percpu: Convert this_percpu_xchg_op() from asm() to C code, to generate better code
Rewrite percpu_xchg_op() using generic percpu primitives instead of using asm. The new implementation is similar to local_xchg() and allows the compiler to perform various optimizations: e.g. the compiler is able to create a fast path through the loop, according to the likely/unlikely annotations in percpu_try_cmpxchg_op().

No functional changes intended.

Signed-off-by: Uros Bizjak <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 4ae3dc8 commit 0539084

File tree

1 file changed

+11
-21
lines changed

1 file changed

+11
-21
lines changed

arch/x86/include/asm/percpu.h

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -230,25 +230,15 @@ do { \
230230
})
231231

232232
/*
233-
* xchg is implemented using cmpxchg without a lock prefix. xchg is
234-
* expensive due to the implied lock prefix. The processor cannot prefetch
235-
* cachelines if xchg is used.
233+
* this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
234+
* xchg is expensive due to the implied lock prefix. The processor
235+
* cannot prefetch cachelines if xchg is used.
236236
*/
237-
#define percpu_xchg_op(size, qual, _var, _nval) \
237+
#define this_percpu_xchg_op(_var, _nval) \
238238
({ \
239-
__pcpu_type_##size pxo_old__; \
240-
__pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \
241-
asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
242-
"%[oval]") \
243-
"\n1:\t" \
244-
__pcpu_op2_##size("cmpxchg", "%[nval]", \
245-
__percpu_arg([var])) \
246-
"\n\tjnz 1b" \
247-
: [oval] "=&a" (pxo_old__), \
248-
[var] "+m" (__my_cpu_var(_var)) \
249-
: [nval] __pcpu_reg_##size(, pxo_new__) \
250-
: "memory"); \
251-
(typeof(_var))(unsigned long) pxo_old__; \
239+
typeof(_var) pxo_old__ = this_cpu_read(_var); \
240+
do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \
241+
pxo_old__; \
252242
})
253243

254244
/*
@@ -534,9 +524,9 @@ do { \
534524
#define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val)
535525
#define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val)
536526
#define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val)
537-
#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval)
538-
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval)
539-
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval)
527+
#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval)
528+
#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval)
529+
#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval)
540530

541531
#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
542532
#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
@@ -575,7 +565,7 @@ do { \
575565
#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
576566
#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
577567
#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
578-
#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
568+
#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
579569
#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
580570
#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
581571
#endif

0 commit comments

Comments
 (0)