Skip to content

Commit 053f58b

Browse files
mrutland-arm and ctmarinas
authored and committed
arm64: atomics: lse: define RETURN ops in terms of FETCH ops
The FEAT_LSE atomic instructions include LD* instructions which return the original value of a memory location and can be used to directly implement FETCH operations. Each RETURN op is implemented as a copy of the corresponding FETCH op with a trailing instruction to generate the new value of the memory location. We only directly implement *_fetch_add*(), for which we have a trailing `add` instruction. As the compiler has no visibility of the `add`, this leads to less than optimal code generation when consuming the result. For example, the compiler cannot constant-fold the addition into later operations, and currently GCC 11.1.0 will compile: return __lse_atomic_sub_return(1, v) == 0; As: mov w1, #0xffffffff ldaddal w1, w2, [x0] add w1, w1, w2 cmp w1, #0x0 cset w0, eq // eq = none ret This patch improves this by replacing the `add` with C addition after the inline assembly block, e.g. ret += i; This allows the compiler to manipulate `i`. This permits the compiler to merge the `add` and `cmp` for the above, e.g. mov w1, #0xffffffff ldaddal w1, w1, [x0] cmp w1, #0x1 cset w0, eq // eq = none ret With this change the assembly for each RETURN op is identical to the corresponding FETCH op (including barriers and clobbers) so I've removed the inline assembly and rewritten each RETURN op in terms of the corresponding FETCH op, e.g. | static inline int __lse_atomic_add_return(int i, atomic_t *v) | { | return __lse_atomic_fetch_add(i, v) + i; | } The new construction does not adversely affect the common case, and before and after this patch GCC 11.1.0 can compile: __lse_atomic_add_return(i, v) As: ldaddal w0, w2, [x1] add w0, w0, w2 ... while having the freedom to do better elsewhere. This is intended as an optimization and cleanup. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland <[email protected]> Cc: Boqun Feng <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Will Deacon <[email protected]> Acked-by: Will Deacon <[email protected]> Acked-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Catalin Marinas <[email protected]>
1 parent 8a578a7 commit 053f58b

File tree

1 file changed

+14
-34
lines changed

1 file changed

+14
-34
lines changed

arch/arm64/include/asm/atomic_lse.h

Lines changed: 14 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -75,31 +75,21 @@ ATOMIC_FETCH_OP_SUB( )
7575

7676
#undef ATOMIC_FETCH_OP_SUB
7777

78-
#define ATOMIC_OP_ADD_SUB_RETURN(name, mb, cl...) \
78+
#define ATOMIC_OP_ADD_SUB_RETURN(name) \
7979
static inline int __lse_atomic_add_return##name(int i, atomic_t *v) \
8080
{ \
81-
u32 tmp; \
82-
\
83-
asm volatile( \
84-
__LSE_PREAMBLE \
85-
" ldadd" #mb " %w[i], %w[tmp], %[v]\n" \
86-
" add %w[i], %w[i], %w[tmp]" \
87-
: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
88-
: "r" (v) \
89-
: cl); \
90-
\
91-
return i; \
81+
return __lse_atomic_fetch_add##name(i, v) + i; \
9282
} \
9383
\
9484
static inline int __lse_atomic_sub_return##name(int i, atomic_t *v) \
9585
{ \
96-
return __lse_atomic_add_return##name(-i, v); \
86+
return __lse_atomic_fetch_sub(i, v) - i; \
9787
}
9888

99-
ATOMIC_OP_ADD_SUB_RETURN(_relaxed, )
100-
ATOMIC_OP_ADD_SUB_RETURN(_acquire, a, "memory")
101-
ATOMIC_OP_ADD_SUB_RETURN(_release, l, "memory")
102-
ATOMIC_OP_ADD_SUB_RETURN( , al, "memory")
89+
ATOMIC_OP_ADD_SUB_RETURN(_relaxed)
90+
ATOMIC_OP_ADD_SUB_RETURN(_acquire)
91+
ATOMIC_OP_ADD_SUB_RETURN(_release)
92+
ATOMIC_OP_ADD_SUB_RETURN( )
10393

10494
#undef ATOMIC_OP_ADD_SUB_RETURN
10595

@@ -186,31 +176,21 @@ ATOMIC64_FETCH_OP_SUB( )
186176

187177
#undef ATOMIC64_FETCH_OP_SUB
188178

189-
#define ATOMIC64_OP_ADD_SUB_RETURN(name, mb, cl...) \
179+
#define ATOMIC64_OP_ADD_SUB_RETURN(name) \
190180
static inline long __lse_atomic64_add_return##name(s64 i, atomic64_t *v)\
191181
{ \
192-
unsigned long tmp; \
193-
\
194-
asm volatile( \
195-
__LSE_PREAMBLE \
196-
" ldadd" #mb " %[i], %x[tmp], %[v]\n" \
197-
" add %[i], %[i], %x[tmp]" \
198-
: [i] "+r" (i), [v] "+Q" (v->counter), [tmp] "=&r" (tmp) \
199-
: "r" (v) \
200-
: cl); \
201-
\
202-
return i; \
182+
return __lse_atomic64_fetch_add##name(i, v) + i; \
203183
} \
204184
\
205185
static inline long __lse_atomic64_sub_return##name(s64 i, atomic64_t *v)\
206186
{ \
207-
return __lse_atomic64_add_return##name(-i, v); \
187+
return __lse_atomic64_fetch_sub##name(i, v) - i; \
208188
}
209189

210-
ATOMIC64_OP_ADD_SUB_RETURN(_relaxed, )
211-
ATOMIC64_OP_ADD_SUB_RETURN(_acquire, a, "memory")
212-
ATOMIC64_OP_ADD_SUB_RETURN(_release, l, "memory")
213-
ATOMIC64_OP_ADD_SUB_RETURN( , al, "memory")
190+
ATOMIC64_OP_ADD_SUB_RETURN(_relaxed)
191+
ATOMIC64_OP_ADD_SUB_RETURN(_acquire)
192+
ATOMIC64_OP_ADD_SUB_RETURN(_release)
193+
ATOMIC64_OP_ADD_SUB_RETURN( )
214194

215195
#undef ATOMIC64_OP_ADD_SUB_RETURN
216196

0 commit comments

Comments
 (0)