
Commit 300ce44

Merge patch series "Rework & improve riscv cmpxchg.h and atomic.h"

Leonardo Bras <[email protected]> says:

While studying riscv's cmpxchg.h file, I got really interested in understanding how RISC-V asm implements the different versions of {cmp,}xchg. Once I understood the pattern, it made sense to remove the duplication and create macros that make it easier to see exactly what changes between the versions: instruction suffixes & barriers. I then did the same kind of work on atomic.h.

After that, I noticed that both cmpxchg and xchg only accept variables of size 4 and 8, whereas x86 and arm64 handle sizes 1, 2, 4 and 8. With the deduplication done, it was quite direct to implement them for variables of size 1 and 2, so I did it. Guo Ren has already presented some possible users :)

I compared the asm generated for a test.c that contained a use of every changed function, and could not detect any change from patches 1 + 2 + 3 relative to upstream. Patches 4 & 5 were compile-tested, merged with guoren/qspinlock_v11, and booted just fine with qemu -machine virt -append "qspinlock".
(tree: https://gitlab.com/LeoBras/linux/-/commits/guo_qspinlock_v11)

The latest tests were based on this tree: https://github.com/guoren83/linux/tree/qspinlock_v12

* b4-shazam-lts:
  riscv/cmpxchg: Implement xchg for variables of size 1 and 2
  riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2
  riscv/atomic.h : Deduplicate arch_atomic.*
  riscv/cmpxchg: Deduplicate cmpxchg() asm and macros
  riscv/cmpxchg: Deduplicate xchg() asm functions

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents 542124f + a8ed2b7 commit 300ce44
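
Only the atomic.h half of the diff is reproduced below; the second changed file (presumably arch/riscv/include/asm/cmpxchg.h, given the series title) is where the size-1 and size-2 operations from patches 4 & 5 land. Since lr/sc pairs on this ISA operate only on 32- and 64-bit quantities, a byte or halfword cmpxchg is conventionally built by operating on the aligned 32-bit word containing the value, splicing the sub-word field in with a mask and shift. A minimal sketch of that idea, assuming a little-endian target and using GCC __atomic builtins in place of the kernel's lr.w/sc.w loop (cmpxchg_u8_sketch and all its details are hypothetical illustration, not the patch's code):

    #include <stdint.h>
    #include <stdbool.h>

    static uint8_t cmpxchg_u8_sketch(uint8_t *ptr, uint8_t old, uint8_t new_val)
    {
            /* The naturally aligned 32-bit word containing *ptr ... */
            uint32_t *word = (uint32_t *)((uintptr_t)ptr & ~(uintptr_t)0x3);
            /* ... and the byte's position within it (little-endian). */
            unsigned int shift = ((uintptr_t)ptr & 0x3) * 8;
            uint32_t mask = 0xffu << shift;
            uint32_t expected, desired;

            do {
                    expected = __atomic_load_n(word, __ATOMIC_RELAXED);
                    /* Mismatch: fail, returning the currently stored byte. */
                    if (((expected & mask) >> shift) != old)
                            return (expected & mask) >> shift;
                    /* Splice the new byte into the surrounding word. */
                    desired = (expected & ~mask) | ((uint32_t)new_val << shift);
            } while (!__atomic_compare_exchange_n(word, &expected, desired,
                                                  true, __ATOMIC_SEQ_CST,
                                                  __ATOMIC_RELAXED));
            return old;
    }

A kernel version typically folds the splice and the retry into a single lr.w/sc.w loop; neighboring bytes in the same word stay intact because the store-conditional fails if anything in the reserved word changed concurrently, and the splice is then recomputed from a fresh load.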

2 files changed: 200 additions & 368 deletions

arch/riscv/include/asm/atomic.h

Lines changed: 76 additions & 88 deletions
@@ -195,22 +195,28 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN

+#define _arch_atomic_fetch_add_unless(_prev, _rc, counter, _a, _u, sfx)	\
+({										\
+	__asm__ __volatile__ (							\
+		"0:	lr." sfx "	%[p],  %[c]\n"				\
+		"	beq	%[p],  %[u], 1f\n"				\
+		"	add	%[rc], %[p], %[a]\n"				\
+		"	sc." sfx ".rl	%[rc], %[rc], %[c]\n"			\
+		"	bnez	%[rc], 0b\n"					\
+		"	fence	rw, rw\n"					\
+		"1:\n"								\
+		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)		\
+		: [a]"r" (_a), [u]"r" (_u)					\
+		: "memory");							\
+})
+
 /* This is required to provide a full barrier on success. */
 static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int prev, rc;

-	__asm__ __volatile__ (
-		"0:	lr.w	%[p],  %[c]\n"
-		"	beq	%[p],  %[u], 1f\n"
-		"	add	%[rc], %[p], %[a]\n"
-		"	sc.w.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		: [a]"r" (a), [u]"r" (u)
-		: "memory");
+	_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w");
+
 	return prev;
 }
 #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
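
For reference, since sfx is pasted into the asm template by C string-literal concatenation, the 32-bit call above, _arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "w"), expands to essentially the block the hunk removes:

    __asm__ __volatile__ (
            "0:     lr.w    %[p],  %[c]\n"
            "       beq     %[p],  %[u], 1f\n"
            "       add     %[rc], %[p], %[a]\n"
            "       sc.w.rl %[rc], %[rc], %[c]\n"
            "       bnez    %[rc], 0b\n"
            "       fence   rw, rw\n"
            "1:\n"
            : [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
            : [a]"r" (a), [u]"r" (u)
            : "memory");

The only textual difference is the full barrier spelled as a literal fence rw, rw rather than via RISCV_FULL_BARRIER, which denotes the same instruction. The loop works because sc.w writes zero to %[rc] on success and nonzero on failure, so bnez retries until the store-conditional lands.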
@@ -221,77 +227,86 @@ static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a,
 	s64 prev;
 	long rc;

-	__asm__ __volatile__ (
-		"0:	lr.d	%[p],  %[c]\n"
-		"	beq	%[p],  %[u], 1f\n"
-		"	add	%[rc], %[p], %[a]\n"
-		"	sc.d.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		: [a]"r" (a), [u]"r" (u)
-		: "memory");
+	_arch_atomic_fetch_add_unless(prev, rc, v->counter, a, u, "d");
+
 	return prev;
 }
 #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 #endif

+#define _arch_atomic_inc_unless_negative(_prev, _rc, counter, sfx)	\
+({									\
+	__asm__ __volatile__ (						\
+		"0:	lr." sfx "	%[p],  %[c]\n"			\
+		"	bltz	%[p],  1f\n"				\
+		"	addi	%[rc], %[p], 1\n"			\
+		"	sc." sfx ".rl	%[rc], %[rc], %[c]\n"		\
+		"	bnez	%[rc], 0b\n"				\
+		"	fence	rw, rw\n"				\
+		"1:\n"							\
+		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
+		:							\
+		: "memory");						\
+})
+
 static __always_inline bool arch_atomic_inc_unless_negative(atomic_t *v)
 {
 	int prev, rc;

-	__asm__ __volatile__ (
-		"0:	lr.w	%[p],  %[c]\n"
-		"	bltz	%[p],  1f\n"
-		"	addi	%[rc], %[p], 1\n"
-		"	sc.w.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_inc_unless_negative(prev, rc, v->counter, "w");
+
 	return !(prev < 0);
 }

 #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative

+#define _arch_atomic_dec_unless_positive(_prev, _rc, counter, sfx)	\
+({									\
+	__asm__ __volatile__ (						\
+		"0:	lr." sfx "	%[p],  %[c]\n"			\
+		"	bgtz	%[p],  1f\n"				\
+		"	addi	%[rc], %[p], -1\n"			\
+		"	sc." sfx ".rl	%[rc], %[rc], %[c]\n"		\
+		"	bnez	%[rc], 0b\n"				\
+		"	fence	rw, rw\n"				\
+		"1:\n"							\
+		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
+		:							\
+		: "memory");						\
+})
+
 static __always_inline bool arch_atomic_dec_unless_positive(atomic_t *v)
 {
 	int prev, rc;

-	__asm__ __volatile__ (
-		"0:	lr.w	%[p],  %[c]\n"
-		"	bgtz	%[p],  1f\n"
-		"	addi	%[rc], %[p], -1\n"
-		"	sc.w.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_dec_unless_positive(prev, rc, v->counter, "w");
+
 	return !(prev > 0);
 }

 #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive

+#define _arch_atomic_dec_if_positive(_prev, _rc, counter, sfx)		\
+({									\
+	__asm__ __volatile__ (						\
+		"0:	lr." sfx "	%[p],  %[c]\n"			\
+		"	addi	%[rc], %[p], -1\n"			\
+		"	bltz	%[rc], 1f\n"				\
+		"	sc." sfx ".rl	%[rc], %[rc], %[c]\n"		\
+		"	bnez	%[rc], 0b\n"				\
+		"	fence	rw, rw\n"				\
+		"1:\n"							\
+		: [p]"=&r" (_prev), [rc]"=&r" (_rc), [c]"+A" (counter)	\
+		:							\
+		: "memory");						\
+})
+
 static __always_inline int arch_atomic_dec_if_positive(atomic_t *v)
 {
 	int prev, rc;

-	__asm__ __volatile__ (
-		"0:	lr.w	%[p],  %[c]\n"
-		"	addi	%[rc], %[p], -1\n"
-		"	bltz	%[rc], 1f\n"
-		"	sc.w.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_dec_if_positive(prev, rc, v->counter, "w");
+
 	return prev - 1;
 }

@@ -303,17 +318,8 @@ static __always_inline bool arch_atomic64_inc_unless_negative(atomic64_t *v)
 	s64 prev;
 	long rc;

-	__asm__ __volatile__ (
-		"0:	lr.d	%[p],  %[c]\n"
-		"	bltz	%[p],  1f\n"
-		"	addi	%[rc], %[p], 1\n"
-		"	sc.d.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_inc_unless_negative(prev, rc, v->counter, "d");
+
 	return !(prev < 0);
 }

@@ -324,17 +330,8 @@ static __always_inline bool arch_atomic64_dec_unless_positive(atomic64_t *v)
 	s64 prev;
 	long rc;

-	__asm__ __volatile__ (
-		"0:	lr.d	%[p],  %[c]\n"
-		"	bgtz	%[p],  1f\n"
-		"	addi	%[rc], %[p], -1\n"
-		"	sc.d.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_dec_unless_positive(prev, rc, v->counter, "d");
+
 	return !(prev > 0);
 }

@@ -345,17 +342,8 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 	s64 prev;
 	long rc;

-	__asm__ __volatile__ (
-		"0:	lr.d	%[p],  %[c]\n"
-		"	addi	%[rc], %[p], -1\n"
-		"	bltz	%[rc], 1f\n"
-		"	sc.d.rl	%[rc], %[rc], %[c]\n"
-		"	bnez	%[rc], 0b\n"
-		RISCV_FULL_BARRIER
-		"1:\n"
-		: [p]"=&r" (prev), [rc]"=&r" (rc), [c]"+A" (v->counter)
-		:
-		: "memory");
+	_arch_atomic_dec_if_positive(prev, rc, v->counter, "d");
+
 	return prev - 1;
 }

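A note on the dec_if_positive hunks above: unlike the *_unless_* helpers, arch_atomic_dec_if_positive() and its 64-bit twin return the decremented value (prev - 1) rather than the fetched one, and they skip the store entirely when the result would be negative. A hypothetical caller sketch (obj, budget, and the handlers are invented names, not from this series):

    s64 remaining = arch_atomic64_dec_if_positive(&obj->budget);

    if (remaining < 0) {
            /* budget was already <= 0 and was left unmodified */
            handle_exhausted(obj);
    } else if (remaining == 0) {
            /* this call consumed the last unit */
            handle_last_unit(obj);
    }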