
Commit 720dc7a

LoongArch: Add subword xchg/cmpxchg emulation
LoongArch only supports native 32-bit/64-bit xchg/cmpxchg, but percpu operations, qspinlock and some drivers need the 8-bit/16-bit variants. Add subword xchg/cmpxchg emulation in this patch, because the emulation performs better than the generic implementation (on NUMA systems) and also fixes some build errors [1].

LoongArch's guarantee for forward progress (i.e., avoiding the case where many ll/sc sequences run at the same time and none succeeds): the "exclusive access (with timeout) of ll" feature prevents simultaneous ll (it also blocks other memory loads/stores to the same address), and the "random delay of sc" feature prevents simultaneous sc. Implementing these features is mandatory for multi-core LoongArch processors; only single-core and dual-core processors (which also don't support multi-chip interconnection) are exempt. The feature bits are in CPUCFG3, bit 3 and bit 4 [2].

[1] https://lore.kernel.org/loongarch/CAAhV-H6vvkuOzy8OemWdYK3taj5Jn3bFX0ZTwE=twM8ywpBUYA@mail.gmail.com/T/#t
[2] https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg

Reported-by: Sudip Mukherjee (Codethink) <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Signed-off-by: Rui Wang <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
1 parent 092e9eb · commit 720dc7a
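To make the mask-and-shift trick easier to follow before reading the inline assembly below, here is a minimal userspace sketch of the same idea, expressed with GCC/Clang __atomic builtins instead of LoongArch ll.w/sc.w. It is illustrative only, not code from the patch: cmpxchg16_emulated is a hypothetical name, and the shift computation assumes a little-endian layout, which LoongArch uses.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Emulate a 16-bit cmpxchg on top of the compiler's 32-bit CAS. */
    static uint16_t cmpxchg16_emulated(volatile uint16_t *ptr,
                                       uint16_t old, uint16_t new)
    {
        /* Bit offset of the halfword inside its aligned 32-bit word
           (little-endian layout). */
        unsigned int shift = ((uintptr_t)ptr & 0x3) * 8;
        uint32_t mask = (uint32_t)0xffff << shift;
        /* The naturally aligned 32-bit word that contains *ptr. */
        volatile uint32_t *ptr32 =
            (volatile uint32_t *)((uintptr_t)ptr & ~(uintptr_t)0x3);
        uint32_t old32 = __atomic_load_n(ptr32, __ATOMIC_RELAXED);

        for (;;) {
            /* Fail like cmpxchg does: return the observed value. */
            if ((uint16_t)((old32 & mask) >> shift) != old)
                return (old32 & mask) >> shift;
            /* Splice the new halfword in, keeping its neighbours. */
            uint32_t new32 = (old32 & ~mask) | ((uint32_t)new << shift);
            /* On failure old32 is refreshed, so just loop again. */
            if (__atomic_compare_exchange_n(ptr32, &old32, new32, false,
                                            __ATOMIC_SEQ_CST,
                                            __ATOMIC_SEQ_CST))
                return old;
        }
    }

    int main(void)
    {
        static _Alignas(4) volatile uint16_t v[2] = { 0x1111, 0x2222 };

        uint16_t seen = cmpxchg16_emulated(&v[1], 0x2222, 0x3333);
        printf("seen 0x%x, v[0] 0x%x, v[1] 0x%x\n", seen, v[0], v[1]);
        /* Prints: seen 0x2222, v[0] 0x1111, v[1] 0x3333 */
        return 0;
    }

The kernel version below does the same splice inside a single ll.w/sc.w loop, so no CAS retry wrapper is needed.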

File tree: 2 files changed, +105 -1 lines


arch/loongarch/include/asm/cmpxchg.h

Lines changed: 97 additions & 1 deletion
@@ -5,8 +5,9 @@
 #ifndef __ASM_CMPXCHG_H
 #define __ASM_CMPXCHG_H
 
-#include <asm/barrier.h>
+#include <linux/bits.h>
 #include <linux/build_bug.h>
+#include <asm/barrier.h>
 
 #define __xchg_asm(amswap_db, m, val)		\
 ({						\
@@ -21,10 +22,53 @@
 	__ret;					\
 })
 
+static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val,
+					unsigned int size)
+{
+	unsigned int shift;
+	u32 old32, mask, temp;
+	volatile u32 *ptr32;
+
+	/* Mask value to the correct size. */
+	mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+	val &= mask;
+
+	/*
+	 * Calculate a shift & mask that correspond to the value we wish to
+	 * exchange within the naturally aligned 4 byte integer that includes
+	 * it.
+	 */
+	shift = (unsigned long)ptr & 0x3;
+	shift *= BITS_PER_BYTE;
+	mask <<= shift;
+
+	/*
+	 * Calculate a pointer to the naturally aligned 4 byte integer that
+	 * includes our byte of interest, and load its value.
+	 */
+	ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3);
+
+	asm volatile (
+	"1:	ll.w		%0, %3		\n"
+	"	andn		%1, %0, %z4	\n"
+	"	or		%1, %1, %z5	\n"
+	"	sc.w		%1, %2		\n"
+	"	beqz		%1, 1b		\n"
+	: "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32)
+	: "ZC" (*ptr32), "Jr" (mask), "Jr" (val << shift)
+	: "memory");
+
+	return (old32 & mask) >> shift;
+}
+
 static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 				   int size)
 {
 	switch (size) {
+	case 1:
+	case 2:
+		return __xchg_small(ptr, x, size);
+
 	case 4:
 		return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x);
 
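With the new case 1/case 2 arms above, xchg() now accepts 8-bit and 16-bit objects instead of falling through to the unsupported-size build error. A hypothetical caller (not from the patch) might look like:

    /* Atomically fetch and clear a 16-bit flags word. */
    static u16 take_flags(volatile u16 *flags)
    {
    	/* sizeof(*flags) == 2, so this routes to __xchg_small(). */
    	return xchg(flags, 0);
    }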

@@ -67,10 +111,62 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
 	__ret;					\
 })
 
+static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
+					   unsigned int new, unsigned int size)
+{
+	unsigned int shift;
+	u32 old32, mask, temp;
+	volatile u32 *ptr32;
+
+	/* Mask inputs to the correct size. */
+	mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+	old &= mask;
+	new &= mask;
+
+	/*
+	 * Calculate a shift & mask that correspond to the value we wish to
+	 * compare & exchange within the naturally aligned 4 byte integer
+	 * that includes it.
+	 */
+	shift = (unsigned long)ptr & 0x3;
+	shift *= BITS_PER_BYTE;
+	old <<= shift;
+	new <<= shift;
+	mask <<= shift;
+
+	/*
+	 * Calculate a pointer to the naturally aligned 4 byte integer that
+	 * includes our byte of interest, and load its value.
+	 */
+	ptr32 = (volatile u32 *)((unsigned long)ptr & ~0x3);
+
+	asm volatile (
+	"1:	ll.w		%0, %3		\n"
+	"	and		%1, %0, %z4	\n"
+	"	bne		%1, %z5, 2f	\n"
+	"	andn		%1, %0, %z4	\n"
+	"	or		%1, %1, %z6	\n"
+	"	sc.w		%1, %2		\n"
+	"	beqz		%1, 1b		\n"
+	"	b		3f		\n"
+	"2:					\n"
+	__WEAK_LLSC_MB
+	"3:					\n"
+	: "=&r" (old32), "=&r" (temp), "=ZC" (*ptr32)
+	: "ZC" (*ptr32), "Jr" (mask), "Jr" (old), "Jr" (new)
+	: "memory");
+
+	return (old32 & mask) >> shift;
+}
+
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 				      unsigned long new, unsigned int size)
 {
 	switch (size) {
+	case 1:
+	case 2:
+		return __cmpxchg_small(ptr, old, new, size);
+
 	case 4:
 		return __cmpxchg_asm("ll.w", "sc.w", (volatile u32 *)ptr,
 				     (u32)old, new);
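Callers get the usual cmpxchg() semantics at byte granularity, so the common compare-and-swap retry loop now compiles for u8 fields as well. A hypothetical sketch (not from the patch):

    /* Atomically set a bit in an 8-bit state field. */
    static u8 set_state_bit(volatile u8 *state, u8 bit)
    {
    	u8 old, new;

    	do {
    		old = *state;
    		new = old | bit;
    		/* A 1-byte size routes to __cmpxchg_small(). */
    	} while (cmpxchg(state, old, new) != old);

    	return new;
    }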

arch/loongarch/include/asm/percpu.h

Lines changed: 8 additions & 0 deletions
@@ -123,6 +123,10 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 				   int size)
 {
 	switch (size) {
+	case 1:
+	case 2:
+		return __xchg_small((volatile void *)ptr, val, size);
+
 	case 4:
 		return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val);
 

@@ -204,9 +208,13 @@ do { \
 #define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
 #define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
 
+#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
 #define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
 #define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
 
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
 #define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
 #define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
 
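These definitions let generic code apply this_cpu_xchg()/this_cpu_cmpxchg() to 1- and 2-byte per-CPU variables. A hypothetical example (the names are illustrative, not from the patch):

    /* A per-CPU byte-sized flag. */
    static DEFINE_PER_CPU(u8, my_flag);

    static inline u8 test_and_clear_my_flag(void)
    {
    	/* Picks this_cpu_xchg_1() via the size-dispatch macro. */
    	return this_cpu_xchg(my_flag, 0);
    }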
