Skip to content

Commit 457926b

Browse files
XiaoWang1772 authored and palmer-dabbelt committed
riscv: Optimize bitops with Zbb extension
This patch leverages the alternative mechanism to dynamically optimize bitops (including __ffs, __fls, ffs, fls) with Zbb instructions. When the Zbb extension is not supported by the runtime CPU, the legacy implementation is used. If Zbb is supported, the optimized variants are selected via alternative patching. The legacy bitops support is taken from the generic C implementation as a fallback. If the parameter is a build-time constant, we leverage the compiler builtins to calculate the result directly; this approach is inspired by the x86 bitops implementation. The EFI stub runs before the kernel, so the alternative mechanism should not be used there; this patch introduces a macro, NO_ALTERNATIVE, for this purpose. Signed-off-by: Xiao Wang <[email protected]> Reviewed-by: Charlie Jenkins <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent e72c433 commit 457926b

File tree

2 files changed

+252
-4
lines changed

2 files changed

+252
-4
lines changed

arch/riscv/include/asm/bitops.h

Lines changed: 251 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,261 @@
1515
#include <asm/barrier.h>
1616
#include <asm/bitsperlong.h>
1717

18+
#if !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE)
1819
#include <asm-generic/bitops/__ffs.h>
19-
#include <asm-generic/bitops/ffz.h>
20-
#include <asm-generic/bitops/fls.h>
2120
#include <asm-generic/bitops/__fls.h>
21+
#include <asm-generic/bitops/ffs.h>
22+
#include <asm-generic/bitops/fls.h>
23+
24+
#else
25+
#include <asm/alternative-macros.h>
26+
#include <asm/hwcap.h>
27+
28+
#if (BITS_PER_LONG == 64)
29+
#define CTZW "ctzw "
30+
#define CLZW "clzw "
31+
#elif (BITS_PER_LONG == 32)
32+
#define CTZW "ctz "
33+
#define CLZW "clz "
34+
#else
35+
#error "Unexpected BITS_PER_LONG"
36+
#endif
37+
38+
static __always_inline unsigned long variable__ffs(unsigned long word)
39+
{
40+
int num;
41+
42+
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
43+
RISCV_ISA_EXT_ZBB, 1)
44+
: : : : legacy);
45+
46+
asm volatile (".option push\n"
47+
".option arch,+zbb\n"
48+
"ctz %0, %1\n"
49+
".option pop\n"
50+
: "=r" (word) : "r" (word) :);
51+
52+
return word;
53+
54+
legacy:
55+
num = 0;
56+
#if BITS_PER_LONG == 64
57+
if ((word & 0xffffffff) == 0) {
58+
num += 32;
59+
word >>= 32;
60+
}
61+
#endif
62+
if ((word & 0xffff) == 0) {
63+
num += 16;
64+
word >>= 16;
65+
}
66+
if ((word & 0xff) == 0) {
67+
num += 8;
68+
word >>= 8;
69+
}
70+
if ((word & 0xf) == 0) {
71+
num += 4;
72+
word >>= 4;
73+
}
74+
if ((word & 0x3) == 0) {
75+
num += 2;
76+
word >>= 2;
77+
}
78+
if ((word & 0x1) == 0)
79+
num += 1;
80+
return num;
81+
}
82+
83+
/**
 * __ffs - find first set bit in a long word
 * @word: The word to search
 *
 * Build-time constant arguments are resolved with __builtin_ctzl(); all
 * other arguments are handed to variable__ffs().
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
#define __ffs(word)						\
	(__builtin_constant_p(word)				\
		? (unsigned long)__builtin_ctzl(word)		\
		: variable__ffs(word))
93+
94+
static __always_inline unsigned long variable__fls(unsigned long word)
95+
{
96+
int num;
97+
98+
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
99+
RISCV_ISA_EXT_ZBB, 1)
100+
: : : : legacy);
101+
102+
asm volatile (".option push\n"
103+
".option arch,+zbb\n"
104+
"clz %0, %1\n"
105+
".option pop\n"
106+
: "=r" (word) : "r" (word) :);
107+
108+
return BITS_PER_LONG - 1 - word;
109+
110+
legacy:
111+
num = BITS_PER_LONG - 1;
112+
#if BITS_PER_LONG == 64
113+
if (!(word & (~0ul << 32))) {
114+
num -= 32;
115+
word <<= 32;
116+
}
117+
#endif
118+
if (!(word & (~0ul << (BITS_PER_LONG - 16)))) {
119+
num -= 16;
120+
word <<= 16;
121+
}
122+
if (!(word & (~0ul << (BITS_PER_LONG - 8)))) {
123+
num -= 8;
124+
word <<= 8;
125+
}
126+
if (!(word & (~0ul << (BITS_PER_LONG - 4)))) {
127+
num -= 4;
128+
word <<= 4;
129+
}
130+
if (!(word & (~0ul << (BITS_PER_LONG - 2)))) {
131+
num -= 2;
132+
word <<= 2;
133+
}
134+
if (!(word & (~0ul << (BITS_PER_LONG - 1))))
135+
num -= 1;
136+
return num;
137+
}
138+
139+
/**
 * __fls - find last set bit in a long word
 * @word: the word to search
 *
 * Build-time constant arguments are resolved with __builtin_clzl(); all
 * other arguments are handed to variable__fls().
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
#define __fls(word)							\
	(__builtin_constant_p(word)					\
		? (unsigned long)(BITS_PER_LONG - 1 - __builtin_clzl(word)) \
		: variable__fls(word))
149+
150+
static __always_inline int variable_ffs(int x)
151+
{
152+
int r;
153+
154+
if (!x)
155+
return 0;
156+
157+
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
158+
RISCV_ISA_EXT_ZBB, 1)
159+
: : : : legacy);
160+
161+
asm volatile (".option push\n"
162+
".option arch,+zbb\n"
163+
CTZW "%0, %1\n"
164+
".option pop\n"
165+
: "=r" (r) : "r" (x) :);
166+
167+
return r + 1;
168+
169+
legacy:
170+
r = 1;
171+
if (!(x & 0xffff)) {
172+
x >>= 16;
173+
r += 16;
174+
}
175+
if (!(x & 0xff)) {
176+
x >>= 8;
177+
r += 8;
178+
}
179+
if (!(x & 0xf)) {
180+
x >>= 4;
181+
r += 4;
182+
}
183+
if (!(x & 3)) {
184+
x >>= 2;
185+
r += 2;
186+
}
187+
if (!(x & 1)) {
188+
x >>= 1;
189+
r += 1;
190+
}
191+
return r;
192+
}
193+
194+
/**
 * ffs - find first set bit in a word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs routines.
 *
 * ffs(value) returns 0 if value is 0, or the position of the first set bit
 * if value is nonzero. The first (least significant) bit is at position 1.
 *
 * Build-time constant arguments are resolved with __builtin_ffs(); all other
 * arguments are handed to variable_ffs().
 */
#define ffs(x)							\
	(__builtin_constant_p(x)				\
		? __builtin_ffs(x)				\
		: variable_ffs(x))
204+
205+
static __always_inline int variable_fls(unsigned int x)
206+
{
207+
int r;
208+
209+
if (!x)
210+
return 0;
211+
212+
asm_volatile_goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
213+
RISCV_ISA_EXT_ZBB, 1)
214+
: : : : legacy);
215+
216+
asm volatile (".option push\n"
217+
".option arch,+zbb\n"
218+
CLZW "%0, %1\n"
219+
".option pop\n"
220+
: "=r" (r) : "r" (x) :);
221+
222+
return 32 - r;
223+
224+
legacy:
225+
r = 32;
226+
if (!(x & 0xffff0000u)) {
227+
x <<= 16;
228+
r -= 16;
229+
}
230+
if (!(x & 0xff000000u)) {
231+
x <<= 8;
232+
r -= 8;
233+
}
234+
if (!(x & 0xf0000000u)) {
235+
x <<= 4;
236+
r -= 4;
237+
}
238+
if (!(x & 0xc0000000u)) {
239+
x <<= 2;
240+
r -= 2;
241+
}
242+
if (!(x & 0x80000000u)) {
243+
x <<= 1;
244+
r -= 1;
245+
}
246+
return r;
247+
}
248+
249+
/**
 * fls - find last set bit in a word
 * @x: the word to search
 *
 * This is defined in a similar way as ffs, but returns the position of the
 * most significant set bit.
 *
 * fls(value) returns 0 if value is 0, or the position of the last set bit if
 * value is nonzero. The last (most significant) bit is at position 32.
 *
 * @x is copied into a local once, so the argument expression is evaluated a
 * single time. Build-time constants are resolved with __builtin_clz(); all
 * other values are handed to variable_fls().
 */
#define fls(x)								\
({									\
	typeof(x) x_ = (x);						\
									\
	__builtin_constant_p(x_)					\
		? (int)(x_ != 0 ? 32 - __builtin_clz(x_) : 0)		\
		: variable_fls(x_);					\
})
267+
268+
#endif /* !defined(CONFIG_RISCV_ISA_ZBB) || defined(NO_ALTERNATIVE) */
269+
270+
#include <asm-generic/bitops/ffz.h>
22271
#include <asm-generic/bitops/fls64.h>
23272
#include <asm-generic/bitops/sched.h>
24-
#include <asm-generic/bitops/ffs.h>
25273

26274
#include <asm-generic/bitops/hweight.h>
27275

drivers/firmware/efi/libstub/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \
2828
-DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \
2929
-DEFI_HAVE_STRCMP -fno-builtin -fpic \
3030
$(call cc-option,-mno-single-pic-base)
31-
cflags-$(CONFIG_RISCV) += -fpic
31+
cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE
3232
cflags-$(CONFIG_LOONGARCH) += -fpie
3333

3434
cflags-$(CONFIG_EFI_PARAMS_FROM_FDT) += -I$(srctree)/scripts/dtc/libfdt

0 commit comments

Comments
 (0)