
Commit 17f2c30

Merge patch series "riscv: enable EFFICIENT_UNALIGNED_ACCESS and DCACHE_WORD_ACCESS"
Jisheng Zhang <[email protected]> says:

Some riscv implementations, such as T-HEAD's C906, C908, C910 and C920, support efficient unaligned access, so for performance reasons we want to enable HAVE_EFFICIENT_UNALIGNED_ACCESS on these platforms. To avoid performance regressions on platforms without efficient unaligned access, HAVE_EFFICIENT_UNALIGNED_ACCESS can't be selected globally. The proper solution is runtime code patching based on the detected access speed, but that's not easy: it involves a lot of work across various subsystems such as net, mm, lib and so on, and can be done step by step. So let's take an easier route: add support for efficient unaligned access and hide it behind NONPORTABLE.

Patch 1 introduces RISCV_EFFICIENT_UNALIGNED_ACCESS, which depends on NONPORTABLE. If users know at config time that the kernel will only run on hardware with efficient unaligned access, they can enable it; obviously, a generic unified kernel Image shouldn't enable it. Patch 2 adds DCACHE_WORD_ACCESS support when MMU and RISCV_EFFICIENT_UNALIGNED_ACCESS are enabled.

The test program and steps below show how much performance can be improved:

$ cat tt.c
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#define ITERATIONS 1000000
#define PATH "123456781234567812345678123456781"

int main(void)
{
        unsigned long i;
        struct stat buf;

        for (i = 0; i < ITERATIONS; i++)
                stat(PATH, &buf);

        return 0;
}

$ gcc -O2 tt.c
$ touch 123456781234567812345678123456781
$ time ./a.out

Per my test on T-HEAD C910 platforms, the performance of the above test is improved by about 7.5%.

* b4-shazam-merge:
  riscv: select DCACHE_WORD_ACCESS for efficient unaligned access HW
  riscv: introduce RISCV_EFFICIENT_UNALIGNED_ACCESS

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents cb51bfe + d0fdc20 commit 17f2c30
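For illustration only (not part of the commit): a board-specific config fragment for hardware known to handle unaligned accesses efficiently could enable the new option as below; a generic, portable kernel Image should leave both symbols unset.

# Example .config fragment; only for kernels built exclusively for CPUs
# with fast unaligned accesses (e.g. T-HEAD C910), never for a generic Image.
CONFIG_NONPORTABLE=y
CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y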

File tree: 5 files changed, +89 / -0 lines changed

arch/riscv/Kconfig

Lines changed: 14 additions & 0 deletions

@@ -652,6 +652,20 @@ config RISCV_MISALIGNED
 	  load/store for both kernel and userspace. When disable, misaligned
 	  accesses will generate SIGBUS in userspace and panic in kernel.
 
+config RISCV_EFFICIENT_UNALIGNED_ACCESS
+	bool "Assume the CPU supports fast unaligned memory accesses"
+	depends on NONPORTABLE
+	select DCACHE_WORD_ACCESS if MMU
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
+	help
+	  Say Y here if you want the kernel to assume that the CPU supports
+	  efficient unaligned memory accesses. When enabled, this option
+	  improves the performance of the kernel on such CPUs. However, the
+	  kernel will run much more slowly, or will not be able to run at all,
+	  on CPUs that do not support efficient unaligned memory accesses.
+
+	  If unsure what to do here, say N.
+
 endmenu # "Platform type"
 
 menu "Kernel features"

arch/riscv/Makefile

Lines changed: 2 additions & 0 deletions

@@ -108,7 +108,9 @@ KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
 # unaligned accesses. While unaligned accesses are explicitly allowed in the
 # RISC-V ISA, they're emulated by machine mode traps on all extant
 # architectures. It's faster to have GCC emit only aligned accesses.
+ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS),y)
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+endif
 
 ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
 prepare: stack_protector_prepare
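As context for the flag being made conditional here, a small userspace sketch (my illustration, not from the patch) of the kind of access it governs: with -mstrict-align the compiler must expand a potentially misaligned word read into aligned or byte-wise accesses, while without the flag it may emit a single full-width load.

/* Illustration only: a possibly-misaligned 8-byte read.  Built with
 * -mstrict-align, GCC avoids emitting an unaligned load for the memcpy();
 * without the flag it may use a single doubleword load instruction. */
#include <string.h>

unsigned long read_word(const void *p)
{
        unsigned long v;

        memcpy(&v, p, sizeof(v));
        return v;
}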

arch/riscv/include/asm/asm-extable.h

Lines changed: 15 additions & 0 deletions

@@ -6,6 +6,7 @@
 #define EX_TYPE_FIXUP			1
 #define EX_TYPE_BPF			2
 #define EX_TYPE_UACCESS_ERR_ZERO	3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD	4
 
 #ifdef CONFIG_MMU
 
@@ -47,6 +48,11 @@
 #define EX_DATA_REG_ZERO_SHIFT	5
 #define EX_DATA_REG_ZERO	GENMASK(9, 5)
 
+#define EX_DATA_REG_DATA_SHIFT	0
+#define EX_DATA_REG_DATA	GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT	5
+#define EX_DATA_REG_ADDR	GENMASK(9, 5)
+
 #define EX_DATA_REG(reg, gpr)						\
 	"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
 
@@ -62,6 +68,15 @@
 #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)			\
 	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
 
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)	\
+	__DEFINE_ASM_GPR_NUMS						\
+	__ASM_EXTABLE_RAW(#insn, #fixup,				\
+			  __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD),	\
+			  "("						\
+			    EX_DATA_REG(DATA, data) " | "		\
+			    EX_DATA_REG(ADDR, addr)			\
+			  ")")
+
 #endif /* __ASSEMBLY__ */
 
 #else /* CONFIG_MMU */
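To make the bit layout above concrete, here is a small assumption-labelled userspace sketch of how the two 5-bit register numbers share the data word of an exception-table entry, mirroring what GENMASK()/FIELD_GET() express in the kernel headers; it is not the kernel implementation.

/* Sketch: DATA in bits 4..0, ADDR in bits 9..5, matching the defines above. */
#include <stdio.h>
#include <stdint.h>

#define EX_DATA_REG_DATA_SHIFT	0
#define EX_DATA_REG_DATA	(0x1fu << EX_DATA_REG_DATA_SHIFT)	/* GENMASK(4, 0) */
#define EX_DATA_REG_ADDR_SHIFT	5
#define EX_DATA_REG_ADDR	(0x1fu << EX_DATA_REG_ADDR_SHIFT)	/* GENMASK(9, 5) */

int main(void)
{
	/* Encode: destination register x14, address register x15. */
	uint32_t data = (14u << EX_DATA_REG_DATA_SHIFT) |
			(15u << EX_DATA_REG_ADDR_SHIFT);

	/* Decode, as ex_handler_load_unaligned_zeropad() does with FIELD_GET(). */
	unsigned int reg_data = (data & EX_DATA_REG_DATA) >> EX_DATA_REG_DATA_SHIFT;
	unsigned int reg_addr = (data & EX_DATA_REG_ADDR) >> EX_DATA_REG_ADDR_SHIFT;

	printf("data=%u addr=%u\n", reg_data, reg_addr);	/* prints: data=14 addr=15 */
	return 0;
}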

arch/riscv/include/asm/word-at-a-time.h

Lines changed: 27 additions & 0 deletions

@@ -9,6 +9,7 @@
 #define _ASM_RISCV_WORD_AT_A_TIME_H
 
 
+#include <asm/asm-extable.h>
 #include <linux/kernel.h>
 
 struct word_at_a_time {
@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
+#ifdef CONFIG_DCACHE_WORD_ACCESS
+
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	" REG_L " %0, %2\n"
+	"2:\n"
+	_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+	: "=&r" (ret)
+	: "r" (addr), "m" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+#endif /* CONFIG_DCACHE_WORD_ACCESS */
+
 #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
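For orientation, a minimal sketch (my assumption about typical usage, not part of this patch) of how word-at-a-time callers such as the dcache name hashing combine load_unaligned_zeropad() with the helpers from this header; wordwise_strlen() is a hypothetical name, and kernel-internal context is assumed.

/*
 * Hedged usage sketch: scan a string one word at a time.  The final load
 * may cross into an unmapped page; load_unaligned_zeropad() turns that
 * fault into zero bytes instead of an oops.
 */
#include <asm/word-at-a-time.h>

static inline long wordwise_strlen(const char *s)
{
	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
	unsigned long val, mask;
	long len = 0;

	for (;;) {
		val = load_unaligned_zeropad(s + len);
		if (has_zero(val, &mask, &constants)) {
			mask = prep_zero_mask(val, mask, &constants);
			mask = create_zero_mask(mask);
			return len + find_zero(mask);
		}
		len += sizeof(unsigned long);
	}
}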

arch/riscv/mm/extable.c

Lines changed: 31 additions & 0 deletions

@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
 	return true;
 }
 
+static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
+{
+	if (unlikely(!offset || offset > MAX_REG_OFFSET))
+		return 0;
+
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
 static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
 				unsigned long val)
 {
@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
 	return true;
 }
 
+static bool
+ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+				  struct pt_regs *regs)
+{
+	int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned long data, addr, offset;
+
+	addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
+
+	offset = addr & 0x7UL;
+	addr &= ~0x7UL;
+
+	data = *(unsigned long *)addr >> (offset * 8);
+
+	regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
+
+	regs->epc = get_ex_fixup(ex);
+	return true;
+}
+
 bool fixup_exception(struct pt_regs *regs)
 {
	const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
 		return ex_handler_bpf(ex, regs);
 	case EX_TYPE_UACCESS_ERR_ZERO:
 		return ex_handler_uaccess_err_zero(ex, regs);
+	case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
+		return ex_handler_load_unaligned_zeropad(ex, regs);
 	}
 
 	BUG();
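To see why shifting the re-loaded aligned word by offset * 8 yields the zero-padded result, here is a small assumption-labelled userspace illustration of the same arithmetic (little-endian, 64-bit); it is not part of the patch.

/*
 * Worked example: the faulting access was at an address with offset 5
 * inside its aligned doubleword, so only the top three bytes of that
 * word are accessible ('a', 'b', 'c' here).  Reloading the aligned word
 * and shifting right by 5 * 8 bits moves those bytes into the low end
 * and leaves the inaccessible part read as zero.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char page_end[8] = { 0, 0, 0, 0, 0, 'a', 'b', 'c' };
	unsigned long word, offset = 5;

	memcpy(&word, page_end, sizeof(word));	/* aligned reload */
	word >>= offset * 8;			/* zero-pad the missing bytes */
	printf("0x%016lx\n", word);		/* prints 0x0000000000636261 */
	return 0;
}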
