Skip to content

Commit d0fdc20

Browse files
xhackerustc authored and palmer-dabbelt committed
riscv: select DCACHE_WORD_ACCESS for efficient unaligned access HW
DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string comparisons in the vfs layer. This patch implements support for load_unaligned_zeropad in much the same way as has been done for arm64. Here is the test program and step: $ cat tt.c #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #define ITERATIONS 1000000 #define PATH "123456781234567812345678123456781" int main(void) { unsigned long i; struct stat buf; for (i = 0; i < ITERATIONS; i++) stat(PATH, &buf); return 0; } $ gcc -O2 tt.c $ touch 123456781234567812345678123456781 $ time ./a.out Per my test on T-HEAD C910 platforms, the above test performance is improved by about 7.5%. Signed-off-by: Jisheng Zhang <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent b6da6cb commit d0fdc20

File tree

4 files changed

+74
-0
lines changed

4 files changed

+74
-0
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,7 @@ config RISCV_MISALIGNED
654654
config RISCV_EFFICIENT_UNALIGNED_ACCESS
655655
bool "Assume the CPU supports fast unaligned memory accesses"
656656
depends on NONPORTABLE
657+
select DCACHE_WORD_ACCESS if MMU
657658
select HAVE_EFFICIENT_UNALIGNED_ACCESS
658659
help
659660
Say Y here if you want the kernel to assume that the CPU supports

arch/riscv/include/asm/asm-extable.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#define EX_TYPE_FIXUP 1
77
#define EX_TYPE_BPF 2
88
#define EX_TYPE_UACCESS_ERR_ZERO 3
9+
#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4
910

1011
#ifdef CONFIG_MMU
1112

@@ -47,6 +48,11 @@
4748
#define EX_DATA_REG_ZERO_SHIFT 5
4849
#define EX_DATA_REG_ZERO GENMASK(9, 5)
4950

51+
#define EX_DATA_REG_DATA_SHIFT 0
52+
#define EX_DATA_REG_DATA GENMASK(4, 0)
53+
#define EX_DATA_REG_ADDR_SHIFT 5
54+
#define EX_DATA_REG_ADDR GENMASK(9, 5)
55+
5056
#define EX_DATA_REG(reg, gpr) \
5157
"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
5258

@@ -62,6 +68,15 @@
6268
#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \
6369
_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
6470

71+
/*
 * Generate an exception-table entry of type
 * EX_TYPE_LOAD_UNALIGNED_ZEROPAD for the load at @insn, with fixup
 * target @fixup.  The entry's data word packs the destination GPR
 * number (@data, bits 4:0) and the address GPR number (@addr,
 * bits 9:5), matching the EX_DATA_REG_DATA/EX_DATA_REG_ADDR field
 * definitions above; the fixup handler decodes them with FIELD_GET.
 */
#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \
72+
__DEFINE_ASM_GPR_NUMS \
73+
__ASM_EXTABLE_RAW(#insn, #fixup, \
74+
__stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \
75+
"(" \
76+
EX_DATA_REG(DATA, data) " | " \
77+
EX_DATA_REG(ADDR, addr) \
78+
")")
79+
6580
#endif /* __ASSEMBLY__ */
6681

6782
#else /* CONFIG_MMU */

arch/riscv/include/asm/word-at-a-time.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#define _ASM_RISCV_WORD_AT_A_TIME_H
1010

1111

12+
#include <asm/asm-extable.h>
1213
#include <linux/kernel.h>
1314

1415
struct word_at_a_time {
@@ -45,4 +46,30 @@ static inline unsigned long find_zero(unsigned long mask)
4546
/* The mask we created is directly usable as a bytemask */
4647
#define zero_bytemask(mask) (mask)
4748

49+
#ifdef CONFIG_DCACHE_WORD_ACCESS
50+
51+
/*
52+
* Load an unaligned word from kernel space.
53+
*
54+
* In the (very unlikely) case of the word being a page-crosser
55+
* and the next page not being mapped, take the exception and
56+
* return zeroes in the non-existing part.
57+
*/
58+
static inline unsigned long load_unaligned_zeropad(const void *addr)
59+
{
60+
unsigned long ret;
61+
62+
/*
 * Load word from unaligned pointer addr.  Label "1:" marks the
 * faultable load; the extable entry generated below routes a fault
 * to label "2:" after the fixup handler
 * (ex_handler_load_unaligned_zeropad) has written the zero-padded
 * value into the GPR bound to %0.  The "m" input makes the read of
 * *addr visible to the compiler; "=&r" (earlyclobber) keeps the
 * output register distinct from the address register.
 */
63+
asm(
64+
"1: " REG_L " %0, %2\n"
65+
"2:\n"
66+
_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
67+
: "=&r" (ret)
68+
: "r" (addr), "m" (*(unsigned long *)addr));
69+
70+
return ret;
71+
}
72+
73+
#endif /* CONFIG_DCACHE_WORD_ACCESS */
74+
4875
#endif /* _ASM_RISCV_WORD_AT_A_TIME_H */

arch/riscv/mm/extable.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
2727
return true;
2828
}
2929

30+
static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
31+
{
32+
if (unlikely(!offset || offset > MAX_REG_OFFSET))
33+
return 0;
34+
35+
return *(unsigned long *)((unsigned long)regs + offset);
36+
}
37+
3038
static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
3139
unsigned long val)
3240
{
@@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
5058
return true;
5159
}
5260

61+
static bool
62+
ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
63+
struct pt_regs *regs)
64+
{
65+
int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
66+
int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
67+
unsigned long data, addr, offset;
68+
69+
addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
70+
71+
offset = addr & 0x7UL;
72+
addr &= ~0x7UL;
73+
74+
data = *(unsigned long *)addr >> (offset * 8);
75+
76+
regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
77+
78+
regs->epc = get_ex_fixup(ex);
79+
return true;
80+
}
81+
5382
bool fixup_exception(struct pt_regs *regs)
5483
{
5584
const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
6594
return ex_handler_bpf(ex, regs);
6695
case EX_TYPE_UACCESS_ERR_ZERO:
6796
return ex_handler_uaccess_err_zero(ex, regs);
97+
case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
98+
return ex_handler_load_unaligned_zeropad(ex, regs);
6899
}
69100

70101
BUG();

0 commit comments

Comments
 (0)