
Commit 259aaf0

Merge patch series "riscv: uaccess: optimisations"
Cyril Bur <[email protected]> says: This series tries to optimize riscv uaccess by allowing the use of user_access_begin() and user_access_end() which permits grouping user accesses and avoiding the CSR write penalty for each access. The error path can also be optimised using asm goto which patches 3 and 4 achieve. This will speed up jumping to labels by avoiding the need of an intermediary error type variable within the uaccess macros I did read the discussion this series generated. It isn't clear to me which direction to take the patches, if any. * b4-shazam-merge: riscv: uaccess: use 'asm_goto_output' for get_user() riscv: uaccess: use 'asm goto' for put_user() riscv: uaccess: use input constraints for ptr of __put_user() riscv: implement user_access_begin() and families riscv: save the SR_SUM status over switches Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents 85f79de + f6bff78 commit 259aaf0
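For context on what the series enables, here is a minimal usage sketch. The helper name fill_user_pair() is hypothetical, not from the series: both stores share one user_access_begin()/user_access_end() window, so the sstatus CSR is toggled once around the group rather than once per access, and a faulting store branches straight to the efault label via asm goto.

/* Minimal sketch, assuming the unsafe accessors added below;
 * fill_user_pair() is a hypothetical helper, not from this series. */
static int fill_user_pair(u32 __user *uptr, u32 a, u32 b)
{
	if (!user_access_begin(uptr, 2 * sizeof(u32)))
		return -EFAULT;
	unsafe_put_user(a, &uptr[0], efault);
	unsafe_put_user(b, &uptr[1], efault);
	user_access_end();
	return 0;

efault:
	user_access_end();	/* the window must be closed on the error path too */
	return -EFAULT;
}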

File tree: 4 files changed, +179 / -53 lines

arch/riscv/include/asm/processor.h

Lines changed: 1 addition & 0 deletions
@@ -103,6 +103,7 @@ struct thread_struct {
 	struct __riscv_d_ext_state fstate;
 	unsigned long bad_cause;
 	unsigned long envcfg;
+	unsigned long status;
 	u32 riscv_v_flags;
 	u32 vstate_ctrl;
 	struct __riscv_v_ext_state vstate;
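The new thread.status field gives __switch_to() a place to park the outgoing task's sstatus. Conceptually (illustrative C only; the real code is the entry.S hunk further down, and csr_read()/csr_set() are the kernel's CSR accessors):

	prev->thread.status = csr_read(CSR_STATUS);	/* on switch-out */
	csr_set(CSR_STATUS, next->thread.status);	/* on switch-in */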

arch/riscv/include/asm/uaccess.h

Lines changed: 165 additions & 53 deletions
@@ -61,6 +61,19 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
 #define __disable_user_access() \
 	__asm__ __volatile__ ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory")
 
+/*
+ * This is the smallest unsigned integer type that can fit a value
+ * (up to 'long long')
+ */
+#define __inttype(x) __typeof__( \
+	__typefits(x, char, \
+	  __typefits(x, short, \
+	    __typefits(x, int, \
+	      __typefits(x, long, 0ULL)))))
+
+#define __typefits(x, type, not) \
+	__builtin_choose_expr(sizeof(x) <= sizeof(type), (unsigned type)0, not)
+
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
@@ -83,27 +96,58 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm, unsigne
  * call.
  */
 
-#define __get_user_asm(insn, x, ptr, err) \
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_asm(insn, x, ptr, label) \
+	asm_goto_output( \
+		"1:\n" \
+		"	" insn " %0, %1\n" \
+		_ASM_EXTABLE_UACCESS_ERR(1b, %l2, %0) \
+		: "=&r" (x) \
+		: "m" (*(ptr)) : : label)
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_asm(insn, x, ptr, label) \
 do { \
-	__typeof__(x) __x; \
+	long __gua_err = 0; \
 	__asm__ __volatile__ ( \
 		"1:\n" \
 		"	" insn " %1, %2\n" \
 		"2:\n" \
 		_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1) \
-		: "+r" (err), "=&r" (__x) \
+		: "+r" (__gua_err), "=&r" (x) \
 		: "m" (*(ptr))); \
-	(x) = __x; \
+	if (__gua_err) \
+		goto label; \
 } while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
 
 #ifdef CONFIG_64BIT
-#define __get_user_8(x, ptr, err) \
-	__get_user_asm("ld", x, ptr, err)
+#define __get_user_8(x, ptr, label) \
+	__get_user_asm("ld", x, ptr, label)
 #else /* !CONFIG_64BIT */
-#define __get_user_8(x, ptr, err) \
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+#define __get_user_8(x, ptr, label) \
+	u32 __user *__ptr = (u32 __user *)(ptr); \
+	u32 __lo, __hi; \
+	asm_goto_output( \
+		"1:\n" \
+		"	lw %0, %2\n" \
+		"2:\n" \
+		"	lw %1, %3\n" \
+		_ASM_EXTABLE_UACCESS_ERR(1b, %l4, %0) \
+		_ASM_EXTABLE_UACCESS_ERR(2b, %l4, %0) \
+		: "=&r" (__lo), "=r" (__hi) \
+		: "m" (__ptr[__LSW]), "m" (__ptr[__MSW]) \
+		: : label); \
+	(x) = (__typeof__(x))((__typeof__((x) - (x)))( \
+		(((u64)__hi << 32) | __lo))); \
+
+#else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+#define __get_user_8(x, ptr, label) \
 do { \
 	u32 __user *__ptr = (u32 __user *)(ptr); \
 	u32 __lo, __hi; \
+	long __gu8_err = 0; \
 	__asm__ __volatile__ ( \
 		"1:\n" \
 		"	lw %1, %3\n" \
@@ -112,35 +156,51 @@ do { \
 		"3:\n" \
 		_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 3b, %0, %1) \
 		_ASM_EXTABLE_UACCESS_ERR_ZERO(2b, 3b, %0, %1) \
-		: "+r" (err), "=&r" (__lo), "=r" (__hi) \
+		: "+r" (__gu8_err), "=&r" (__lo), "=r" (__hi) \
 		: "m" (__ptr[__LSW]), "m" (__ptr[__MSW])); \
-	if (err) \
+	if (__gu8_err) { \
 		__hi = 0; \
-	(x) = (__typeof__(x))((__typeof__((x)-(x)))( \
+		goto label; \
+	} \
+	(x) = (__typeof__(x))((__typeof__((x) - (x)))( \
 		(((u64)__hi << 32) | __lo))); \
 } while (0)
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
+
 #endif /* CONFIG_64BIT */
 
-#define __get_user_nocheck(x, __gu_ptr, __gu_err) \
+#define __get_user_nocheck(x, __gu_ptr, label) \
 do { \
 	switch (sizeof(*__gu_ptr)) { \
 	case 1: \
-		__get_user_asm("lb", (x), __gu_ptr, __gu_err); \
+		__get_user_asm("lb", (x), __gu_ptr, label); \
 		break; \
 	case 2: \
-		__get_user_asm("lh", (x), __gu_ptr, __gu_err); \
+		__get_user_asm("lh", (x), __gu_ptr, label); \
 		break; \
 	case 4: \
-		__get_user_asm("lw", (x), __gu_ptr, __gu_err); \
+		__get_user_asm("lw", (x), __gu_ptr, label); \
 		break; \
 	case 8: \
-		__get_user_8((x), __gu_ptr, __gu_err); \
+		__get_user_8((x), __gu_ptr, label); \
 		break; \
 	default: \
 		BUILD_BUG(); \
 	} \
 } while (0)
 
+#define __get_user_error(x, ptr, err) \
+do { \
+	__label__ __gu_failed; \
+	\
+	__get_user_nocheck(x, ptr, __gu_failed); \
+	err = 0; \
+	break; \
+__gu_failed: \
+	x = 0; \
+	err = -EFAULT; \
+} while (0)
+
 /**
  * __get_user: - Get a simple variable from user space, with less checking.
  * @x: Variable to store result.
@@ -165,13 +225,16 @@ do { \
 ({ \
 	const __typeof__(*(ptr)) __user *__gu_ptr = untagged_addr(ptr); \
 	long __gu_err = 0; \
+	__typeof__(x) __gu_val; \
 	\
 	__chk_user_ptr(__gu_ptr); \
 	\
 	__enable_user_access(); \
-	__get_user_nocheck(x, __gu_ptr, __gu_err); \
+	__get_user_error(__gu_val, __gu_ptr, __gu_err); \
 	__disable_user_access(); \
 	\
+	(x) = __gu_val; \
+	\
 	__gu_err; \
 })
 
@@ -201,61 +264,66 @@ do { \
 	((x) = (__force __typeof__(x))0, -EFAULT); \
 })
 
-#define __put_user_asm(insn, x, ptr, err) \
+#define __put_user_asm(insn, x, ptr, label) \
 do { \
 	__typeof__(*(ptr)) __x = x; \
-	__asm__ __volatile__ ( \
+	asm goto( \
 		"1:\n" \
-		"	" insn " %z2, %1\n" \
-		"2:\n" \
-		_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0) \
-		: "+r" (err), "=m" (*(ptr)) \
-		: "rJ" (__x)); \
+		"	" insn " %z0, %1\n" \
+		_ASM_EXTABLE(1b, %l2) \
+		: : "rJ" (__x), "m"(*(ptr)) : : label); \
 } while (0)
 
 #ifdef CONFIG_64BIT
-#define __put_user_8(x, ptr, err) \
-	__put_user_asm("sd", x, ptr, err)
+#define __put_user_8(x, ptr, label) \
+	__put_user_asm("sd", x, ptr, label)
 #else /* !CONFIG_64BIT */
-#define __put_user_8(x, ptr, err) \
+#define __put_user_8(x, ptr, label) \
 do { \
 	u32 __user *__ptr = (u32 __user *)(ptr); \
 	u64 __x = (__typeof__((x)-(x)))(x); \
-	__asm__ __volatile__ ( \
+	asm goto( \
 		"1:\n" \
-		"	sw %z3, %1\n" \
+		"	sw %z0, %2\n" \
 		"2:\n" \
-		"	sw %z4, %2\n" \
-		"3:\n" \
-		_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0) \
-		_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0) \
-		: "+r" (err), \
-		  "=m" (__ptr[__LSW]), \
-		  "=m" (__ptr[__MSW]) \
-		: "rJ" (__x), "rJ" (__x >> 32)); \
+		"	sw %z1, %3\n" \
+		_ASM_EXTABLE(1b, %l4) \
+		_ASM_EXTABLE(2b, %l4) \
+		: : "rJ" (__x), "rJ" (__x >> 32), \
+		    "m" (__ptr[__LSW]), \
+		    "m" (__ptr[__MSW]) : : label); \
 } while (0)
 #endif /* CONFIG_64BIT */
 
-#define __put_user_nocheck(x, __gu_ptr, __pu_err) \
+#define __put_user_nocheck(x, __gu_ptr, label) \
 do { \
 	switch (sizeof(*__gu_ptr)) { \
 	case 1: \
-		__put_user_asm("sb", (x), __gu_ptr, __pu_err); \
+		__put_user_asm("sb", (x), __gu_ptr, label); \
 		break; \
 	case 2: \
-		__put_user_asm("sh", (x), __gu_ptr, __pu_err); \
+		__put_user_asm("sh", (x), __gu_ptr, label); \
 		break; \
 	case 4: \
-		__put_user_asm("sw", (x), __gu_ptr, __pu_err); \
+		__put_user_asm("sw", (x), __gu_ptr, label); \
 		break; \
 	case 8: \
-		__put_user_8((x), __gu_ptr, __pu_err); \
+		__put_user_8((x), __gu_ptr, label); \
 		break; \
 	default: \
 		BUILD_BUG(); \
 	} \
 } while (0)
 
+#define __put_user_error(x, ptr, err) \
+do { \
+	__label__ err_label; \
+	__put_user_nocheck(x, ptr, err_label); \
+	break; \
+err_label: \
+	(err) = -EFAULT; \
+} while (0)
+
 /**
  * __put_user: - Write a simple value into user space, with less checking.
  * @x: Value to copy to user space.
@@ -286,7 +354,7 @@ do { \
 	__chk_user_ptr(__gu_ptr); \
 	\
 	__enable_user_access(); \
-	__put_user_nocheck(__val, __gu_ptr, __pu_err); \
+	__put_user_error(__val, __gu_ptr, __pu_err); \
 	__disable_user_access(); \
 	\
 	__pu_err; \
@@ -351,21 +419,65 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
 }
 
 #define __get_kernel_nofault(dst, src, type, err_label) \
+	__get_user_nocheck(*((type *)(dst)), (type *)(src), err_label)
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+	__put_user_nocheck(*((type *)(src)), (type *)(dst), err_label)
+
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
+{
+	if (unlikely(!access_ok(ptr, len)))
+		return 0;
+	__enable_user_access();
+	return 1;
+}
+#define user_access_begin user_access_begin
+#define user_access_end __disable_user_access
+
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long enabled) { }
+
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_put_user(x, ptr, label) \
+	__put_user_nocheck(x, (ptr), label)
+
+#define unsafe_get_user(x, ptr, label) do { \
+	__inttype(*(ptr)) __gu_val; \
+	__get_user_nocheck(__gu_val, (ptr), label); \
+	(x) = (__force __typeof__(*(ptr)))__gu_val; \
+} while (0)
+
+#define unsafe_copy_loop(dst, src, len, type, op, label) \
+	while (len >= sizeof(type)) { \
+		op(*(type *)(src), (type __user *)(dst), label); \
+		dst += sizeof(type); \
+		src += sizeof(type); \
+		len -= sizeof(type); \
+	}
+
+#define unsafe_copy_to_user(_dst, _src, _len, label) \
 do { \
-	long __kr_err = 0; \
-	\
-	__get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
-	if (unlikely(__kr_err)) \
-		goto err_label; \
+	char __user *__ucu_dst = (_dst); \
+	const char *__ucu_src = (_src); \
+	size_t __ucu_len = (_len); \
+	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, unsafe_put_user, label); \
+	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, unsafe_put_user, label); \
+	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, unsafe_put_user, label); \
+	unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, unsafe_put_user, label); \
 } while (0)
 
-#define __put_kernel_nofault(dst, src, type, err_label) \
+#define unsafe_copy_from_user(_dst, _src, _len, label) \
 do { \
-	long __kr_err = 0; \
-	\
-	__put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
-	if (unlikely(__kr_err)) \
-		goto err_label; \
+	char *__ucu_dst = (_dst); \
+	const char __user *__ucu_src = (_src); \
+	size_t __ucu_len = (_len); \
+	unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u64, unsafe_get_user, label); \
+	unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u32, unsafe_get_user, label); \
+	unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u16, unsafe_get_user, label); \
+	unsafe_copy_loop(__ucu_src, __ucu_dst, __ucu_len, u8, unsafe_get_user, label); \
 } while (0)
 
 #else /* CONFIG_MMU */
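As a concrete illustration of the unsafe_copy_loop() passes above: a 15-byte unsafe_copy_to_user() decomposes into one u64, one u32, one u16, and one u8 store (8 + 4 + 2 + 1 bytes), all inside a single SR_SUM window. A sketch with a hypothetical caller:

/* Hypothetical caller, for illustration only: each of the four stores
 * can fault straight to 'efault' with no error variable in between. */
static int copy_record_to_user(void __user *udst, const void *ksrc)
{
	if (!user_access_begin(udst, 15))
		return -EFAULT;
	unsafe_copy_to_user(udst, ksrc, 15, efault);
	user_access_end();
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}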

arch/riscv/kernel/asm-offsets.c

Lines changed: 5 additions & 0 deletions
@@ -34,6 +34,7 @@ void asm_offsets(void)
 	OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
 	OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
 	OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+	OFFSET(TASK_THREAD_STATUS, task_struct, thread.status);
 
 	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
 	OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
@@ -346,6 +347,10 @@ void asm_offsets(void)
 		  offsetof(struct task_struct, thread.s[11])
 		- offsetof(struct task_struct, thread.ra)
 	);
+	DEFINE(TASK_THREAD_STATUS_RA,
+		  offsetof(struct task_struct, thread.status)
+		- offsetof(struct task_struct, thread.ra)
+	);
 
 	DEFINE(TASK_THREAD_F0_F0,
 		  offsetof(struct task_struct, thread.fstate.f[0])
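For readers unfamiliar with asm-offsets.c: OFFSET() and DEFINE() make the build emit numeric #defines into the generated include/generated/asm-offsets.h, which is what lets entry.S address thread.status relative to thread.ra. Roughly (the value shown is a placeholder; the real one depends on config and struct layout):

/* Emitted into asm-offsets.h at build time; 104 is a placeholder. */
#define TASK_THREAD_STATUS_RA 104 /* thread.status - thread.ra */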

arch/riscv/kernel/entry.S

Lines changed: 8 additions & 0 deletions
@@ -397,9 +397,17 @@ SYM_FUNC_START(__switch_to)
 	REG_S s9, TASK_THREAD_S9_RA(a3)
 	REG_S s10, TASK_THREAD_S10_RA(a3)
 	REG_S s11, TASK_THREAD_S11_RA(a3)
+
+	/* save the user space access flag */
+	li s0, SR_SUM
+	csrr s1, CSR_STATUS
+	REG_S s1, TASK_THREAD_STATUS_RA(a3)
+
 	/* Save the kernel shadow call stack pointer */
 	scs_save_current
 	/* Restore context from next->thread */
+	REG_L s0, TASK_THREAD_STATUS_RA(a4)
+	csrs CSR_STATUS, s0
 	REG_L ra, TASK_THREAD_RA_RA(a4)
 	REG_L sp, TASK_THREAD_SP_RA(a4)
 	REG_L s0, TASK_THREAD_S0_RA(a4)
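Why __switch_to needs this (per "riscv: save the SR_SUM status over switches"): once user_access_begin() windows exist, a task can be preempted while SR_SUM is set; without the save/restore it would resume with user access disabled and fault on its next access. A sketch of the hazard, using a hypothetical helper:

/* Hypothetical illustration: if the task is switched out between the
 * two loads, thread.status now carries SR_SUM across the switch, so
 * the second unsafe_get_user() still has user access enabled. */
static long sum_two_words(const u32 __user *uptr)
{
	u32 a, b;

	if (!user_access_begin(uptr, 2 * sizeof(u32)))
		return -EFAULT;
	unsafe_get_user(a, &uptr[0], efault);
	/* an interrupt here may lead to __switch_to() */
	unsafe_get_user(b, &uptr[1], efault);
	user_access_end();
	return a + b;

efault:
	user_access_end();
	return -EFAULT;
}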
