Skip to content

Commit 4202f62

Browse files
Merge patch series "riscv: Create and document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl"
Charlie Jenkins <[email protected]> says: Improve the performance of icache flushing by creating a new prctl flag PR_RISCV_SET_ICACHE_FLUSH_CTX. The interface is left generic to allow for future expansions such as with the proposed J extension [1]. Documentation is also provided to explain the use case. Patch sent to add PR_RISCV_SET_ICACHE_FLUSH_CTX to man-pages [2]. [1] https://github.com/riscv/riscv-j-extension [2] https://lore.kernel.org/linux-man/[email protected] * b4-shazam-merge: cpumask: Add assign cpu documentation: Document PR_RISCV_SET_ICACHE_FLUSH_CTX prctl riscv: Include riscv_set_icache_flush_ctx prctl riscv: Remove unnecessary irqflags processor.h include Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents 3f45244 + decde1f commit 4202f62

File tree

11 files changed

+286
-9
lines changed

11 files changed

+286
-9
lines changed

Documentation/arch/riscv/cmodx.rst

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
==============================================================================
4+
Concurrent Modification and Execution of Instructions (CMODX) for RISC-V Linux
5+
==============================================================================
6+
7+
CMODX is a programming technique where a program executes instructions that were
8+
modified by the program itself. Instruction storage and the instruction cache
9+
(icache) are not guaranteed to be synchronized on RISC-V hardware. Therefore, the
10+
program must enforce its own synchronization with the unprivileged fence.i
11+
instruction.
12+
13+
However, the default Linux ABI prohibits the use of fence.i in userspace
14+
applications. At any point the scheduler may migrate a task onto a new hart. If
15+
migration occurs after the userspace synchronized the icache and instruction
16+
storage with fence.i, the icache on the new hart will no longer be clean. This
17+
is due to the behavior of fence.i only affecting the hart that it is called on.
18+
Thus, the hart that the task has been migrated to may not have synchronized
19+
instruction storage and icache.
20+
21+
There are two ways to solve this problem: use the riscv_flush_icache() syscall,
22+
or use the ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` prctl() and emit fence.i in
23+
userspace. The syscall performs a one-off icache flushing operation. The prctl
24+
changes the Linux ABI to allow userspace to emit icache flushing operations.
25+
26+
As an aside, "deferred" icache flushes can sometimes be triggered in the kernel.
27+
At the time of writing, this only occurs during the riscv_flush_icache() syscall
28+
and when the kernel uses copy_to_user_page(). These deferred flushes happen only
29+
when the memory map being used by a hart changes. If the prctl() context caused
30+
an icache flush, this deferred icache flush will be skipped as it is redundant.
31+
Therefore, there will be no additional flush when using the riscv_flush_icache()
32+
syscall inside of the prctl() context.
33+
34+
prctl() Interface
35+
---------------------
36+
37+
Call prctl() with ``PR_RISCV_SET_ICACHE_FLUSH_CTX`` as the first argument. The
38+
remaining arguments will be delegated to the riscv_set_icache_flush_ctx
39+
function detailed below.
40+
41+
.. kernel-doc:: arch/riscv/mm/cacheflush.c
42+
:identifiers: riscv_set_icache_flush_ctx
43+
44+
Example usage:
45+
46+
The following files are meant to be compiled and linked with each other. The
47+
modify_instruction() function replaces an add with 0 with an add with one,
48+
causing the instruction sequence in get_value() to change from returning a zero
49+
to returning a one.
50+
51+
cmodx.c::
52+
53+
#include <stdio.h>
54+
#include <sys/prctl.h>
55+
56+
extern int get_value();
57+
extern void modify_instruction();
58+
59+
int main()
60+
{
61+
int value = get_value();
62+
printf("Value before cmodx: %d\n", value);
63+
64+
// Call prctl before first fence.i is called inside modify_instruction
65+
prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
66+
modify_instruction();
67+
// Call prctl after final fence.i is called in process
68+
prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF, PR_RISCV_SCOPE_PER_PROCESS);
69+
70+
value = get_value();
71+
printf("Value after cmodx: %d\n", value);
72+
return 0;
73+
}
74+
75+
cmodx.S::
76+
77+
.option norvc
78+
79+
.text
80+
.global modify_instruction
81+
modify_instruction:
82+
lw a0, new_insn
83+
lui a5,%hi(old_insn)
84+
sw a0,%lo(old_insn)(a5)
85+
fence.i
86+
ret
87+
88+
.section modifiable, "awx"
89+
.global get_value
90+
get_value:
91+
li a0, 0
92+
old_insn:
93+
addi a0, a0, 0
94+
ret
95+
96+
.data
97+
new_insn:
98+
addi a0, a0, 1

Documentation/arch/riscv/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ RISC-V architecture
1313
patch-acceptance
1414
uabi
1515
vector
16+
cmodx
1617

1718
features
1819

arch/riscv/include/asm/irqflags.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
#ifndef _ASM_RISCV_IRQFLAGS_H
88
#define _ASM_RISCV_IRQFLAGS_H
99

10-
#include <asm/processor.h>
1110
#include <asm/csr.h>
1211

1312
/* read interrupt enabled status */

arch/riscv/include/asm/mmu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ typedef struct {
1919
#ifdef CONFIG_SMP
2020
/* A local icache flush is needed before user execution can resume. */
2121
cpumask_t icache_stale_mask;
22+
/* Force local icache flush on all migrations. */
23+
bool force_icache_flush;
2224
#endif
2325
#ifdef CONFIG_BINFMT_ELF_FDPIC
2426
unsigned long exec_fdpic_loadmap;

arch/riscv/include/asm/processor.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#endif
6969

7070
#ifndef __ASSEMBLY__
71+
#include <linux/cpumask.h>
7172

7273
struct task_struct;
7374
struct pt_regs;
@@ -122,6 +123,12 @@ struct thread_struct {
122123
struct __riscv_v_ext_state vstate;
123124
unsigned long align_ctl;
124125
struct __riscv_v_ext_state kernel_vstate;
126+
#ifdef CONFIG_SMP
127+
/* Flush the icache on migration */
128+
bool force_icache_flush;
129+
/* A forced icache flush is not needed if migrating to the previous cpu. */
130+
unsigned int prev_cpu;
131+
#endif
125132
};
126133

127134
/* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -183,6 +190,9 @@ extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
183190
#define GET_UNALIGN_CTL(tsk, addr) get_unalign_ctl((tsk), (addr))
184191
#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
185192

193+
#define RISCV_SET_ICACHE_FLUSH_CTX(arg1, arg2) riscv_set_icache_flush_ctx(arg1, arg2)
194+
extern int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long per_thread);
195+
186196
#endif /* __ASSEMBLY__ */
187197

188198
#endif /* _ASM_RISCV_PROCESSOR_H */

arch/riscv/include/asm/switch_to.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <linux/jump_label.h>
1010
#include <linux/sched/task_stack.h>
11+
#include <linux/mm_types.h>
1112
#include <asm/vector.h>
1213
#include <asm/cpufeature.h>
1314
#include <asm/processor.h>
@@ -72,14 +73,36 @@ static __always_inline bool has_fpu(void) { return false; }
7273
extern struct task_struct *__switch_to(struct task_struct *,
7374
struct task_struct *);
7475

76+
static inline bool switch_to_should_flush_icache(struct task_struct *task)
77+
{
78+
#ifdef CONFIG_SMP
79+
bool stale_mm = task->mm && task->mm->context.force_icache_flush;
80+
bool stale_thread = task->thread.force_icache_flush;
81+
bool thread_migrated = smp_processor_id() != task->thread.prev_cpu;
82+
83+
return thread_migrated && (stale_mm || stale_thread);
84+
#else
85+
return false;
86+
#endif
87+
}
88+
89+
/*
 * Record the hart the outgoing task is leaving, so that
 * switch_to_should_flush_icache() can later detect a migration.
 * No-op on !SMP kernels, where tasks cannot move between harts.
 */
#ifdef CONFIG_SMP
90+
#define __set_prev_cpu(thread) ((thread).prev_cpu = smp_processor_id())
91+
#else
92+
#define __set_prev_cpu(thread)
93+
#endif
94+
7595
/*
 * Architecture context switch.  Records the outgoing task's hart id
 * (__set_prev_cpu), hands off FPU and vector state when present, flushes
 * the local icache when the incoming task requires it (see
 * switch_to_should_flush_icache()), then performs the low-level register
 * switch via __switch_to().
 */
#define switch_to(prev, next, last) \
7696
do { \
7797
struct task_struct *__prev = (prev); \
7898
struct task_struct *__next = (next); \
99+
__set_prev_cpu(__prev->thread); \
79100
if (has_fpu()) \
80101
__switch_to_fpu(__prev, __next); \
81102
if (has_vector()) \
82103
__switch_to_vector(__prev, __next); \
104+
if (switch_to_should_flush_icache(__next)) \
105+
local_flush_icache_all(); \
83106
((last) = __switch_to(__prev, __next)); \
84107
} while (0)
85108

arch/riscv/mm/cacheflush.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <linux/acpi.h>
77
#include <linux/of.h>
8+
#include <linux/prctl.h>
89
#include <asm/acpi.h>
910
#include <asm/cacheflush.h>
1011

@@ -152,3 +153,115 @@ void __init riscv_init_cbo_blocksizes(void)
152153
if (cboz_block_size)
153154
riscv_cboz_block_size = cboz_block_size;
154155
}
156+
157+
#ifdef CONFIG_SMP
/*
 * Mark every other hart's icache as stale for the current mm, so that a
 * deferred flush happens when this mm is next scheduled there.  The
 * current hart keeps whatever staleness state it already had, so that
 * concurrent callers on different harts do not clobber each other.
 */
static void set_icache_stale_mask(void)
{
	cpumask_t *mask = &current->mm->context.icache_stale_mask;
	bool was_stale = cpumask_test_cpu(smp_processor_id(), mask);

	cpumask_setall(mask);
	cpumask_assign_cpu(smp_processor_id(), mask, was_stale);
}
#endif
176+
177+
/**
178+
* riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in
179+
* userspace.
180+
* @ctx: Set the type of icache flushing instructions permitted/prohibited in
181+
* userspace. Supported values described below.
182+
*
183+
* Supported values for ctx:
184+
*
185+
* * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space.
186+
*
187+
* * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in
188+
* a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``.
189+
* Therefore, caution must be taken; use this flag only when you can guarantee
190+
* that no thread in the process will emit fence.i from this point onward.
191+
*
192+
* @scope: Set scope of where icache flushing instructions are allowed to be
193+
* emitted. Supported values described below.
194+
*
195+
* Supported values for scope:
196+
*
197+
* * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process
198+
* is coherent with instruction storage upon
199+
* migration.
200+
*
201+
* * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is
202+
* coherent with instruction storage upon
203+
* migration.
204+
*
205+
* When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are
206+
* permitted to emit icache flushing instructions. Whenever any thread in the
207+
* process is migrated, the corresponding hart's icache will be guaranteed to be
208+
* consistent with instruction storage. This does not enforce any guarantees
209+
* outside of migration. If a thread modifies an instruction that another thread
210+
* may attempt to execute, the other thread must still emit an icache flushing
211+
* instruction before attempting to execute the potentially modified
212+
* instruction. This must be performed by the user-space program.
213+
*
214+
* In per-thread context (eg. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the
215+
* thread calling this function is permitted to emit icache flushing
216+
* instructions. When the thread is migrated, the corresponding hart's icache
217+
* will be guaranteed to be consistent with instruction storage.
218+
*
219+
* On kernels configured without SMP, this function is a nop as migrations
220+
* across harts will not occur.
221+
*/
222+
int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
223+
{
224+
#ifdef CONFIG_SMP
225+
switch (ctx) {
226+
case PR_RISCV_CTX_SW_FENCEI_ON:
227+
switch (scope) {
228+
case PR_RISCV_SCOPE_PER_PROCESS:
229+
current->mm->context.force_icache_flush = true;
230+
break;
231+
case PR_RISCV_SCOPE_PER_THREAD:
232+
current->thread.force_icache_flush = true;
233+
break;
234+
default:
235+
return -EINVAL;
236+
}
237+
break;
238+
case PR_RISCV_CTX_SW_FENCEI_OFF:
239+
switch (scope) {
240+
case PR_RISCV_SCOPE_PER_PROCESS:
241+
current->mm->context.force_icache_flush = false;
242+
243+
set_icache_stale_mask();
244+
break;
245+
case PR_RISCV_SCOPE_PER_THREAD:
246+
current->thread.force_icache_flush = false;
247+
248+
set_icache_stale_mask();
249+
break;
250+
default:
251+
return -EINVAL;
252+
}
253+
break;
254+
default:
255+
return -EINVAL;
256+
}
257+
return 0;
258+
#else
259+
switch (ctx) {
260+
case PR_RISCV_CTX_SW_FENCEI_ON:
261+
case PR_RISCV_CTX_SW_FENCEI_OFF:
262+
return 0;
263+
default:
264+
return -EINVAL;
265+
}
266+
#endif
267+
}

arch/riscv/mm/context.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <asm/tlbflush.h>
1616
#include <asm/cacheflush.h>
1717
#include <asm/mmu_context.h>
18+
#include <asm/switch_to.h>
1819

1920
#ifdef CONFIG_MMU
2021

@@ -297,21 +298,23 @@ static inline void set_mm(struct mm_struct *prev,
297298
*
298299
* The "cpu" argument must be the current local CPU number.
299300
*/
300-
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
301+
static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
302+
struct task_struct *task)
301303
{
302304
#ifdef CONFIG_SMP
303-
cpumask_t *mask = &mm->context.icache_stale_mask;
304-
305-
if (cpumask_test_cpu(cpu, mask)) {
306-
cpumask_clear_cpu(cpu, mask);
305+
if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) {
307306
/*
308307
* Ensure the remote hart's writes are visible to this hart.
309308
* This pairs with a barrier in flush_icache_mm.
310309
*/
311310
smp_mb();
312-
local_flush_icache_all();
313-
}
314311

312+
/*
313+
* If cache will be flushed in switch_to, no need to flush here.
314+
*/
315+
if (!(task && switch_to_should_flush_icache(task)))
316+
local_flush_icache_all();
317+
}
315318
#endif
316319
}
317320

@@ -334,5 +337,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
334337

335338
set_mm(prev, next, cpu);
336339

337-
flush_icache_deferred(next, cpu);
340+
flush_icache_deferred(next, cpu, task);
338341
}

include/linux/cpumask.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,22 @@ static __always_inline void __cpumask_clear_cpu(int cpu, struct cpumask *dstp)
493493
__clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
494494
}
495495

496+
/**
497+
* cpumask_assign_cpu - assign a cpu in a cpumask
498+
* @cpu: cpu number (< nr_cpu_ids)
499+
* @dstp: the cpumask pointer
500+
* @bool: the value to assign
501+
*/
502+
static __always_inline void cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
503+
{
504+
assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
505+
}
506+
507+
static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp, bool value)
508+
{
509+
__assign_bit(cpumask_check(cpu), cpumask_bits(dstp), value);
510+
}
511+
496512
/**
497513
* cpumask_test_cpu - test for a cpu in a cpumask
498514
* @cpu: cpu number (< nr_cpu_ids)

0 commit comments

Comments
 (0)