Commit 79eb42b
Merge branch 'for-next/fpsimd' into for-next/core
* for-next/fpsimd:
  arm64: fpsimd: Implement lazy restore for kernel mode FPSIMD
  arm64: fpsimd: Preserve/restore kernel mode NEON at context switch
  arm64: fpsimd: Drop unneeded 'busy' flag
2 parents e90a8a2 + 2632e25 commit 79eb42b

File tree

4 files changed (+111, -69 lines)
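For context, kernel mode FPSIMD/NEON is used through the kernel_neon_begin()/kernel_neon_end() API from <asm/neon.h>, guarded by may_use_simd() from <asm/simd.h>. The sketch below is a hypothetical caller (do_neon_work(), do_neon_work_inner() and do_scalar_work() are invented for illustration and are not part of this commit); it shows the usage pattern whose cost this series reduces: a task preempted inside such a section now has its kernel mode FPSIMD state preserved at context switch and lazily restored when it resumes.

#include <linux/types.h>
#include <asm/neon.h>	/* kernel_neon_begin(), kernel_neon_end() */
#include <asm/simd.h>	/* may_use_simd() */

/* Hypothetical caller, for illustration only. */
static void do_neon_work(u8 *dst, const u8 *src, size_t len)
{
	if (!may_use_simd()) {
		do_scalar_work(dst, src, len);	/* hypothetical scalar fallback */
		return;
	}

	kernel_neon_begin();
	/*
	 * The FPSIMD/NEON registers may be clobbered freely in here.
	 * With this series, the scheduler may preempt the task inside
	 * this section; fpsimd_thread_switch() saves the kernel mode
	 * state and skips the reload if the task resumes on the same
	 * CPU with its state still live there.
	 */
	do_neon_work_inner(dst, src, len);	/* hypothetical NEON body */
	kernel_neon_end();
}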

arch/arm64/include/asm/processor.h

Lines changed: 3 additions & 0 deletions
@@ -167,6 +167,9 @@ struct thread_struct {
 	unsigned long		fault_address;	/* fault info */
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
+
+	struct user_fpsimd_state	kernel_fpsimd_state;
+	unsigned int			kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
 #ifdef CONFIG_ARM64_PTR_AUTH_KERNEL

arch/arm64/include/asm/simd.h

Lines changed: 1 addition & 10 deletions
@@ -12,8 +12,6 @@
 #include <linux/preempt.h>
 #include <linux/types.h>
 
-DECLARE_PER_CPU(bool, fpsimd_context_busy);
-
 #ifdef CONFIG_KERNEL_MODE_NEON
 
 /*
@@ -28,17 +26,10 @@ static __must_check inline bool may_use_simd(void)
 	/*
 	 * We must make sure that the SVE has been initialized properly
 	 * before using the SIMD in kernel.
-	 * fpsimd_context_busy is only set while preemption is disabled,
-	 * and is clear whenever preemption is enabled. Since
-	 * this_cpu_read() is atomic w.r.t. preemption, fpsimd_context_busy
-	 * cannot change under our feet -- if it's set we cannot be
-	 * migrated, and if it's clear we cannot be migrated to a CPU
-	 * where it is set.
 	 */
 	return !WARN_ON(!system_capabilities_finalized()) &&
 	       system_supports_fpsimd() &&
-	       !in_hardirq() && !irqs_disabled() && !in_nmi() &&
-	       !this_cpu_read(fpsimd_context_busy);
+	       !in_hardirq() && !irqs_disabled() && !in_nmi();
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */
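Note what remains of the check: may_use_simd() now rejects only hardirq, NMI and IRQs-off contexts. In particular, a softirq handler that interrupts a kernel mode NEON section is no longer turned away; the nesting is handled in kernel_neon_begin()/kernel_neon_end() by spilling and reloading the interrupted task's kernel mode state (see the fpsimd.c changes below). A hedged sketch of a softirq-side user, with invented names:

#include <linux/interrupt.h>
#include <asm/neon.h>
#include <asm/simd.h>

/* Hypothetical tasklet callback, for illustration only. */
static void example_tasklet_fn(struct tasklet_struct *t)
{
	if (!may_use_simd()) {
		example_scalar_path();	/* hypothetical fallback */
		return;
	}

	kernel_neon_begin();	/* spills any interrupted kernel mode FPSIMD state */
	example_neon_path();	/* hypothetical NEON body */
	kernel_neon_end();	/* reloads that state, if we nested */
}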

arch/arm64/include/asm/thread_info.h

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@ void arch_setup_new_exec(void);
 #define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
 #define TIF_SME			27	/* SME in use */
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
+#define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)

arch/arm64/kernel/fpsimd.c

Lines changed: 106 additions & 59 deletions
@@ -85,13 +85,13 @@
  * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and
  * flag the register state as invalid.
  *
- * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may
- * save the task's FPSIMD context back to task_struct from softirq context.
- * To prevent this from racing with the manipulation of the task's FPSIMD state
- * from task context and thereby corrupting the state, it is necessary to
- * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE
- * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to
- * run but prevent them to use FPSIMD.
+ * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may be
+ * called from softirq context, which will save the task's FPSIMD context back
+ * to task_struct. To prevent this from racing with the manipulation of the
+ * task's FPSIMD state from task context and thereby corrupting the state, it
+ * is necessary to protect any manipulation of a task's fpsimd_state or
+ * TIF_FOREIGN_FPSTATE flag with get_cpu_fpsimd_context(), which will suspend
+ * softirq servicing entirely until put_cpu_fpsimd_context() is called.
 *
 * For a certain task, the sequence may look something like this:
 * - the task gets scheduled in; if both the task's fpsimd_cpu field
@@ -209,27 +209,14 @@ static inline void sme_free(struct task_struct *t) { }
 
 #endif
 
-DEFINE_PER_CPU(bool, fpsimd_context_busy);
-EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy);
-
 static void fpsimd_bind_task_to_cpu(void);
 
-static void __get_cpu_fpsimd_context(void)
-{
-	bool busy = __this_cpu_xchg(fpsimd_context_busy, true);
-
-	WARN_ON(busy);
-}
-
 /*
  * Claim ownership of the CPU FPSIMD context for use by the calling context.
  *
  * The caller may freely manipulate the FPSIMD context metadata until
  * put_cpu_fpsimd_context() is called.
  *
- * The double-underscore version must only be called if you know the task
- * can't be preempted.
- *
 * On RT kernels local_bh_disable() is not sufficient because it only
 * serializes soft interrupt related sections via a local lock, but stays
 * preemptible. Disabling preemption is the right choice here as bottom
@@ -242,14 +229,6 @@ static void get_cpu_fpsimd_context(void)
 		local_bh_disable();
 	else
 		preempt_disable();
-	__get_cpu_fpsimd_context();
-}
-
-static void __put_cpu_fpsimd_context(void)
-{
-	bool busy = __this_cpu_xchg(fpsimd_context_busy, false);
-
-	WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */
 }
 
 /*
@@ -261,18 +240,12 @@ static void __put_cpu_fpsimd_context(void)
  */
 static void put_cpu_fpsimd_context(void)
 {
-	__put_cpu_fpsimd_context();
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
 		local_bh_enable();
 	else
 		preempt_enable();
 }
 
-static bool have_cpu_fpsimd_context(void)
-{
-	return !preemptible() && __this_cpu_read(fpsimd_context_busy);
-}
-
 unsigned int task_get_vl(const struct task_struct *task, enum vec_type type)
 {
 	return task->thread.vl[type];
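With the busy flag gone, this get/put pair is the only remaining exclusion mechanism: it disables softirq servicing on non-PREEMPT_RT and preemption on PREEMPT_RT. A minimal sketch of the contract it provides, as it would appear inside fpsimd.c (the function name is invented):

static void example_poke_fpsimd_state(void)
{
	get_cpu_fpsimd_context();	/* softirqs off; preemption off on RT */

	/*
	 * Safe region: current's fpsimd_state and TIF_FOREIGN_FPSTATE can
	 * be manipulated here without racing against kernel_neon_begin()
	 * being called from softirq context.
	 */

	put_cpu_fpsimd_context();
}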
@@ -383,7 +356,8 @@ static void task_fpsimd_load(void)
 	bool restore_ffr;
 
 	WARN_ON(!system_supports_fpsimd());
-	WARN_ON(!have_cpu_fpsimd_context());
+	WARN_ON(preemptible());
+	WARN_ON(test_thread_flag(TIF_KERNEL_FPSTATE));
 
 	if (system_supports_sve() || system_supports_sme()) {
 		switch (current->thread.fp_type) {
@@ -406,7 +380,7 @@ static void task_fpsimd_load(void)
 		default:
 			/*
 			 * This indicates either a bug in
-			 * fpsimd_save() or memory corruption, we
+			 * fpsimd_save_user_state() or memory corruption, we
 			 * should always record an explicit format
 			 * when we save. We always at least have the
 			 * memory allocated for FPSMID registers so
@@ -457,7 +431,7 @@ static void task_fpsimd_load(void)
 * than via current, if we are saving KVM state then it will have
 * ensured that the type of registers to save is set in last->to_save.
 */
-static void fpsimd_save(void)
+static void fpsimd_save_user_state(void)
 {
 	struct cpu_fp_state const *last =
 		this_cpu_ptr(&fpsimd_last_state);
@@ -467,7 +441,7 @@ static void fpsimd_save(void)
 	unsigned int vl;
 
 	WARN_ON(!system_supports_fpsimd());
-	WARN_ON(!have_cpu_fpsimd_context());
+	WARN_ON(preemptible());
 
 	if (test_thread_flag(TIF_FOREIGN_FPSTATE))
 		return;
@@ -888,7 +862,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
 	if (task == current) {
 		get_cpu_fpsimd_context();
 
-		fpsimd_save();
+		fpsimd_save_user_state();
 	}
 
 	fpsimd_flush_task_state(task);
@@ -1500,31 +1474,66 @@ void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs)
 		       current);
 }
 
+static void fpsimd_load_kernel_state(struct task_struct *task)
+{
+	struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
+
+	/*
+	 * Elide the load if this CPU holds the most recent kernel mode
+	 * FPSIMD context of the current task.
+	 */
+	if (last->st == &task->thread.kernel_fpsimd_state &&
+	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
+		return;
+
+	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+}
+
+static void fpsimd_save_kernel_state(struct task_struct *task)
+{
+	struct cpu_fp_state cpu_fp_state = {
+		.st		= &task->thread.kernel_fpsimd_state,
+		.to_save	= FP_STATE_FPSIMD,
+	};
+
+	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+	fpsimd_bind_state_to_cpu(&cpu_fp_state);
+
+	task->thread.kernel_fpsimd_cpu = smp_processor_id();
+}
+
 void fpsimd_thread_switch(struct task_struct *next)
 {
 	bool wrong_task, wrong_cpu;
 
 	if (!system_supports_fpsimd())
 		return;
 
-	__get_cpu_fpsimd_context();
+	WARN_ON_ONCE(!irqs_disabled());
 
 	/* Save unsaved fpsimd state, if any: */
-	fpsimd_save();
-
-	/*
-	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
-	 * state. For kernel threads, FPSIMD registers are never loaded
-	 * and wrong_task and wrong_cpu will always be true.
-	 */
-	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
-					&next->thread.uw.fpsimd_state;
-	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
+	if (test_thread_flag(TIF_KERNEL_FPSTATE))
+		fpsimd_save_kernel_state(current);
+	else
+		fpsimd_save_user_state();
 
-	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
-			       wrong_task || wrong_cpu);
+	if (test_tsk_thread_flag(next, TIF_KERNEL_FPSTATE)) {
+		fpsimd_load_kernel_state(next);
+		set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
+	} else {
+		/*
+		 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+		 * state. For kernel threads, FPSIMD registers are never
+		 * loaded with user mode FPSIMD state and so wrong_task and
+		 * wrong_cpu will always be true.
+		 */
+		wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+			&next->thread.uw.fpsimd_state;
+		wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
 
-	__put_cpu_fpsimd_context();
+		update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+				       wrong_task || wrong_cpu);
+	}
 }
 
 static void fpsimd_flush_thread_vl(enum vec_type type)
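To make the lazy restore concrete, here is a hedged walk-through of the hunk above as a comment-style timeline (illustrative only, assuming a non-PREEMPT_RT kernel and no other FPSIMD users in between):

/*
 * task A: kernel_neon_begin()      TIF_KERNEL_FPSTATE is set
 * task A is preempted              fpsimd_thread_switch() calls
 *                                  fpsimd_save_kernel_state(A), binding
 *                                  A's kernel state to this CPU
 * task B runs                      never touches FPSIMD
 * task A resumes on the same CPU   fpsimd_load_kernel_state(A) sees
 *                                  last->st == &A's kernel_fpsimd_state
 *                                  and kernel_fpsimd_cpu == this CPU, so
 *                                  the register reload is elided
 * task A: kernel_neon_end()        TIF_KERNEL_FPSTATE is cleared
 */

If task A had instead migrated to another CPU, or task B had used FPSIMD, the elision check would fail and fpsimd_load_state() would reload A's kernel mode state in full.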
@@ -1614,7 +1623,7 @@ void fpsimd_preserve_current_state(void)
 		return;
 
 	get_cpu_fpsimd_context();
-	fpsimd_save();
+	fpsimd_save_user_state();
 	put_cpu_fpsimd_context();
 }
 
@@ -1826,13 +1835,15 @@ static void fpsimd_flush_cpu_state(void)
 */
 void fpsimd_save_and_flush_cpu_state(void)
 {
+	unsigned long flags;
+
 	if (!system_supports_fpsimd())
 		return;
 	WARN_ON(preemptible());
-	__get_cpu_fpsimd_context();
-	fpsimd_save();
+	local_irq_save(flags);
+	fpsimd_save_user_state();
 	fpsimd_flush_cpu_state();
-	__put_cpu_fpsimd_context();
+	local_irq_restore(flags);
 }
 
 #ifdef CONFIG_KERNEL_MODE_NEON
@@ -1864,10 +1875,37 @@ void kernel_neon_begin(void)
 	get_cpu_fpsimd_context();
 
 	/* Save unsaved fpsimd state, if any: */
-	fpsimd_save();
+	if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
+		BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
+		fpsimd_save_kernel_state(current);
+	} else {
+		fpsimd_save_user_state();
+
+		/*
+		 * Set the thread flag so that the kernel mode FPSIMD state
+		 * will be context switched along with the rest of the task
+		 * state.
+		 *
+		 * On non-PREEMPT_RT, softirqs may interrupt task level kernel
+		 * mode FPSIMD, but the task will not be preemptible so setting
+		 * TIF_KERNEL_FPSTATE for those would be both wrong (as it
+		 * would mark the task context FPSIMD state as requiring a
+		 * context switch) and unnecessary.
+		 *
+		 * On PREEMPT_RT, softirqs are serviced from a separate thread,
+		 * which is scheduled as usual, and this guarantees that these
+		 * softirqs are not interrupting use of the FPSIMD in kernel
+		 * mode in task context. So in this case, setting the flag here
+		 * is always appropriate.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+			set_thread_flag(TIF_KERNEL_FPSTATE);
+	}
 
 	/* Invalidate any task state remaining in the fpsimd regs: */
 	fpsimd_flush_cpu_state();
+
+	put_cpu_fpsimd_context();
 }
 EXPORT_SYMBOL_GPL(kernel_neon_begin);
 
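Taken together with kernel_neon_end() below, the nested softirq case behaves like a save/restore stack of depth one. A hedged timeline on a non-PREEMPT_RT kernel (illustrative only, inferred from the two hunks):

/*
 * task: kernel_neon_begin()            sets TIF_KERNEL_FPSTATE
 * ... NEON work in task context ...
 *   softirq fires on this CPU:
 *     kernel_neon_begin()              sees TIF_KERNEL_FPSTATE set and
 *                                      calls fpsimd_save_kernel_state(current)
 *     ... NEON work in softirq ...
 *     kernel_neon_end()                in_serving_softirq() and the flag is
 *                                      still set, so it reloads the task's
 *                                      state via fpsimd_load_kernel_state()
 *   softirq returns
 * ... NEON work continues on intact registers ...
 * task: kernel_neon_end()              clears TIF_KERNEL_FPSTATE
 */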
@@ -1885,7 +1923,16 @@ void kernel_neon_end(void)
 	if (!system_supports_fpsimd())
 		return;
 
-	put_cpu_fpsimd_context();
+	/*
+	 * If we are returning from a nested use of kernel mode FPSIMD, restore
+	 * the task context kernel mode FPSIMD state. This can only happen when
+	 * running in softirq context on non-PREEMPT_RT.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
+	    test_thread_flag(TIF_KERNEL_FPSTATE))
+		fpsimd_load_kernel_state(current);
+	else
+		clear_thread_flag(TIF_KERNEL_FPSTATE);
 }
 EXPORT_SYMBOL_GPL(kernel_neon_end);
 