Skip to content

Commit 500afbf

Browse files
ChangSeokBae and suryasaimadhu
authored and committed
x86/fpu/xstate: Add fpstate_realloc()/free()
The fpstate embedded in struct fpu is the default state for storing the FPU registers. It's sized so that the default supported features can be stored. For dynamically enabled features the register buffer is too small. The #NM handler detects first use of a feature which is disabled in the XFD MSR. After handling permission checks it recalculates the size for kernel space and user space state and invokes fpstate_realloc() which tries to reallocate fpstate and install it. Provide the allocator function which checks whether the current buffer size is sufficient and if not allocates one. If allocation is successful the new fpstate is initialized with the new features and sizes and the now enabled features are removed from the task's XFD mask. fpstate_realloc() uses vzalloc(). If use of this mechanism grows to re-allocate buffers larger than 64KB, a more sophisticated allocation scheme that includes purpose-built reclaim capability might be justified. Signed-off-by: Chang S. Bae <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Signed-off-by: Borislav Petkov <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 783e87b commit 500afbf

File tree

3 files changed

+106
-8
lines changed

3 files changed

+106
-8
lines changed

arch/x86/include/asm/fpu/api.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,13 @@ static inline void fpstate_init_soft(struct swregs_state *soft) {}
130130
/* State tracking */
131131
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
132132

133+
/*
 * Process cleanup
 *
 * fpstate_free() takes the struct fpu whose fpstate is to be released.
 * With CONFIG_X86_64 it is an external function; otherwise it is an
 * empty inline stub, so callers need no #ifdef of their own.
 */
134+
#ifdef CONFIG_X86_64
135+
extern void fpstate_free(struct fpu *fpu);
136+
#else
137+
static inline void fpstate_free(struct fpu *fpu) { }
138+
#endif
139+
133140
/* fpstate-related functions which are exported to KVM */
134141
extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature);
135142

arch/x86/kernel/fpu/xstate.c

Lines changed: 89 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/pkeys.h>
1313
#include <linux/seq_file.h>
1414
#include <linux/proc_fs.h>
15+
#include <linux/vmalloc.h>
1516

1617
#include <asm/fpu/api.h>
1718
#include <asm/fpu/regset.h>
@@ -22,6 +23,7 @@
2223
#include <asm/prctl.h>
2324
#include <asm/elf.h>
2425

26+
#include "context.h"
2527
#include "internal.h"
2628
#include "legacy.h"
2729
#include "xstate.h"
@@ -1371,6 +1373,91 @@ void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rstor)
13711373
}
13721374
#endif /* CONFIG_X86_DEBUG_FPU */
13731375

1376+
void fpstate_free(struct fpu *fpu)
1377+
{
1378+
if (fpu->fpstate || fpu->fpstate != &fpu->__fpstate)
1379+
vfree(fpu->fpstate);
1380+
}
1381+
1382+
/**
1383+
* fpu_install_fpstate - Update the active fpstate in the FPU
1384+
*
1385+
* @fpu: A struct fpu * pointer
1386+
* @newfps: A struct fpstate * pointer
1387+
*
1388+
* Returns: A null pointer if the last active fpstate is the embedded
1389+
* one or the new fpstate is already installed;
1390+
* otherwise, a pointer to the old fpstate which has to
1391+
* be freed by the caller.
1392+
*/
1393+
static struct fpstate *fpu_install_fpstate(struct fpu *fpu,
1394+
struct fpstate *newfps)
1395+
{
1396+
struct fpstate *oldfps = fpu->fpstate;
1397+
1398+
if (fpu->fpstate == newfps)
1399+
return NULL;
1400+
1401+
fpu->fpstate = newfps;
1402+
return oldfps != &fpu->__fpstate ? oldfps : NULL;
1403+
}
1404+
1405+
/**
1406+
* fpstate_realloc - Reallocate struct fpstate for the requested new features
1407+
*
1408+
* @xfeatures: A bitmap of xstate features which extend the enabled features
1409+
* of that task
1410+
* @ksize: The required size for the kernel buffer
1411+
* @usize: The required size for user space buffers
1412+
*
1413+
* Note vs. vmalloc(): If the task with a vzalloc()-allocated buffer
1414+
* terminates quickly, vfree()-induced IPIs may be a concern, but tasks
1415+
* with large states are likely to live longer.
1416+
*
1417+
* Returns: 0 on success, -ENOMEM on allocation error.
1418+
*/
1419+
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1420+
unsigned int usize)
1421+
{
1422+
struct fpu *fpu = &current->thread.fpu;
1423+
struct fpstate *curfps, *newfps = NULL;
1424+
unsigned int fpsize;
1425+
1426+
curfps = fpu->fpstate;
1427+
fpsize = ksize + ALIGN(offsetof(struct fpstate, regs), 64);
1428+
1429+
newfps = vzalloc(fpsize);
1430+
if (!newfps)
1431+
return -ENOMEM;
1432+
newfps->size = ksize;
1433+
newfps->user_size = usize;
1434+
newfps->is_valloc = true;
1435+
1436+
fpregs_lock();
1437+
/*
1438+
* Ensure that the current state is in the registers before
1439+
* swapping fpstate as that might invalidate it due to layout
1440+
* changes.
1441+
*/
1442+
if (test_thread_flag(TIF_NEED_FPU_LOAD))
1443+
fpregs_restore_userregs();
1444+
1445+
newfps->xfeatures = curfps->xfeatures | xfeatures;
1446+
newfps->user_xfeatures = curfps->user_xfeatures | xfeatures;
1447+
newfps->xfd = curfps->xfd & ~xfeatures;
1448+
1449+
curfps = fpu_install_fpstate(fpu, newfps);
1450+
1451+
/* Do the final updates within the locked region */
1452+
xstate_init_xcomp_bv(&newfps->regs.xsave, newfps->xfeatures);
1453+
xfd_update_state(newfps);
1454+
1455+
fpregs_unlock();
1456+
1457+
vfree(curfps);
1458+
return 0;
1459+
}
1460+
13741461
static int validate_sigaltstack(unsigned int usize)
13751462
{
13761463
struct task_struct *thread, *leader = current->group_leader;
@@ -1393,7 +1480,8 @@ static int __xstate_request_perm(u64 permitted, u64 requested)
13931480
/*
13941481
* This deliberately does not exclude !XSAVES as we still might
13951482
* decide to optionally context switch XCR0 or talk the silicon
1396-
* vendors into extending XFD for the pre AMX states.
1483+
* vendors into extending XFD for the pre AMX states, especially
1484+
* AVX512.
13971485
*/
13981486
bool compacted = cpu_feature_enabled(X86_FEATURE_XSAVES);
13991487
struct fpu *fpu = &current->group_leader->thread.fpu;
@@ -1465,13 +1553,6 @@ static int xstate_request_perm(unsigned long idx)
14651553
return ret;
14661554
}
14671555

1468-
/*
 * Place holder for now: ignores its arguments and always fails with
 * -ENOMEM.
 */
1469-
static int fpstate_realloc(u64 xfeatures, unsigned int ksize,
1470-
unsigned int usize)
1471-
{
1472-
return -ENOMEM;
1473-
}
1474-
14751556
int xfd_enable_feature(u64 xfd_err)
14761557
{
14771558
u64 xfd_event = xfd_err & XFEATURE_MASK_USER_DYNAMIC;

arch/x86/kernel/process.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <asm/mwait.h>
3333
#include <asm/fpu/api.h>
3434
#include <asm/fpu/sched.h>
35+
#include <asm/fpu/xstate.h>
3536
#include <asm/debugreg.h>
3637
#include <asm/nmi.h>
3738
#include <asm/tlbflush.h>
@@ -90,9 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
9091
#endif
9192
/* Drop the copied pointer to current's fpstate */
9293
dst->thread.fpu.fpstate = NULL;
94+
9395
return 0;
9496
}
9597

98+
#ifdef CONFIG_X86_64
99+
void arch_release_task_struct(struct task_struct *tsk)
100+
{
101+
if (fpu_state_size_dynamic())
102+
fpstate_free(&tsk->thread.fpu);
103+
}
104+
#endif
105+
96106
/*
97107
* Free thread data structures etc..
98108
*/

0 commit comments

Comments
 (0)