Skip to content

Commit b5f06f6

Browse files
balbir-aws authored and KAGA-KOKO committed
x86/mm: Prepare for opt-in based L1D flush in switch_mm()
The goal of this is to allow tasks that want to protect sensitive information, against e.g. the recently found snoop assisted data sampling vulnerabilities, to flush their L1D on being switched out. This protects their data from being snooped or leaked via side channels after the task has context switched out. This could also be used to wipe L1D when an untrusted task is switched in, but that's not a really well defined scenario while the opt-in variant is clearly defined. The mechanism is default disabled and can be enabled on the kernel command line. Prepare for the actual prctl based opt-in: 1) Provide the necessary setup functionality similar to the other mitigations and enable the static branch when the command line option is set and the CPU provides support for hardware assisted L1D flushing. Software based L1D flush is not supported because it's CPU model specific and not really well defined. This does not come with a sysfs file like the other mitigations because it is not bound to any specific vulnerability. Support has to be queried via the prctl(2) interface. 2) Add TIF_SPEC_L1D_FLUSH next to TIF_SPEC_IB so the two bits can be mangled into the mm pointer in one go, which allows reusing the existing mechanism in switch_mm() for the conditional IBPB speculation barrier efficiently. 3) Add the L1D flush specific functionality which flushes L1D when the outgoing task opted in. Also check whether the incoming task has requested L1D flush and if so validate that it is not accidentally running on an SMT sibling as this makes the whole exercise moot because SMT siblings share L1D which opens tons of other attack vectors. If that happens schedule task work which signals the incoming task on return to user/guest with SIGBUS as this is part of the paranoid L1D flush contract.
Suggested-by: Thomas Gleixner <[email protected]> Signed-off-by: Balbir Singh <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 8aacd1e commit b5f06f6

File tree

5 files changed

+98
-2
lines changed

5 files changed

+98
-2
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ config X86
119119
select ARCH_WANT_HUGE_PMD_SHARE
120120
select ARCH_WANT_LD_ORPHAN_WARN
121121
select ARCH_WANTS_THP_SWAP if X86_64
122+
select ARCH_HAS_PARANOID_L1D_FLUSH
122123
select BUILDTIME_TABLE_SORT
123124
select CLKEVT_I8253
124125
select CLOCKSOURCE_VALIDATE_LAST_CYCLE

arch/x86/include/asm/nospec-branch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
252252
DECLARE_STATIC_KEY_FALSE(mds_user_clear);
253253
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
254254

255+
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
256+
255257
#include <asm/segment.h>
256258

257259
/**

arch/x86/include/asm/thread_info.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ struct thread_info {
8181
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
8282
#define TIF_SSBD 5 /* Speculative store bypass disable */
8383
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
84+
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
8485
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
8586
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
8687
#define TIF_PATCH_PENDING 13 /* pending live patching update */
@@ -104,6 +105,7 @@ struct thread_info {
104105
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
105106
#define _TIF_SSBD (1 << TIF_SSBD)
106107
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
108+
#define _TIF_SPEC_L1D_FLUSH (1 << TIF_SPEC_L1D_FLUSH)
107109
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
108110
#define _TIF_UPROBE (1 << TIF_UPROBE)
109111
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)

arch/x86/kernel/cpu/bugs.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
4343
static void __init mds_print_mitigation(void);
4444
static void __init taa_select_mitigation(void);
4545
static void __init srbds_select_mitigation(void);
46+
static void __init l1d_flush_select_mitigation(void);
4647

4748
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
4849
u64 x86_spec_ctrl_base;
@@ -76,6 +77,13 @@ EXPORT_SYMBOL_GPL(mds_user_clear);
7677
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
7778
EXPORT_SYMBOL_GPL(mds_idle_clear);
7879

80+
/*
81+
* Controls whether l1d flush based mitigations are enabled,
82+
* based on hw features and admin setting via boot parameter
83+
* defaults to false
84+
*/
85+
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
86+
7987
void __init check_bugs(void)
8088
{
8189
identify_boot_cpu();
@@ -111,6 +119,7 @@ void __init check_bugs(void)
111119
mds_select_mitigation();
112120
taa_select_mitigation();
113121
srbds_select_mitigation();
122+
l1d_flush_select_mitigation();
114123

115124
/*
116125
* As MDS and TAA mitigations are inter-related, print MDS
@@ -491,6 +500,34 @@ static int __init srbds_parse_cmdline(char *str)
491500
}
492501
early_param("srbds", srbds_parse_cmdline);
493502

503+
#undef pr_fmt
504+
#define pr_fmt(fmt) "L1D Flush : " fmt
505+
506+
enum l1d_flush_mitigations {
507+
L1D_FLUSH_OFF = 0,
508+
L1D_FLUSH_ON,
509+
};
510+
511+
static enum l1d_flush_mitigations l1d_flush_mitigation __initdata = L1D_FLUSH_OFF;
512+
513+
/*
 * Enable the conditional L1D flush in switch_mm() when both conditions
 * hold: the mitigation setting is not L1D_FLUSH_OFF and the boot CPU
 * reports X86_FEATURE_FLUSH_L1D. In every other case the static branch
 * is left in its default (disabled) state and nothing is printed.
 */
static void __init l1d_flush_select_mitigation(void)
{
	/* Not requested: keep the default-off behaviour */
	if (l1d_flush_mitigation == L1D_FLUSH_OFF)
		return;

	/* Requested, but the boot CPU lacks the L1D flush feature bit */
	if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D))
		return;

	static_branch_enable(&switch_mm_cond_l1d_flush);
	pr_info("Conditional flush on switch_mm() enabled\n");
}
521+
522+
/*
 * Parse the "l1d_flush" kernel command line option.
 *
 * Only the value "on" is recognized; it sets l1d_flush_mitigation to
 * L1D_FLUSH_ON. Any other value leaves the setting untouched.
 *
 * @str: the text following "l1d_flush=", or NULL when the option was
 *       given without a value.
 *
 * Returns 0 when a value was supplied, -EINVAL when @str is NULL.
 */
static int __init l1d_flush_parse_cmdline(char *str)
{
	/* A bare "l1d_flush" carries no value; strcmp() must not see NULL */
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "on"))
		l1d_flush_mitigation = L1D_FLUSH_ON;

	return 0;
}
529+
early_param("l1d_flush", l1d_flush_parse_cmdline);
530+
494531
#undef pr_fmt
495532
#define pr_fmt(fmt) "Spectre V1 : " fmt
496533

arch/x86/mm/tlb.c

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
#include <linux/export.h>
99
#include <linux/cpu.h>
1010
#include <linux/debugfs.h>
11+
#include <linux/sched/smt.h>
1112

1213
#include <asm/tlbflush.h>
1314
#include <asm/mmu_context.h>
1415
#include <asm/nospec-branch.h>
1516
#include <asm/cache.h>
17+
#include <asm/cacheflush.h>
1618
#include <asm/apic.h>
1719
#include <asm/perf_event.h>
1820

@@ -43,11 +45,12 @@
4345
*/
4446

4547
/*
46-
* Bits to mangle the TIF_SPEC_IB state into the mm pointer which is
48+
* Bits to mangle the TIF_SPEC_* state into the mm pointer which is
4749
* stored in cpu_tlb_state.last_user_mm_spec.
4850
*/
4951
#define LAST_USER_MM_IBPB 0x1UL
50-
#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB)
52+
#define LAST_USER_MM_L1D_FLUSH 0x2UL
53+
#define LAST_USER_MM_SPEC_MASK (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
5154

5255
/* Bits to set when tlbstate and flush is (re)initialized */
5356
#define LAST_USER_MM_INIT LAST_USER_MM_IBPB
@@ -321,11 +324,52 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
321324
local_irq_restore(flags);
322325
}
323326

327+
/*
328+
* Invoked from return to user/guest by a task that opted-in to L1D
329+
* flushing but ended up running on an SMT enabled core due to wrong
330+
* affinity settings or CPU hotplug. This is part of the paranoid L1D flush
331+
* contract which this task requested.
332+
*/
333+
static void l1d_flush_force_sigbus(struct callback_head *ch)
334+
{
335+
force_sig(SIGBUS);
336+
}
337+
338+
static void l1d_flush_evaluate(unsigned long prev_mm, unsigned long next_mm,
339+
struct task_struct *next)
340+
{
341+
/* Flush L1D if the outgoing task requests it */
342+
if (prev_mm & LAST_USER_MM_L1D_FLUSH)
343+
wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
344+
345+
/* Check whether the incoming task opted in for L1D flush */
346+
if (likely(!(next_mm & LAST_USER_MM_L1D_FLUSH)))
347+
return;
348+
349+
/*
350+
* Validate that it is not running on an SMT sibling as this would
351+
* make the excercise pointless because the siblings share L1D. If
352+
* it runs on a SMT sibling, notify it with SIGBUS on return to
353+
* user/guest
354+
*/
355+
if (this_cpu_read(cpu_info.smt_active)) {
356+
clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
357+
next->l1d_flush_kill.func = l1d_flush_force_sigbus;
358+
task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
359+
}
360+
}
361+
324362
static unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
325363
{
326364
unsigned long next_tif = task_thread_info(next)->flags;
327365
unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;
328366

367+
/*
368+
* Ensure that the bit shift above works as expected and the two flags
369+
* end up in bit 0 and 1.
370+
*/
371+
BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
372+
329373
return (unsigned long)next->mm | spec_bits;
330374
}
331375

@@ -403,6 +447,16 @@ static void cond_mitigation(struct task_struct *next)
403447
indirect_branch_prediction_barrier();
404448
}
405449

450+
if (static_branch_unlikely(&switch_mm_cond_l1d_flush)) {
451+
/*
452+
* Flush L1D when the outgoing task requested it and/or
453+
* check whether the incoming task requested L1D flushing
454+
* and ended up on an SMT sibling.
455+
*/
456+
if (unlikely((prev_mm | next_mm) & LAST_USER_MM_L1D_FLUSH))
457+
l1d_flush_evaluate(prev_mm, next_mm, next);
458+
}
459+
406460
this_cpu_write(cpu_tlbstate.last_user_mm_spec, next_mm);
407461
}
408462

0 commit comments

Comments
 (0)