Skip to content

Commit 2853d5f

Browse files
committed
Merge tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 splitlock updates from Thomas Gleixner: "Support for 'split lock' detection: Atomic operations (lock prefixed instructions) which span two cache lines have to acquire the global bus lock. This is at least 1k cycles slower than an atomic operation within a cache line and disrupts performance on other cores. Aside of performance disruption this is an unprivileged form of DoS. Some newer CPUs have the capability to raise an #AC trap when such an operation is attempted. The detection is by default enabled in warning mode which will warn once when a user space application is caught. A command line option allows to disable the detection or to select fatal mode which will terminate offending applications with SIGBUS" * tag 'x86-splitlock-2020-03-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/split_lock: Avoid runtime reads of the TEST_CTRL MSR x86/split_lock: Rework the initialization flow of split lock detection x86/split_lock: Enable split lock detection by kernel
2 parents d5f744f + a6a6074 commit 2853d5f

File tree

9 files changed

+258
-3
lines changed

9 files changed

+258
-3
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4711,6 +4711,28 @@
47114711
spia_pedr=
47124712
spia_peddr=
47134713

4714+
split_lock_detect=
4715+
[X86] Enable split lock detection
4716+
4717+
When enabled (and if hardware support is present), atomic
4718+
instructions that access data across cache line
4719+
boundaries will result in an alignment check exception.
4720+
4721+
off - not enabled
4722+
4723+
warn - the kernel will emit rate limited warnings
4724+
about applications triggering the #AC
4725+
exception. This mode is the default on CPUs
4726+
that support split lock detection.
4727+
4728+
fatal - the kernel will send SIGBUS to applications
4729+
that trigger the #AC exception.
4730+
4731+
If an #AC exception is hit in the kernel or in
4732+
firmware (i.e. not while executing in user mode)
4733+
the kernel will oops in either "warn" or "fatal"
4734+
mode.
4735+
47144736
srcutree.counter_wrap_check [KNL]
47154737
Specifies how frequently to check for
47164738
grace-period sequence counter wrap for the

arch/x86/include/asm/cpu.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,16 @@ int mwait_usable(const struct cpuinfo_x86 *);
4040
unsigned int x86_family(unsigned int sig);
4141
unsigned int x86_model(unsigned int sig);
4242
unsigned int x86_stepping(unsigned int sig);
43+
/*
 * Split lock detection interface, implemented in intel.c.  When the
 * kernel is built without Intel CPU support the stubs below compile the
 * feature away: detection is never enumerated (cpu_set_core_cap_bits),
 * no MSR is touched on context switch (switch_to_sld), and a user #AC
 * is never consumed as a split lock (handle_user_split_lock returns
 * false so the caller delivers the signal).
 */
#ifdef CONFIG_CPU_SUP_INTEL
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
extern void switch_to_sld(unsigned long tifn);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
#else
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
static inline void switch_to_sld(unsigned long tifn) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
{
	return false;
}
#endif
4355
#endif /* _ASM_X86_CPU_H */

arch/x86/include/asm/cpufeatures.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@
285285
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
286286
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
287287
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
288+
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
288289

289290
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
290291
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -368,6 +369,7 @@
368369
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
369370
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
370371
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
372+
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
371373
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
372374

373375
/*

arch/x86/include/asm/msr-index.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@
4141

4242
/* Intel MSRs. Some also available on other CPUs */
4343

44+
/* MSR_TEST_CTRL: bit 29 raises #AC on lock-prefixed split-cache-line accesses */
#define MSR_TEST_CTRL				0x00000033
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT	29
#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT		BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
47+
4448
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
4549
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
4650
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
@@ -70,6 +74,11 @@
7074
*/
7175
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
7276

77+
/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
/* Bit 5 enumerates availability of split lock detection (MSR_TEST_CTRL) */
#define MSR_IA32_CORE_CAPS			0x000000cf
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT	5
#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT	BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
81+
7382
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
7483
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
7584
#define NHM_C1_AUTO_DEMOTE (1UL << 26)

arch/x86/include/asm/thread_info.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ struct thread_info {
9292
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
9393
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
9494
#define TIF_IA32 17 /* IA32 compatibility process */
95+
#define TIF_SLD 18 /* Restore split lock detection on context switch */
9596
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
9697
#define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */
9798
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
@@ -121,6 +122,7 @@ struct thread_info {
121122
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
122123
#define _TIF_NOTSC (1 << TIF_NOTSC)
123124
#define _TIF_IA32 (1 << TIF_IA32)
125+
#define _TIF_SLD (1 << TIF_SLD)
124126
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
125127
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
126128
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
@@ -139,7 +141,7 @@ struct thread_info {
139141
/* flags to check in __switch_to() */
140142
#define _TIF_WORK_CTXSW_BASE \
141143
(_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
142-
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
144+
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE | _TIF_SLD)
143145

144146
/*
145147
* Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.

arch/x86/kernel/cpu/common.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,6 +1224,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
12241224

12251225
cpu_set_bug_bits(c);
12261226

1227+
cpu_set_core_cap_bits(c);
1228+
12271229
fpu__init_system(c);
12281230

12291231
#ifdef CONFIG_X86_32

arch/x86/kernel/cpu/intel.c

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include <asm/microcode_intel.h>
2020
#include <asm/hwcap2.h>
2121
#include <asm/elf.h>
22+
#include <asm/cpu_device_id.h>
23+
#include <asm/cmdline.h>
2224

2325
#ifdef CONFIG_X86_64
2426
#include <linux/topology.h>
@@ -31,6 +33,20 @@
3133
#include <asm/apic.h>
3234
#endif
3335

36+
/* Kernel policy for split lock detection, selected at boot. */
enum split_lock_detect_state {
	sld_off = 0,	/* detection disabled */
	sld_warn,	/* warn (ratelimited) and let the task continue */
	sld_fatal,	/* offending user task gets SIGBUS */
};

/*
 * Default to sld_off because most systems do not support split lock detection
 * split_lock_setup() will switch this to sld_warn on systems that support
 * split lock detect, unless there is a command line override.
 */
static enum split_lock_detect_state sld_state __ro_after_init = sld_off;

/* Boot-time snapshot of MSR_TEST_CTRL; avoids runtime reads of the MSR. */
static u64 msr_test_ctrl_cache __ro_after_init;
49+
3450
/*
3551
* Processors which have self-snooping capability can handle conflicting
3652
* memory type across CPUs by snooping its own cache. However, there exists
@@ -570,6 +586,8 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
570586
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
571587
}
572588

589+
static void split_lock_init(void);
590+
573591
static void init_intel(struct cpuinfo_x86 *c)
574592
{
575593
early_init_intel(c);
@@ -684,6 +702,8 @@ static void init_intel(struct cpuinfo_x86 *c)
684702
tsx_enable();
685703
if (tsx_ctrl_state == TSX_CTRL_DISABLE)
686704
tsx_disable();
705+
706+
split_lock_init();
687707
}
688708

689709
#ifdef CONFIG_X86_32
@@ -945,3 +965,166 @@ static const struct cpu_dev intel_cpu_dev = {
945965
};
946966

947967
cpu_dev_register(intel_cpu_dev);
968+
969+
#undef pr_fmt
970+
#define pr_fmt(fmt) "x86/split lock detection: " fmt
971+
972+
/*
 * Recognized tokens for the split_lock_detect= command line option and
 * the state each selects; scanned by split_lock_setup().
 */
static const struct {
	const char *option;
	enum split_lock_detect_state state;
} sld_options[] __initconst = {
	{ "off",	sld_off   },
	{ "warn",	sld_warn  },
	{ "fatal",	sld_fatal },
};
980+
981+
/*
 * Compare an arglen-byte command line token against a known option
 * string.  True only when the lengths agree and the bytes match.
 */
static inline bool match_option(const char *arg, int arglen, const char *opt)
{
	int optlen = strlen(opt);

	return optlen == arglen && strncmp(arg, opt, optlen) == 0;
}
987+
988+
/*
 * Set or clear the split lock detect bit in MSR_TEST_CTRL, then read the
 * MSR back.  Returns true only when every access succeeded and the
 * readback matches what was written, i.e. the bit is really supported.
 */
static bool split_lock_verify_msr(bool on)
{
	u64 want, got;

	if (rdmsrl_safe(MSR_TEST_CTRL, &want))
		return false;

	if (on)
		want |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
	else
		want &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT;

	if (wrmsrl_safe(MSR_TEST_CTRL, want))
		return false;

	rdmsrl(MSR_TEST_CTRL, got);
	return want == got;
}
1003+
1004+
/*
 * Boot-time setup for split lock detection: verify MSR_TEST_CTRL is
 * usable, parse the split_lock_detect= command line override (default
 * is sld_warn on capable hardware), cache the MSR value for later
 * per-CPU programming and force-set the feature bit when enabled.
 */
static void __init split_lock_setup(void)
{
	enum split_lock_detect_state state = sld_warn;
	char arg[20];
	int i, ret;

	/* Probe with the bit cleared first; bail if the MSR misbehaves. */
	if (!split_lock_verify_msr(false)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	ret = cmdline_find_option(boot_command_line, "split_lock_detect",
				  arg, sizeof(arg));
	if (ret >= 0) {
		for (i = 0; i < ARRAY_SIZE(sld_options); i++) {
			if (!match_option(arg, ret, sld_options[i].option))
				continue;
			state = sld_options[i].state;
			break;
		}
	}

	switch (state) {
	case sld_off:
		pr_info("disabled\n");
		return;
	case sld_warn:
		pr_info("warning about user-space split_locks\n");
		break;
	case sld_fatal:
		pr_info("sending SIGBUS on user-space split_locks\n");
		break;
	}

	/* Snapshot the MSR so runtime updates never have to read it again. */
	rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache);

	if (!split_lock_verify_msr(true)) {
		pr_info("MSR access failed: Disabled\n");
		return;
	}

	sld_state = state;
	setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT);
}
1048+
1049+
/*
1050+
* MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking
1051+
* is not implemented as one thread could undo the setting of the other
1052+
* thread immediately after dropping the lock anyway.
1053+
*/
1054+
static void sld_update_msr(bool on)
1055+
{
1056+
u64 test_ctrl_val = msr_test_ctrl_cache;
1057+
1058+
if (on)
1059+
test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT;
1060+
1061+
wrmsrl(MSR_TEST_CTRL, test_ctrl_val);
1062+
}
1063+
1064+
/* Per-CPU init: program MSR_TEST_CTRL to match the global sld_state. */
static void split_lock_init(void)
{
	split_lock_verify_msr(sld_state != sld_off);
}
1068+
1069+
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
1070+
{
1071+
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
1072+
return false;
1073+
1074+
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1075+
current->comm, current->pid, regs->ip);
1076+
1077+
/*
1078+
* Disable the split lock detection for this task so it can make
1079+
* progress and set TIF_SLD so the detection is re-enabled via
1080+
* switch_to_sld() when the task is scheduled out.
1081+
*/
1082+
sld_update_msr(false);
1083+
set_tsk_thread_flag(current, TIF_SLD);
1084+
return true;
1085+
}
1086+
1087+
/*
 * This function is called only when switching between tasks with
 * different split-lock detection modes. It sets the MSR for the
 * mode of the new task. This is right most of the time, but since
 * the MSR is shared by hyperthreads on a physical core there can
 * be glitches when the two threads need different modes.
 */
void switch_to_sld(unsigned long tifn)
{
	/* TIF_SLD set means detection was disabled for this task. */
	sld_update_msr(!(tifn & _TIF_SLD));
}
1098+
1099+
/* Shorthand for an Intel family-6 model match entry. */
#define SPLIT_LOCK_CPU(model) {X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY}

/*
 * The following processors have the split lock detection feature. But
 * since they don't have the IA32_CORE_CAPABILITIES MSR, the feature cannot
 * be enumerated. Enable it by family and model matching on these
 * processors.
 */
static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
	SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_X),
	SPLIT_LOCK_CPU(INTEL_FAM6_ICELAKE_L),
	{}
};
1112+
1113+
/*
 * Early boot enumeration of split lock detection on Intel CPUs: read
 * IA32_CORE_CAPABILITIES when the CPU advertises it, otherwise (on bare
 * metal only) fall back to family/model matching, and run
 * split_lock_setup() when the capability is present.
 */
void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c)
{
	u64 caps = 0;

	if (c->x86_vendor != X86_VENDOR_INTEL)
		return;

	if (cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) {
		/* Enumerate features reported in IA32_CORE_CAPABILITIES MSR. */
		rdmsrl(MSR_IA32_CORE_CAPS, caps);
	} else if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		/* Enumerate split lock detection by family and model. */
		if (x86_match_cpu(split_lock_cpu_ids))
			caps |= MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT;
	}

	if (caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)
		split_lock_setup();
}

arch/x86/kernel/process.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
649649
/* Enforce MSR update to ensure consistent state */
650650
__speculation_ctrl_update(~tifn, tifn);
651651
}
652+
653+
if ((tifp ^ tifn) & _TIF_SLD)
654+
switch_to_sld(tifn);
652655
}
653656

654657
/*

arch/x86/kernel/traps.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include <asm/traps.h>
4747
#include <asm/desc.h>
4848
#include <asm/fpu/internal.h>
49+
#include <asm/cpu.h>
4950
#include <asm/cpu_entry_area.h>
5051
#include <asm/mce.h>
5152
#include <asm/fixmap.h>
@@ -242,7 +243,6 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
242243
{
243244
struct task_struct *tsk = current;
244245

245-
246246
if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
247247
return;
248248

@@ -288,9 +288,29 @@ DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overru
288288
DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS)
289289
DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present)
290290
DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment)
291-
DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check)
292291
#undef IP
293292

293+
/*
 * #AC (alignment check) exception handler.  A kernel-mode #AC is treated
 * as a split lock and oopses.  A user-mode trap is first offered to
 * handle_user_split_lock(); when not consumed there, SIGBUS/BUS_ADRALN
 * is delivered as for a conventional alignment check.
 */
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code)
{
	char *str = "alignment check";

	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");

	if (notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_AC, SIGBUS) == NOTIFY_STOP)
		return;

	/* Kernel or firmware mode: fatal in both warn and fatal sld modes. */
	if (!user_mode(regs))
		die("Split lock detected\n", regs, error_code);

	local_irq_enable();

	/* Consumed as a split-lock warning: task continues with SLD off. */
	if (handle_user_split_lock(regs, error_code))
		return;

	do_trap(X86_TRAP_AC, SIGBUS, "alignment check", regs,
		error_code, BUS_ADRALN, NULL);
}
313+
294314
#ifdef CONFIG_VMAP_STACK
295315
__visible void __noreturn handle_stack_overflow(const char *message,
296316
struct pt_regs *regs,

0 commit comments

Comments
 (0)