Skip to content

Commit b041b52

Browse files
aeglKAGA-KOKO
authored andcommitted
x86/split_lock: Make life miserable for split lockers
In https://lore.kernel.org/all/87y22uujkm.ffs@tglx/ Thomas said: Its's simply wishful thinking that stuff gets fixed because of a WARN_ONCE(). This has never worked. The only thing which works is to make stuff fail hard or slow it down in a way which makes it annoying enough to users to complain. He was talking about WBINVD. But it made me think about how we use the split lock detection feature in Linux. Existing code has three options for applications: 1) Don't enable split lock detection (allow arbitrary split locks) 2) Warn once when a process uses split lock, but let the process keep running with split lock detection disabled 3) Kill process that use split locks Option 2 falls into the "wishful thinking" territory that Thomas warns does nothing. But option 3 might not be viable in a situation with legacy applications that need to run. Hence make option 2 much stricter to "slow it down in a way which makes it annoying". Primary reason for this change is to provide better quality of service to the rest of the applications running on the system. Internal testing shows that even with many processes splitting locks, performance for the rest of the system is much more responsive. The new "warn" mode operates like this. When an application tries to execute a bus lock the #AC handler. 1) Delays (interruptibly) 10 ms before moving to next step. 2) Blocks (interruptibly) until it can get the semaphore If interrupted, just return. Assume the signal will either kill the task, or direct execution away from the instruction that is trying to get the bus lock. 3) Disables split lock detection for the current core 4) Schedules a work queue to re-enable split lock detect in 2 jiffies 5) Returns The work queue that re-enables split lock detection also releases the semaphore. There is a corner case where a CPU may be taken offline while split lock detection is disabled. A CPU hotplug handler handles this case. Old behaviour was to only print the split lock warning on the first occurrence of a split lock from a task. Preserve that by adding a flag to the task structure that suppresses subsequent split lock messages from that task. Signed-off-by: Tony Luck <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent af2d861 commit b041b52

File tree

3 files changed

+61
-10
lines changed

3 files changed

+61
-10
lines changed

arch/x86/kernel/cpu/intel.c

Lines changed: 53 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@
77
#include <linux/smp.h>
88
#include <linux/sched.h>
99
#include <linux/sched/clock.h>
10+
#include <linux/semaphore.h>
1011
#include <linux/thread_info.h>
1112
#include <linux/init.h>
1213
#include <linux/uaccess.h>
14+
#include <linux/workqueue.h>
1315
#include <linux/delay.h>
16+
#include <linux/cpuhotplug.h>
1417

1518
#include <asm/cpufeature.h>
1619
#include <asm/msr.h>
@@ -999,6 +1002,8 @@ static const struct {
9991002

10001003
static struct ratelimit_state bld_ratelimit;
10011004

1005+
static DEFINE_SEMAPHORE(buslock_sem);
1006+
10021007
static inline bool match_option(const char *arg, int arglen, const char *opt)
10031008
{
10041009
int len = strlen(opt), ratelimit;
@@ -1109,18 +1114,52 @@ static void split_lock_init(void)
11091114
split_lock_verify_msr(sld_state != sld_off);
11101115
}
11111116

1117+
static void __split_lock_reenable(struct work_struct *work)
1118+
{
1119+
sld_update_msr(true);
1120+
up(&buslock_sem);
1121+
}
1122+
1123+
/*
1124+
* If a CPU goes offline with pending delayed work to re-enable split lock
1125+
* detection then the delayed work will be executed on some other CPU. That
1126+
* handles releasing the buslock_sem, but because it executes on a
1127+
* different CPU probably won't re-enable split lock detection. This is a
1128+
* problem on HT systems since the sibling CPU on the same core may then be
1129+
* left running with split lock detection disabled.
1130+
*
1131+
* Unconditionally re-enable detection here.
1132+
*/
1133+
static int splitlock_cpu_offline(unsigned int cpu)
1134+
{
1135+
sld_update_msr(true);
1136+
1137+
return 0;
1138+
}
1139+
1140+
static DECLARE_DELAYED_WORK(split_lock_reenable, __split_lock_reenable);
1141+
11121142
static void split_lock_warn(unsigned long ip)
11131143
{
1114-
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1115-
current->comm, current->pid, ip);
1144+
int cpu;
11161145

1117-
/*
1118-
* Disable the split lock detection for this task so it can make
1119-
* progress and set TIF_SLD so the detection is re-enabled via
1120-
* switch_to_sld() when the task is scheduled out.
1121-
*/
1146+
if (!current->reported_split_lock)
1147+
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
1148+
current->comm, current->pid, ip);
1149+
current->reported_split_lock = 1;
1150+
1151+
/* misery factor #1, sleep 10ms before trying to execute split lock */
1152+
if (msleep_interruptible(10) > 0)
1153+
return;
1154+
/* Misery factor #2, only allow one buslocked disabled core at a time */
1155+
if (down_interruptible(&buslock_sem) == -EINTR)
1156+
return;
1157+
cpu = get_cpu();
1158+
schedule_delayed_work_on(cpu, &split_lock_reenable, 2);
1159+
1160+
/* Disable split lock detection on this CPU to make progress */
11221161
sld_update_msr(false);
1123-
set_tsk_thread_flag(current, TIF_SLD);
1162+
put_cpu();
11241163
}
11251164

11261165
bool handle_guest_split_lock(unsigned long ip)
@@ -1274,10 +1313,14 @@ static void sld_state_show(void)
12741313
pr_info("disabled\n");
12751314
break;
12761315
case sld_warn:
1277-
if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
1316+
if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {
12781317
pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n");
1279-
else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
1318+
if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
1319+
"x86/splitlock", NULL, splitlock_cpu_offline) < 0)
1320+
pr_warn("No splitlock CPU offline handler\n");
1321+
} else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) {
12801322
pr_info("#DB: warning on user-space bus_locks\n");
1323+
}
12811324
break;
12821325
case sld_fatal:
12831326
if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) {

include/linux/sched.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,9 @@ struct task_struct {
941941
#ifdef CONFIG_IOMMU_SVA
942942
unsigned pasid_activated:1;
943943
#endif
944+
#ifdef CONFIG_CPU_SUP_INTEL
945+
unsigned reported_split_lock:1;
946+
#endif
944947

945948
unsigned long atomic_flags; /* Flags requiring atomic access. */
946949

kernel/fork.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
10451045
#ifdef CONFIG_MEMCG
10461046
tsk->active_memcg = NULL;
10471047
#endif
1048+
1049+
#ifdef CONFIG_CPU_SUP_INTEL
1050+
tsk->reported_split_lock = 0;
1051+
#endif
1052+
10481053
return tsk;
10491054

10501055
free_stack:

0 commit comments

Comments
 (0)