
Commit b6b178e

Merge tag 'timers-core-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull more timer updates from Thomas Gleixner:

 "A set of posix CPU timer changes which allow deferring the heavy work of
  posix CPU timers into task work context. The tick interrupt is reduced to
  a quick check which queues the work that does the heavy lifting before
  returning to user space or going back to guest mode. Moving this out
  defers the signal delivery slightly, but posix CPU timers are inaccurate
  by nature as they depend on the tick, so there is no real damage. The
  relevant test cases all passed.

  This lifts the last offender for RT out of the hard interrupt context
  tick handler, but it also has the general benefit that the actual heavy
  work is accounted to the task/process and not to the tick interrupt
  itself.

  Further optimizations are possible to break up long sighand lock hold and
  interrupt-disabled (on !RT kernels) times when a massive number of posix
  CPU timers (which are unprivileged) is armed for a task/process.

  This is currently only enabled for x86 because the architecture has to
  ensure that task work is handled in KVM before entering a guest, which
  was just established for x86 with the new common entry/exit code that got
  merged post 5.8; this is not yet the case for other KVM architectures."

* tag 'timers-core-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Select POSIX_CPU_TIMERS_TASK_WORK
  posix-cpu-timers: Provide mechanisms to defer timer handling to task_work
  posix-cpu-timers: Split run_posix_cpu_timers()
2 parents 1d229a6 + 0099808 commit b6b178e
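
For context on the mechanism described above: task_work is the kernel facility that lets code queue a callback which the task then runs itself, with interrupts enabled, on its way back to user space (or, for KVM, before entering the guest). Below is a minimal illustrative sketch of that pattern using the task_work API the patches rely on; the expiry_work/tick_hook names are made up for illustration, and the real patch instead embeds the callback_head in task_struct, initializes it once, and guards against double-queueing with a 'scheduled' flag (see the posix-cpu-timers.c hunks further down).

#include <linux/task_work.h>
#include <linux/sched.h>

static struct callback_head expiry_work;	/* illustrative only */

/* Runs in task context with interrupts enabled, on return to user space */
static void expiry_work_fn(struct callback_head *head)
{
	/* the heavy timer expiry work would happen here */
}

/* Called from the tick with interrupts disabled */
static void tick_hook(void)
{
	/* Keep the interrupt path cheap: just queue the task work */
	init_task_work(&expiry_work, expiry_work_fn);
	task_work_add(current, &expiry_work, TWA_RESUME);
}

TWA_RESUME asks for the work to be run when the task returns to user mode, which is exactly the point where the deferred posix CPU timer expiry is wanted.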

File tree

6 files changed: +223 -25 lines changed


arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -209,6 +209,7 @@ config X86
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE	if PARAVIRT
+	select HAVE_POSIX_CPU_TIMERS_TASK_WORK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
 	select HAVE_FUNCTION_ARG_ACCESS_API

include/linux/posix-timers.h

Lines changed: 17 additions & 0 deletions
@@ -6,6 +6,7 @@
 #include <linux/list.h>
 #include <linux/alarmtimer.h>
 #include <linux/timerqueue.h>
+#include <linux/task_work.h>

 struct kernel_siginfo;
 struct task_struct;
@@ -125,6 +126,16 @@ struct posix_cputimers {
 	unsigned int	expiry_active;
 };

+/**
+ * posix_cputimers_work - Container for task work based posix CPU timer expiry
+ * @work:	The task work to be scheduled
+ * @scheduled:	@work has been scheduled already, no further processing
+ */
+struct posix_cputimers_work {
+	struct callback_head	work;
+	unsigned int		scheduled;
+};
+
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
 	memset(pct, 0, sizeof(*pct));
@@ -165,6 +176,12 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 					      u64 cpu_limit) { }
 #endif

+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void posix_cputimers_init_work(void);
+#else
+static inline void posix_cputimers_init_work(void) { }
+#endif
+
 #define REQUEUE_PENDING 1

 /**

include/linux/sched.h

Lines changed: 4 additions & 0 deletions
@@ -890,6 +890,10 @@ struct task_struct {
 	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
 	struct posix_cputimers		posix_cputimers;

+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+	struct posix_cputimers_work	posix_cputimers_work;
+#endif
+
 	/* Process credentials: */

 	/* Tracer's credentials at attach: */

kernel/time/Kconfig

Lines changed: 9 additions & 0 deletions
@@ -52,6 +52,15 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST
 config GENERIC_CMOS_UPDATE
 	bool

+# Select to handle posix CPU timers from task_work
+# and not from the timer interrupt context
+config HAVE_POSIX_CPU_TIMERS_TASK_WORK
+	bool
+
+config POSIX_CPU_TIMERS_TASK_WORK
+	bool
+	default y if POSIX_TIMERS && HAVE_POSIX_CPU_TIMERS_TASK_WORK
+
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"

kernel/time/posix-cpu-timers.c

Lines changed: 191 additions & 25 deletions
@@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
  */
 static int posix_cpu_timer_create(struct k_itimer *new_timer)
 {
+	static struct lock_class_key posix_cpu_timers_key;
 	struct pid *pid;

 	rcu_read_lock();
@@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 	}

+	/*
+	 * If posix timer expiry is handled in task work context then
+	 * timer::it_lock can be taken without disabling interrupts as all
+	 * other locking happens in task context. This requires a separate
+	 * lock class key otherwise regular posix timer expiry would record
+	 * the lock class being taken in interrupt context and generate a
+	 * false positive warning.
+	 */
+	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
+		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
+
 	new_timer->kclock = &clock_posix_cpu;
 	timerqueue_init(&new_timer->it.cpu.node);
 	new_timer->it.cpu.pid = get_pid(pid);
@@ -1080,43 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 	return false;
 }

+static void handle_posix_cpu_timers(struct task_struct *tsk);
+
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+static void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
 /*
- * This is called from the timer interrupt handler.  The irq handler has
- * already updated our counts.  We need to check if any timers fire now.
- * Interrupts are disabled.
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
  */
-void run_posix_cpu_timers(void)
+void __init posix_cputimers_init_work(void)
 {
-	struct task_struct *tsk = current;
-	struct k_itimer *timer, *next;
-	unsigned long flags;
-	LIST_HEAD(firing);
+	init_task_work(&current->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+}

-	lockdep_assert_irqs_disabled();
+/*
+ * Note: All operations on tsk->posix_cputimer_work.scheduled happen either
+ * in hard interrupt context or in task context with interrupts
+ * disabled. Aside of that the writer/reader interaction is always in the
+ * context of the current task, which means they are strict per CPU.
+ */
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return tsk->posix_cputimers_work.scheduled;
+}

-	/*
-	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.
-	 */
-	if (!fastpath_timer_check(tsk))
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
 		return;

-	lockdep_posixtimer_enter();
-	if (!lock_task_sighand(tsk, &flags)) {
-		lockdep_posixtimer_exit();
-		return;
+	/* Schedule task work to actually expire the timers */
+	tsk->posix_cputimers_work.scheduled = true;
+	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	bool ret = true;
+
+	/*
+	 * On !RT kernels interrupts are disabled while collecting expired
+	 * timers, so no tick can happen and the fast path check can be
+	 * reenabled without further checks.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		tsk->posix_cputimers_work.scheduled = false;
+		return true;
 	}
+
 	/*
-	 * Here we take off tsk->signal->cpu_timers[N] and
-	 * tsk->cpu_timers[N] all the timers that are firing, and
-	 * put them on the firing list.
+	 * On RT enabled kernels ticks can happen while the expired timers
+	 * are collected under sighand lock. But any tick which observes
+	 * the CPUTIMERS_WORK_SCHEDULED bit set, does not run the fastpath
+	 * checks. So reenabling the tick work has to be done carefully:
+	 *
+	 * Disable interrupts and run the fast path check if jiffies have
+	 * advanced since the collecting of expired timers started. If
+	 * jiffies have not advanced or the fast path check did not find
+	 * newly expired timers, reenable the fast path check in the timer
+	 * interrupt. If there are newly expired timers, return false and
+	 * let the collection loop repeat.
 	 */
-	check_thread_timers(tsk, &firing);
+	local_irq_disable();
+	if (start != jiffies && fastpath_timer_check(tsk))
+		ret = false;
+	else
+		tsk->posix_cputimers_work.scheduled = false;
+	local_irq_enable();
+
+	return ret;
+}
+#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	lockdep_posixtimer_enter();
+	handle_posix_cpu_timers(tsk);
+	lockdep_posixtimer_exit();
+}
+
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return false;
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	return true;
+}
+#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
+{
+	struct k_itimer *timer, *next;
+	unsigned long flags, start;
+	LIST_HEAD(firing);
+
+	if (!lock_task_sighand(tsk, &flags))
+		return;

-	check_process_timers(tsk, &firing);
+	do {
+		/*
+		 * On RT locking sighand lock does not disable interrupts,
+		 * so this needs to be careful vs. ticks. Store the current
+		 * jiffies value.
+		 */
+		start = READ_ONCE(jiffies);
+		barrier();
+
+		/*
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+
+		check_process_timers(tsk, &firing);
+
+		/*
+		 * The above timer checks have updated the expiry cache and
+		 * because nothing can have queued or modified timers after
+		 * sighand lock was taken above it is guaranteed to be
+		 * consistent. So the next timer interrupt fastpath check
+		 * will find valid data.
+		 *
+		 * If timer expiry runs in the timer interrupt context then
+		 * the loop is not relevant as timers will be directly
+		 * expired in interrupt context. The stub function below
+		 * always returns true which allows the compiler to
+		 * optimize the loop out.
+		 *
+		 * If timer expiry is deferred to task work context then
+		 * the following rules apply:
+		 *
+		 * - On !RT kernels no tick can have happened on this CPU
+		 *   after sighand lock was acquired because interrupts are
+		 *   disabled. So reenabling task work before dropping
+		 *   sighand lock and reenabling interrupts is race free.
+		 *
+		 * - On RT kernels ticks might have happened but the tick
+		 *   work ignored posix CPU timer handling because the
+		 *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
+		 *   must be done very carefully including a check whether
+		 *   ticks have happened since the start of the timer
+		 *   expiry checks. posix_cpu_timers_enable_work() takes
+		 *   care of that and eventually lets the expiry checks
+		 *   run again.
+		 */
+	} while (!posix_cpu_timers_enable_work(tsk, start));

 	/*
-	 * We must release these locks before taking any timer's lock.
+	 * We must release sighand lock before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list.  We have set
 	 * the firing flag in each timer, so that a deletion attempt
@@ -1134,6 +1266,13 @@ void run_posix_cpu_timers(void)
 	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;

+		/*
+		 * spin_lock() is sufficient here even independent of the
+		 * expiry context. If expiry happens in hard interrupt
+		 * context it's obvious. For task work context it's safe
+		 * because all other operations on timer::it_lock happen in
+		 * task context (syscall or exit).
+		 */
 		spin_lock(&timer->it_lock);
 		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
@@ -1147,7 +1286,34 @@ void run_posix_cpu_timers(void)
 		cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
-	lockdep_posixtimer_exit();
+}
+
+/*
+ * This is called from the timer interrupt handler.  The irq handler has
+ * already updated our counts.  We need to check if any timers fire now.
+ * Interrupts are disabled.
+ */
+void run_posix_cpu_timers(void)
+{
+	struct task_struct *tsk = current;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * If the actual expiry is deferred to task work context and the
+	 * work is already scheduled there is no point to do anything here.
+	 */
+	if (posix_cpu_timers_work_scheduled(tsk))
+		return;
+
+	/*
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers.  If that's so, just return.
+	 */
+	if (!fastpath_timer_check(tsk))
+		return;
+
+	__run_posix_cpu_timers(tsk);
 }

 /*

kernel/time/timer.c

Lines changed: 1 addition & 0 deletions
@@ -2017,6 +2017,7 @@ static void __init init_timer_cpus(void)
 void __init init_timers(void)
 {
 	init_timer_cpus();
+	posix_cputimers_init_work();
 	open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
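
One detail worth noting about the hunk above: init_timers() can call posix_cputimers_init_work() unconditionally because the include/linux/posix-timers.h hunk earlier provides an empty static inline stub when CONFIG_POSIX_CPU_TIMERS_TASK_WORK is disabled, so the call compiles away and the call site needs no #ifdef. A generic, self-contained sketch of that stub pattern (hypothetical names, plain userspace C rather than kernel code):

#include <stdio.h>

#ifdef CONFIG_FEATURE_X			/* stand-in for a real Kconfig symbol */
void feature_x_init(void)
{
	printf("feature X initialized\n");
}
#else
static inline void feature_x_init(void) { }	/* compiled-out stub */
#endif

int main(void)
{
	feature_x_init();	/* call site stays free of #ifdefs */
	return 0;
}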
