Commit f7abf14

posix-cpu-timers: Implement the missing timer_wait_running callback
For some unknown reason the introduction of the timer_wait_running callback
missed fixing up posix CPU timers, which went unnoticed for almost four years.

Marco reported recently that the WARN_ON() in timer_wait_running() triggers
with a posix CPU timer test case.

Posix CPU timers have two execution models for expiring timers depending on
CONFIG_POSIX_CPU_TIMERS_TASK_WORK:

1) If not enabled, the expiry happens in hard interrupt context, so spin
   waiting on the remote CPU is reasonably time bound.

   Implement an empty stub function for that case.

2) If enabled, the expiry happens in task work before returning to user space
   or guest mode. The expired timers are marked as firing and moved from the
   timer queue to a local list head with sighand lock held. Once the timers
   are moved, sighand lock is dropped and the expiry happens in fully
   preemptible context. That means the expiring task can be scheduled out,
   migrated, interrupted etc., so spin waiting on it is more than suboptimal.

   The timer wheel has a timer_wait_running() mechanism for RT, which uses a
   per CPU timer-base expiry lock that is held by the expiry code; the task
   waiting for the timer function to complete blocks on that lock.

   This does not work in the same way for posix CPU timers, as there is no
   timer base and expiry for process wide timers can run on any task belonging
   to that process, but the concept of waiting on an expiry lock can be used
   too, in a slightly different way:

   - Add a mutex to struct posix_cputimers_work. This struct is per task and
     used to schedule the expiry task work from the timer interrupt.

   - Add a task_struct pointer to struct cpu_timer which is used to store the
     task which runs the expiry. It is filled in when the task moves the
     expired timers to the local expiry list. This does not affect the size of
     the k_itimer union as there are bigger union members already.

   - Let the task take the expiry mutex around the expiry function.

   - Let the waiter acquire a task reference with rcu_read_lock() held and
     block on the expiry mutex.

   This avoids spin-waiting on a task which might not even be on a CPU and
   works nicely for RT too.

Fixes: ec8f954 ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT")
Reported-by: Marco Elver <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Marco Elver <[email protected]>
Tested-by: Sebastian Andrzej Siewior <[email protected]>
Reviewed-by: Frederic Weisbecker <[email protected]>
Cc: [email protected]
Link: https://lore.kernel.org/r/87zg764ojw.ffs@tglx
1 parent 263dda2 commit f7abf14
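
Editor's note: conceptually, the task-work case above boils down to an expiry-lock
handoff, which a minimal userspace sketch can illustrate. The sketch below uses
POSIX threads; all names in it (struct handler, struct cpu_timer_sketch, expire(),
cancel()) are invented for illustration and merely stand in for
posix_cputimers_work.mutex, cpu_timer::firing and cpu_timer::handling. The kernel
additionally looks up the handling task under RCU and takes a task reference; the
real implementation is in the diff below.

/* Userspace analogue of the expiry-lock handoff, illustrative only. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Analogue of struct posix_cputimers_work: one expiry mutex per handler. */
struct handler {
        pthread_mutex_t expiry_mutex;
};

/* Analogue of struct cpu_timer: firing flag plus a pointer to the handler. */
struct cpu_timer_sketch {
        atomic_bool firing;
        _Atomic(struct handler *) handling;
};

static struct handler worker = { .expiry_mutex = PTHREAD_MUTEX_INITIALIZER };
static struct cpu_timer_sketch timer_sk;

/* Expiry side: publish the handler and hold the mutex across the expiry run. */
static void *expire(void *arg)
{
        pthread_mutex_lock(&worker.expiry_mutex);
        atomic_store(&timer_sk.firing, true);
        atomic_store(&timer_sk.handling, &worker);

        usleep(100 * 1000);             /* stand-in for the actual expiry work */

        atomic_store(&timer_sk.handling, NULL);
        atomic_store(&timer_sk.firing, false);
        pthread_mutex_unlock(&worker.expiry_mutex);
        return NULL;
}

/* Cancel side: if the timer is firing, block on the handler's expiry mutex. */
static void *cancel(void *arg)
{
        while (atomic_load(&timer_sk.firing)) {
                struct handler *h = atomic_load(&timer_sk.handling);

                if (!h)                 /* expiry already completed */
                        break;
                /* Wait for the expiry run to finish, then release immediately. */
                pthread_mutex_lock(&h->expiry_mutex);
                pthread_mutex_unlock(&h->expiry_mutex);
        }
        puts("cancel completed without spin-waiting");
        return NULL;
}

int main(void)
{
        pthread_t e, c;

        pthread_create(&e, NULL, expire, NULL);
        usleep(10 * 1000);              /* let the expiry run start first */
        pthread_create(&c, NULL, cancel, NULL);
        pthread_join(e, NULL);
        pthread_join(c, NULL);
        return 0;
}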

File tree: 3 files changed, +82 -20 lines changed

include/linux/posix-timers.h

Lines changed: 11 additions & 6 deletions
@@ -4,6 +4,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/alarmtimer.h>
 #include <linux/timerqueue.h>
 
@@ -62,16 +63,18 @@ static inline int clockid_to_fd(const clockid_t clk)
  * cpu_timer - Posix CPU timer representation for k_itimer
  * @node:      timerqueue node to queue in the task/sig
  * @head:      timerqueue head on which this timer is queued
- * @task:      Pointer to target task
+ * @pid:       Pointer to target task PID
  * @elist:     List head for the expiry list
  * @firing:    Timer is currently firing
+ * @handling:  Pointer to the task which handles expiry
  */
 struct cpu_timer {
-        struct timerqueue_node  node;
-        struct timerqueue_head  *head;
-        struct pid              *pid;
-        struct list_head        elist;
-        int                     firing;
+        struct timerqueue_node  node;
+        struct timerqueue_head  *head;
+        struct pid              *pid;
+        struct list_head        elist;
+        int                     firing;
+        struct task_struct __rcu *handling;
 };
 
 static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
@@ -135,10 +138,12 @@ struct posix_cputimers {
 /**
  * posix_cputimers_work - Container for task work based posix CPU timer expiry
  * @work:      The task work to be scheduled
+ * @mutex:     Mutex held around expiry in context of this task work
  * @scheduled: @work has been scheduled already, no further processing
  */
 struct posix_cputimers_work {
         struct callback_head    work;
+        struct mutex            mutex;
         unsigned int            scheduled;
 };

kernel/time/posix-cpu-timers.c

Lines changed: 67 additions & 14 deletions
@@ -846,6 +846,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
                        return expires;
 
                ctmr->firing = 1;
+               /* See posix_cpu_timer_wait_running() */
+               rcu_assign_pointer(ctmr->handling, current);
                cpu_timer_dequeue(ctmr);
                list_add_tail(&ctmr->elist, firing);
        }
@@ -1161,7 +1163,49 @@ static void handle_posix_cpu_timers(struct task_struct *tsk);
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
 static void posix_cpu_timers_work(struct callback_head *work)
 {
+       struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
+
+       mutex_lock(&cw->mutex);
        handle_posix_cpu_timers(current);
+       mutex_unlock(&cw->mutex);
+}
+
+/*
+ * Invoked from the posix-timer core when a cancel operation failed because
+ * the timer is marked firing. The caller holds rcu_read_lock(), which
+ * protects the timer and the task which is expiring it from being freed.
+ */
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+       struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);
+
+       /* Has the handling task completed expiry already? */
+       if (!tsk)
+               return;
+
+       /* Ensure that the task cannot go away */
+       get_task_struct(tsk);
+       /* Now drop the RCU protection so the mutex can be locked */
+       rcu_read_unlock();
+       /* Wait on the expiry mutex */
+       mutex_lock(&tsk->posix_cputimers_work.mutex);
+       /* Release it immediately again. */
+       mutex_unlock(&tsk->posix_cputimers_work.mutex);
+       /* Drop the task reference. */
+       put_task_struct(tsk);
+       /* Relock RCU so the callsite is balanced */
+       rcu_read_lock();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+       /* Ensure that timr->it.cpu.handling task cannot go away */
+       rcu_read_lock();
+       spin_unlock_irq(&timr->it_lock);
+       posix_cpu_timer_wait_running(timr);
+       rcu_read_unlock();
+       /* @timr is on stack and is valid */
+       spin_lock_irq(&timr->it_lock);
 }
 
 /*
@@ -1177,6 +1221,7 @@ void clear_posix_cputimers_work(struct task_struct *p)
               sizeof(p->posix_cputimers_work.work));
        init_task_work(&p->posix_cputimers_work.work,
                       posix_cpu_timers_work);
+       mutex_init(&p->posix_cputimers_work.mutex);
        p->posix_cputimers_work.scheduled = false;
 }
 
@@ -1255,6 +1300,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk)
        lockdep_posixtimer_exit();
 }
 
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+       cpu_relax();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+       spin_unlock_irq(&timr->it_lock);
+       cpu_relax();
+       spin_lock_irq(&timr->it_lock);
+}
+
 static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
 {
        return false;
@@ -1363,6 +1420,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
                 */
                if (likely(cpu_firing >= 0))
                        cpu_timer_fire(timer);
+               /* See posix_cpu_timer_wait_running() */
+               rcu_assign_pointer(timer->it.cpu.handling, NULL);
                spin_unlock(&timer->it_lock);
        }
 }
@@ -1497,23 +1556,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
                expires = cpu_timer_getexpires(&timer.it.cpu);
                error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
                if (!error) {
-                       /*
-                        * Timer is now unarmed, deletion can not fail.
-                        */
+                       /* Timer is now unarmed, deletion can not fail. */
                        posix_cpu_timer_del(&timer);
+               } else {
+                       while (error == TIMER_RETRY) {
+                               posix_cpu_timer_wait_running_nsleep(&timer);
+                               error = posix_cpu_timer_del(&timer);
+                       }
                }
-               spin_unlock_irq(&timer.it_lock);
 
-               while (error == TIMER_RETRY) {
-                       /*
-                        * We need to handle case when timer was or is in the
-                        * middle of firing. In other cases we already freed
-                        * resources.
-                        */
-                       spin_lock_irq(&timer.it_lock);
-                       error = posix_cpu_timer_del(&timer);
-                       spin_unlock_irq(&timer.it_lock);
-               }
+               spin_unlock_irq(&timer.it_lock);
 
                if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
                        /*
@@ -1623,6 +1675,7 @@ const struct k_clock clock_posix_cpu = {
        .timer_del              = posix_cpu_timer_del,
        .timer_get              = posix_cpu_timer_get,
        .timer_rearm            = posix_cpu_timer_rearm,
+       .timer_wait_running     = posix_cpu_timer_wait_running,
 };
 
 const struct k_clock clock_process = {

kernel/time/posix-timers.c

Lines changed: 4 additions & 0 deletions
@@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
        rcu_read_lock();
        unlock_timer(timer, *flags);
 
+       /*
+        * kc->timer_wait_running() might drop RCU lock. So @timer
+        * cannot be touched anymore after the function returns!
+        */
        if (!WARN_ON_ONCE(!kc->timer_wait_running))
                kc->timer_wait_running(timer);
