@@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
  */
 static int posix_cpu_timer_create(struct k_itimer *new_timer)
 {
+	static struct lock_class_key posix_cpu_timers_key;
 	struct pid *pid;
 
 	rcu_read_lock();
@@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 	}
 
+	/*
+	 * If posix timer expiry is handled in task work context then
+	 * timer::it_lock can be taken without disabling interrupts as all
+	 * other locking happens in task context. This requires a separate
+	 * lock class key otherwise regular posix timer expiry would record
+	 * the lock class being taken in interrupt context and generate a
+	 * false positive warning.
+	 */
+	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
+		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
+
 	new_timer->kclock = &clock_posix_cpu;
 	timerqueue_init(&new_timer->it.cpu.node);
 	new_timer->it.cpu.pid = get_pid(pid);
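The hunks above rely on lockdep's class-key mechanism: giving timers whose expiry runs in task work their own struct lock_class_key keeps lockdep from merging them into the class of timers whose it_lock is taken in hard interrupt context, which would otherwise produce a false positive. Not part of the patch; a minimal sketch of the pattern with made-up names:

	#include <linux/lockdep.h>
	#include <linux/spinlock.h>

	/* Hypothetical example lock, not from the patch. */
	static spinlock_t example_lock;

	static void example_init(void)
	{
		/* One key per distinct locking discipline. */
		static struct lock_class_key example_key;

		spin_lock_init(&example_lock);
		/*
		 * Move the lock into its own lockdep class so the usage
		 * rules recorded for other locks initialized at the same
		 * call site no longer apply to it.
		 */
		lockdep_set_class(&example_lock, &example_key);
	}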
@@ -1080,26 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 	return false;
 }
 
-static void __run_posix_cpu_timers(struct task_struct *tsk)
+static void handle_posix_cpu_timers(struct task_struct *tsk);
+
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+static void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
+/*
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
+ */
+void __init posix_cputimers_init_work(void)
+{
+	init_task_work(&current->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+}
+
+/*
+ * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
+ * in hard interrupt context or in task context with interrupts
+ * disabled. Aside from that the writer/reader interaction is always in the
+ * context of the current task, which means they are strictly per CPU.
+ */
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return tsk->posix_cputimers_work.scheduled;
+}
+
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
+		return;
+
+	/* Schedule task work to actually expire the timers */
+	tsk->posix_cputimers_work.scheduled = true;
+	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	bool ret = true;
+
+	/*
+	 * On !RT kernels interrupts are disabled while collecting expired
+	 * timers, so no tick can happen and the fast path check can be
+	 * reenabled without further checks.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		tsk->posix_cputimers_work.scheduled = false;
+		return true;
+	}
+
+	/*
+	 * On RT enabled kernels ticks can happen while the expired timers
+	 * are collected under sighand lock. But any tick which observes
+	 * the CPUTIMERS_WORK_SCHEDULED bit set does not run the fastpath
+	 * checks. So reenabling the tick work has to be done carefully:
+	 *
+	 * Disable interrupts and run the fast path check if jiffies have
+	 * advanced since the collecting of expired timers started. If
+	 * jiffies have not advanced or the fast path check did not find
+	 * newly expired timers, reenable the fast path check in the timer
+	 * interrupt. If there are newly expired timers, return false and
+	 * let the collection loop repeat.
+	 */
+	local_irq_disable();
+	if (start != jiffies && fastpath_timer_check(tsk))
+		ret = false;
+	else
+		tsk->posix_cputimers_work.scheduled = false;
+	local_irq_enable();
+
+	return ret;
+}
+#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	lockdep_posixtimer_enter();
+	handle_posix_cpu_timers(tsk);
+	lockdep_posixtimer_exit();
+}
+
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return false;
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	return true;
+}
+#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
 {
 	struct k_itimer *timer, *next;
-	unsigned long flags;
+	unsigned long flags, start;
 	LIST_HEAD(firing);
 
 	if (!lock_task_sighand(tsk, &flags))
 		return;
 
-	/*
-	 * Here we take off tsk->signal->cpu_timers[N] and
-	 * tsk->cpu_timers[N] all the timers that are firing, and
-	 * put them on the firing list.
-	 */
-	check_thread_timers(tsk, &firing);
+	do {
+		/*
+		 * On RT locking sighand lock does not disable interrupts,
+		 * so this needs to be careful vs. ticks. Store the current
+		 * jiffies value.
+		 */
+		start = READ_ONCE(jiffies);
+		barrier();
 
-	check_process_timers(tsk, &firing);
+		/*
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+
+		check_process_timers(tsk, &firing);
+
+		/*
+		 * The above timer checks have updated the expiry cache and
+		 * because nothing can have queued or modified timers after
+		 * sighand lock was taken above it is guaranteed to be
+		 * consistent. So the next timer interrupt fastpath check
+		 * will find valid data.
+		 *
+		 * If timer expiry runs in the timer interrupt context then
+		 * the loop is not relevant as timers will be directly
+		 * expired in interrupt context. The stub function below
+		 * always returns true which allows the compiler to
+		 * optimize the loop out.
+		 *
+		 * If timer expiry is deferred to task work context then
+		 * the following rules apply:
+		 *
+		 * - On !RT kernels no tick can have happened on this CPU
+		 *   after sighand lock was acquired because interrupts are
+		 *   disabled. So reenabling task work before dropping
+		 *   sighand lock and reenabling interrupts is race free.
+		 *
+		 * - On RT kernels ticks might have happened but the tick
+		 *   work ignored posix CPU timer handling because the
+		 *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
+		 *   must be done very carefully including a check whether
+		 *   ticks have happened since the start of the timer
+		 *   expiry checks. posix_cpu_timers_enable_work() takes
+		 *   care of that and eventually lets the expiry checks
+		 *   run again.
+		 */
+	} while (!posix_cpu_timers_enable_work(tsk, start));
 
 	/*
-	 * We must release these locks before taking any timer's lock.
+	 * We must release sighand lock before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list. We have set
 	 * the firing flag in each timer, so that a deletion attempt
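The heart of this hunk is the task_work deferral: when CONFIG_POSIX_CPU_TIMERS_TASK_WORK is enabled, the timer interrupt merely marks the work as scheduled and queues it, and handle_posix_cpu_timers() then runs in task context on the way back to user space. A rough, self-contained sketch of that pattern follows; the names are made up and the struct layout is a guess based only on the .work and .scheduled accesses visible in the hunk:

	#include <linux/sched.h>
	#include <linux/task_work.h>

	/* Guessed shape of the per-task bookkeeping; not taken from this hunk. */
	struct example_cputimers_work {
		struct callback_head	work;
		unsigned int		scheduled;
	};

	static struct example_cputimers_work example_work;

	/* Runs later in task context, typically on return to user space. */
	static void example_work_fn(struct callback_head *work)
	{
		/* expire timers for 'current' here */
	}

	/* One-time setup: bind the callback to the work item. */
	static void example_init(void)
	{
		init_task_work(&example_work.work, example_work_fn);
	}

	/* Called from hard interrupt context instead of expiring timers there. */
	static void example_defer(struct task_struct *tsk)
	{
		if (example_work.scheduled)
			return;

		example_work.scheduled = true;
		task_work_add(tsk, &example_work.work, TWA_RESUME);
	}

TWA_RESUME asks for the callback to run before the task returns to user space, which is why it_lock can then be taken without disabling interrupts in that path.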
@@ -1117,6 +1266,13 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;
 
+		/*
+		 * spin_lock() is sufficient here even independent of the
+		 * expiry context. If expiry happens in hard interrupt
+		 * context it's obvious. For task work context it's safe
+		 * because all other operations on timer::it_lock happen in
+		 * task context (syscall or exit).
+		 */
 		spin_lock(&timer->it_lock);
 		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
@@ -1143,16 +1299,21 @@ void run_posix_cpu_timers(void)
 
 	lockdep_assert_irqs_disabled();
 
+	/*
+	 * If the actual expiry is deferred to task work context and the
+	 * work is already scheduled there is no point in doing anything here.
+	 */
+	if (posix_cpu_timers_work_scheduled(tsk))
+		return;
+
 	/*
 	 * The fast path checks that there are no expired thread or thread
 	 * group timers. If that's so, just return.
 	 */
 	if (!fastpath_timer_check(tsk))
 		return;
 
-	lockdep_posixtimer_enter();
 	__run_posix_cpu_timers(tsk);
-	lockdep_posixtimer_exit();
 }
 
 /*
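Not part of the commit, but useful context: the code patched here backs the POSIX CPU-time timers visible from user space. A small sketch of a program that exercises this path, with timer_create() on a CPU-time clock going through posix_cpu_timer_create() and its expiry being what run_posix_cpu_timers() and the new task-work path deliver. Error handling is minimal and the build command is an assumption (older glibc may need -lrt):

	/* cpu_timer_demo.c — gcc -O2 -o cpu_timer_demo cpu_timer_demo.c */
	#include <signal.h>
	#include <stdio.h>
	#include <time.h>

	static volatile sig_atomic_t fired;

	static void handler(int sig)
	{
		fired = 1;
	}

	int main(void)
	{
		struct sigaction sa = { .sa_handler = handler };
		struct sigevent sev = { 0 };
		struct itimerspec its = { 0 };
		timer_t timerid;

		sigaction(SIGRTMIN, &sa, NULL);

		/* Per-thread CPU-time timer, delivered as a signal on expiry. */
		sev.sigev_notify = SIGEV_SIGNAL;
		sev.sigev_signo = SIGRTMIN;
		if (timer_create(CLOCK_THREAD_CPUTIME_ID, &sev, &timerid)) {
			perror("timer_create");
			return 1;
		}

		/* Fire after this thread has consumed 100ms of CPU time. */
		its.it_value.tv_nsec = 100 * 1000 * 1000;
		if (timer_settime(timerid, 0, &its, NULL)) {
			perror("timer_settime");
			return 1;
		}

		while (!fired)
			;	/* burn CPU time until the timer expires */

		puts("CPU timer fired");
		return 0;
	}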