@@ -377,6 +377,7 @@ static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
  */
 static int posix_cpu_timer_create(struct k_itimer *new_timer)
 {
+	static struct lock_class_key posix_cpu_timers_key;
 	struct pid *pid;
 
 	rcu_read_lock();
@@ -386,6 +387,17 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 	}
 
+	/*
+	 * If posix timer expiry is handled in task work context then
+	 * timer::it_lock can be taken without disabling interrupts as all
+	 * other locking happens in task context. This requires a separate
+	 * lock class key otherwise regular posix timer expiry would record
+	 * the lock class being taken in interrupt context and generate a
+	 * false positive warning.
+	 */
+	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
+		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);
+
 	new_timer->kclock = &clock_posix_cpu;
 	timerqueue_init(&new_timer->it.cpu.node);
 	new_timer->it.cpu.pid = get_pid(pid);
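
The class-key trick above is the standard lockdep pattern for giving one group of lock instances its own validation class, so the task-context acquisitions of CPU-timer it_lock are not mixed with the interrupt-context acquisitions recorded for regular posix timers. A minimal sketch of the same pattern on a hypothetical lock (demo_lock and demo_key are illustrative names, not part of this patch):

    #include <linux/spinlock.h>
    #include <linux/lockdep.h>

    static spinlock_t demo_lock;

    static void demo_init(void)
    {
    	/* One static key, shared by every lock that follows the same rules */
    	static struct lock_class_key demo_key;

    	spin_lock_init(&demo_lock);
    	/* Move demo_lock out of its default class so lockdep tracks it separately */
    	lockdep_set_class(&demo_lock, &demo_key);
    }

With CONFIG_POSIX_CPU_TIMERS_TASK_WORK enabled, it_lock is only ever taken from task context, so the separate class keeps lockdep from flagging that usage against the interrupt-context history of the ordinary posix timer locks.
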
@@ -1080,43 +1092,163 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 	return false;
 }
 
+static void handle_posix_cpu_timers(struct task_struct *tsk);
+
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+static void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
 /*
- * This is called from the timer interrupt handler.  The irq handler has
- * already updated our counts.  We need to check if any timers fire now.
- * Interrupts are disabled.
+ * Initialize posix CPU timers task work in init task. Out of line to
+ * keep the callback static and to avoid header recursion hell.
  */
-void run_posix_cpu_timers(void)
+void __init posix_cputimers_init_work(void)
 {
-	struct task_struct *tsk = current;
-	struct k_itimer *timer, *next;
-	unsigned long flags;
-	LIST_HEAD(firing);
+	init_task_work(&current->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+}
 
-	lockdep_assert_irqs_disabled();
+/*
+ * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
+ * in hard interrupt context or in task context with interrupts
+ * disabled. Aside from that the writer/reader interaction is always in the
+ * context of the current task, which means they are strictly per CPU.
+ */
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return tsk->posix_cputimers_work.scheduled;
+}
 
-	/*
-	 * The fast path checks that there are no expired thread or thread
-	 * group timers.  If that's so, just return.
-	 */
-	if (!fastpath_timer_check(tsk))
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
 		return;
 
-	lockdep_posixtimer_enter();
-	if (!lock_task_sighand(tsk, &flags)) {
-		lockdep_posixtimer_exit();
-		return;
+	/* Schedule task work to actually expire the timers */
+	tsk->posix_cputimers_work.scheduled = true;
+	task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	bool ret = true;
+
+	/*
+	 * On !RT kernels interrupts are disabled while collecting expired
+	 * timers, so no tick can happen and the fast path check can be
+	 * reenabled without further checks.
+	 */
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		tsk->posix_cputimers_work.scheduled = false;
+		return true;
 	}
+
 	/*
-	 * Here we take off tsk->signal->cpu_timers[N] and
-	 * tsk->cpu_timers[N] all the timers that are firing, and
-	 * put them on the firing list.
+	 * On RT enabled kernels ticks can happen while the expired timers
+	 * are collected under sighand lock. But any tick which observes
+	 * the CPUTIMERS_WORK_SCHEDULED bit set does not run the fastpath
+	 * checks. So reenabling the tick work has to be done carefully:
+	 *
+	 * Disable interrupts and run the fast path check if jiffies have
+	 * advanced since the collection of expired timers started. If
+	 * jiffies have not advanced or the fast path check did not find
+	 * newly expired timers, reenable the fast path check in the timer
+	 * interrupt. If there are newly expired timers, return false and
+	 * let the collection loop repeat.
 	 */
-	check_thread_timers(tsk, &firing);
+	local_irq_disable();
+	if (start != jiffies && fastpath_timer_check(tsk))
+		ret = false;
+	else
+		tsk->posix_cputimers_work.scheduled = false;
+	local_irq_enable();
+
+	return ret;
+}
+#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+static inline void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	lockdep_posixtimer_enter();
+	handle_posix_cpu_timers(tsk);
+	lockdep_posixtimer_exit();
+}
+
+static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
+{
+	return false;
+}
+
+static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
+						unsigned long start)
+{
+	return true;
+}
+#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
+{
+	struct k_itimer *timer, *next;
+	unsigned long flags, start;
+	LIST_HEAD(firing);
+
+	if (!lock_task_sighand(tsk, &flags))
+		return;
 
-	check_process_timers(tsk, &firing);
+	do {
+		/*
+		 * On RT locking sighand lock does not disable interrupts,
+		 * so this needs to be careful vs. ticks. Store the current
+		 * jiffies value.
+		 */
+		start = READ_ONCE(jiffies);
+		barrier();
+
+		/*
+		 * Here we take off tsk->signal->cpu_timers[N] and
+		 * tsk->cpu_timers[N] all the timers that are firing, and
+		 * put them on the firing list.
+		 */
+		check_thread_timers(tsk, &firing);
+
+		check_process_timers(tsk, &firing);
+
+		/*
+		 * The above timer checks have updated the expiry cache and
+		 * because nothing can have queued or modified timers after
+		 * sighand lock was taken above it is guaranteed to be
+		 * consistent. So the next timer interrupt fastpath check
+		 * will find valid data.
+		 *
+		 * If timer expiry runs in the timer interrupt context then
+		 * the loop is not relevant as timers will be directly
+		 * expired in interrupt context. The stub function below
+		 * always returns true which allows the compiler to
+		 * optimize the loop out.
+		 *
+		 * If timer expiry is deferred to task work context then
+		 * the following rules apply:
+		 *
+		 * - On !RT kernels no tick can have happened on this CPU
+		 *   after sighand lock was acquired because interrupts are
+		 *   disabled. So reenabling task work before dropping
+		 *   sighand lock and reenabling interrupts is race free.
+		 *
+		 * - On RT kernels ticks might have happened but the tick
+		 *   work ignored posix CPU timer handling because the
+		 *   CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
+		 *   must be done very carefully including a check whether
+		 *   ticks have happened since the start of the timer
+		 *   expiry checks. posix_cpu_timers_enable_work() takes
+		 *   care of that and eventually lets the expiry checks
+		 *   run again.
+		 */
+	} while (!posix_cpu_timers_enable_work(tsk, start));
 
 	/*
-	 * We must release these locks before taking any timer's lock.
+	 * We must release sighand lock before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list.  We have set
 	 * the firing flag in each timer, so that a deletion attempt
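
The per-task state this hunk operates on, tsk->posix_cputimers_work, is introduced by the header side of this patch, which is not part of the hunks shown here. As an assumption for orientation, its shape is roughly:

    /* Presumed layout of the new per-task member (see the header change) */
    struct posix_cputimers_work {
    	struct callback_head	work;		/* queued via task_work_add() */
    	unsigned int		scheduled;	/* expiry work already pending? */
    };

With that in place the flow is: the tick path marks scheduled, queues the work with TWA_RESUME, and the callback runs handle_posix_cpu_timers() once the task is back in task context; posix_cpu_timers_enable_work() then decides whether the fast path check in the tick may be re-armed or the collection loop has to run again.
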
@@ -1134,6 +1266,13 @@ void run_posix_cpu_timers(void)
 	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;
 
+		/*
+		 * spin_lock() is sufficient here even independent of the
+		 * expiry context. If expiry happens in hard interrupt
+		 * context it's obvious. For task work context it's safe
+		 * because all other operations on timer::it_lock happen in
+		 * task context (syscall or exit).
+		 */
 		spin_lock(&timer->it_lock);
 		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
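
The comment relies on the usual rule that only locks which can also be taken from hard interrupt context force their task-context users to disable interrupts. A generic illustration of the pattern that would otherwise be needed (demo names, not from this patch):

    static DEFINE_SPINLOCK(mixed_ctx_lock);

    /* Task-context user of a lock that an interrupt handler also takes */
    static void task_context_user(void)
    {
    	unsigned long flags;

    	/* Must disable interrupts, or an IRQ on this CPU could deadlock on the lock */
    	spin_lock_irqsave(&mixed_ctx_lock, flags);
    	/* ... critical section ... */
    	spin_unlock_irqrestore(&mixed_ctx_lock, flags);
    }

Here that is unnecessary: either expiry runs in hard interrupt context, where interrupts are already off, or, with task-work expiry, every it_lock user runs in task context, which is exactly what the separate lock class in the first hunk tells lockdep.
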
@@ -1147,7 +1286,34 @@ void run_posix_cpu_timers(void)
 			cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
-	lockdep_posixtimer_exit();
+}
+
+/*
+ * This is called from the timer interrupt handler. The irq handler has
+ * already updated our counts. We need to check if any timers fire now.
+ * Interrupts are disabled.
+ */
+void run_posix_cpu_timers(void)
+{
+	struct task_struct *tsk = current;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * If the actual expiry is deferred to task work context and the
+	 * work is already scheduled there is no point in doing anything here.
+	 */
+	if (posix_cpu_timers_work_scheduled(tsk))
+		return;
+
+	/*
+	 * The fast path checks that there are no expired thread or thread
+	 * group timers. If that's so, just return.
+	 */
+	if (!fastpath_timer_check(tsk))
+		return;
+
+	__run_posix_cpu_timers(tsk);
 }
 
 /*
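
The deferral itself is plain task_work: run_posix_cpu_timers() only queues a callback from the timer interrupt, and that callback (posix_cpu_timers_work() above) executes handle_posix_cpu_timers() when the interrupted task next returns to user mode. A stripped-down sketch of that general pattern, with hypothetical names:

    #include <linux/sched.h>
    #include <linux/task_work.h>

    static void demo_cb(struct callback_head *head)
    {
    	/* Runs in task context with interrupts enabled; may take sleeping locks */
    }

    static int demo_defer_to_task(struct task_struct *tsk, struct callback_head *cb)
    {
    	init_task_work(cb, demo_cb);
    	/* TWA_RESUME: invoke demo_cb when tsk returns to user space */
    	return task_work_add(tsk, cb, TWA_RESUME);
    }

TWA_RESUME fits here because the timers only have to be expired before the task can consume more CPU time in user space, so running the work on return to user mode is sufficient.
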