Skip to content

Commit b0cb56c

Browse files
committed
Merge tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull tasklist_lock optimizations from Christian Brauner:

 "According to the performance testbots this brings a 23% performance
  increase when creating new processes:

   - Reduce tasklist_lock hold time on exit:
       - Perform add_device_randomness() without tasklist_lock
       - Perform free_pid() calls outside of tasklist_lock

   - Drop irq disablement around pidmap_lock

   - Add some tasklist_lock asserts

   - Call flush_sigqueue() lockless by changing release_task()

   - Don't pointlessly clear TIF_SIGPENDING in __exit_signal() ->
     clear_tsk_thread_flag()"

* tag 'kernel-6.15-rc1.tasklist_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  pid: drop irq disablement around pidmap_lock
  pid: perform free_pid() calls outside of tasklist_lock
  pid: sprinkle tasklist_lock asserts
  exit: hoist get_pid() in release_task() outside of tasklist_lock
  exit: perform add_device_randomness() without tasklist_lock
  exit: kill the pointless __exit_signal()->clear_tsk_thread_flag(TIF_SIGPENDING)
  exit: change the release_task() paths to call flush_sigqueue() lockless
2 parents 56e7a8b + 0a7713a commit b0cb56c

File tree

4 files changed

+93
-66
lines changed

4 files changed

+93
-66
lines changed

include/linux/pid.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
101101
* these helpers must be called with the tasklist_lock write-held.
102102
*/
103103
extern void attach_pid(struct task_struct *task, enum pid_type);
104-
extern void detach_pid(struct task_struct *task, enum pid_type);
105-
extern void change_pid(struct task_struct *task, enum pid_type,
106-
struct pid *pid);
104+
void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type);
105+
void change_pid(struct pid **pids, struct task_struct *task, enum pid_type,
106+
struct pid *pid);
107107
extern void exchange_tids(struct task_struct *task, struct task_struct *old);
108108
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
109109
enum pid_type);
@@ -129,6 +129,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
129129
extern struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
130130
size_t set_tid_size);
131131
extern void free_pid(struct pid *pid);
132+
void free_pids(struct pid **pids);
132133
extern void disable_pid_allocation(struct pid_namespace *ns);
133134

134135
/*

kernel/exit.c

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -123,14 +123,22 @@ static __init int kernel_exit_sysfs_init(void)
123123
late_initcall(kernel_exit_sysfs_init);
124124
#endif
125125

126-
static void __unhash_process(struct task_struct *p, bool group_dead)
126+
/*
127+
* For things release_task() would like to do *after* tasklist_lock is released.
128+
*/
129+
struct release_task_post {
130+
struct pid *pids[PIDTYPE_MAX];
131+
};
132+
133+
static void __unhash_process(struct release_task_post *post, struct task_struct *p,
134+
bool group_dead)
127135
{
128136
nr_threads--;
129-
detach_pid(p, PIDTYPE_PID);
137+
detach_pid(post->pids, p, PIDTYPE_PID);
130138
if (group_dead) {
131-
detach_pid(p, PIDTYPE_TGID);
132-
detach_pid(p, PIDTYPE_PGID);
133-
detach_pid(p, PIDTYPE_SID);
139+
detach_pid(post->pids, p, PIDTYPE_TGID);
140+
detach_pid(post->pids, p, PIDTYPE_PGID);
141+
detach_pid(post->pids, p, PIDTYPE_SID);
134142

135143
list_del_rcu(&p->tasks);
136144
list_del_init(&p->sibling);
@@ -142,7 +150,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
142150
/*
143151
* This function expects the tasklist_lock write-locked.
144152
*/
145-
static void __exit_signal(struct task_struct *tsk)
153+
static void __exit_signal(struct release_task_post *post, struct task_struct *tsk)
146154
{
147155
struct signal_struct *sig = tsk->signal;
148156
bool group_dead = thread_group_leader(tsk);
@@ -175,9 +183,6 @@ static void __exit_signal(struct task_struct *tsk)
175183
sig->curr_target = next_thread(tsk);
176184
}
177185

178-
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
179-
sizeof(unsigned long long));
180-
181186
/*
182187
* Accumulate here the counters for all threads as they die. We could
183188
* skip the group leader because it is the last user of signal_struct,
@@ -198,23 +203,15 @@ static void __exit_signal(struct task_struct *tsk)
198203
task_io_accounting_add(&sig->ioac, &tsk->ioac);
199204
sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
200205
sig->nr_threads--;
201-
__unhash_process(tsk, group_dead);
206+
__unhash_process(post, tsk, group_dead);
202207
write_sequnlock(&sig->stats_lock);
203208

204-
/*
205-
* Do this under ->siglock, we can race with another thread
206-
* doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals.
207-
*/
208-
flush_sigqueue(&tsk->pending);
209209
tsk->sighand = NULL;
210210
spin_unlock(&sighand->siglock);
211211

212212
__cleanup_sighand(sighand);
213-
clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
214-
if (group_dead) {
215-
flush_sigqueue(&sig->shared_pending);
213+
if (group_dead)
216214
tty_kref_put(tty);
217-
}
218215
}
219216

220217
static void delayed_put_task_struct(struct rcu_head *rhp)
@@ -240,10 +237,13 @@ void __weak release_thread(struct task_struct *dead_task)
240237

241238
void release_task(struct task_struct *p)
242239
{
240+
struct release_task_post post;
243241
struct task_struct *leader;
244242
struct pid *thread_pid;
245243
int zap_leader;
246244
repeat:
245+
memset(&post, 0, sizeof(post));
246+
247247
/* don't need to get the RCU readlock here - the process is dead and
248248
* can't be modifying its own credentials. But shut RCU-lockdep up */
249249
rcu_read_lock();
@@ -253,10 +253,11 @@ void release_task(struct task_struct *p)
253253
pidfs_exit(p);
254254
cgroup_release(p);
255255

256+
thread_pid = get_pid(p->thread_pid);
257+
256258
write_lock_irq(&tasklist_lock);
257259
ptrace_release_task(p);
258-
thread_pid = get_pid(p->thread_pid);
259-
__exit_signal(p);
260+
__exit_signal(&post, p);
260261

261262
/*
262263
* If we are the last non-leader member of the thread
@@ -280,7 +281,20 @@ void release_task(struct task_struct *p)
280281
write_unlock_irq(&tasklist_lock);
281282
proc_flush_pid(thread_pid);
282283
put_pid(thread_pid);
284+
add_device_randomness(&p->se.sum_exec_runtime,
285+
sizeof(p->se.sum_exec_runtime));
286+
free_pids(post.pids);
283287
release_thread(p);
288+
/*
289+
* This task was already removed from the process/thread/pid lists
290+
* and lock_task_sighand(p) can't succeed. Nobody else can touch
291+
* ->pending or, if group dead, signal->shared_pending. We can call
292+
* flush_sigqueue() lockless.
293+
*/
294+
flush_sigqueue(&p->pending);
295+
if (thread_group_leader(p))
296+
flush_sigqueue(&p->signal->shared_pending);
297+
284298
put_task_struct_rcu_user(p);
285299

286300
p = leader;

kernel/pid.c

Lines changed: 45 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,6 @@ struct pid_namespace init_pid_ns = {
8888
};
8989
EXPORT_SYMBOL_GPL(init_pid_ns);
9090

91-
/*
92-
* Note: disable interrupts while the pidmap_lock is held as an
93-
* interrupt might come in and do read_lock(&tasklist_lock).
94-
*
95-
* If we don't disable interrupts there is a nasty deadlock between
96-
* detach_pid()->free_pid() and another cpu that does
97-
* spin_lock(&pidmap_lock) followed by an interrupt routine that does
98-
* read_lock(&tasklist_lock);
99-
*
100-
* After we clean up the tasklist_lock and know there are no
101-
* irq handlers that take it we can leave the interrupts enabled.
102-
* For now it is easier to be safe than to prove it can't happen.
103-
*/
104-
10591
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
10692
seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock);
10793

@@ -128,11 +114,11 @@ static void delayed_put_pid(struct rcu_head *rhp)
128114

129115
void free_pid(struct pid *pid)
130116
{
131-
/* We can be called with write_lock_irq(&tasklist_lock) held */
132117
int i;
133-
unsigned long flags;
134118

135-
spin_lock_irqsave(&pidmap_lock, flags);
119+
lockdep_assert_not_held(&tasklist_lock);
120+
121+
spin_lock(&pidmap_lock);
136122
for (i = 0; i <= pid->level; i++) {
137123
struct upid *upid = pid->numbers + i;
138124
struct pid_namespace *ns = upid->ns;
@@ -155,11 +141,23 @@ void free_pid(struct pid *pid)
155141
idr_remove(&ns->idr, upid->nr);
156142
}
157143
pidfs_remove_pid(pid);
158-
spin_unlock_irqrestore(&pidmap_lock, flags);
144+
spin_unlock(&pidmap_lock);
159145

160146
call_rcu(&pid->rcu, delayed_put_pid);
161147
}
162148

149+
void free_pids(struct pid **pids)
150+
{
151+
int tmp;
152+
153+
/*
154+
* This can batch pidmap_lock.
155+
*/
156+
for (tmp = PIDTYPE_MAX; --tmp >= 0; )
157+
if (pids[tmp])
158+
free_pid(pids[tmp]);
159+
}
160+
163161
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
164162
size_t set_tid_size)
165163
{
@@ -211,7 +209,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
211209
}
212210

213211
idr_preload(GFP_KERNEL);
214-
spin_lock_irq(&pidmap_lock);
212+
spin_lock(&pidmap_lock);
215213

216214
if (tid) {
217215
nr = idr_alloc(&tmp->idr, NULL, tid,
@@ -238,7 +236,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
238236
nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
239237
pid_max, GFP_ATOMIC);
240238
}
241-
spin_unlock_irq(&pidmap_lock);
239+
spin_unlock(&pidmap_lock);
242240
idr_preload_end();
243241

244242
if (nr < 0) {
@@ -272,7 +270,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
272270

273271
upid = pid->numbers + ns->level;
274272
idr_preload(GFP_KERNEL);
275-
spin_lock_irq(&pidmap_lock);
273+
spin_lock(&pidmap_lock);
276274
if (!(ns->pid_allocated & PIDNS_ADDING))
277275
goto out_unlock;
278276
pidfs_add_pid(pid);
@@ -281,18 +279,18 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
281279
idr_replace(&upid->ns->idr, pid, upid->nr);
282280
upid->ns->pid_allocated++;
283281
}
284-
spin_unlock_irq(&pidmap_lock);
282+
spin_unlock(&pidmap_lock);
285283
idr_preload_end();
286284

287285
return pid;
288286

289287
out_unlock:
290-
spin_unlock_irq(&pidmap_lock);
288+
spin_unlock(&pidmap_lock);
291289
idr_preload_end();
292290
put_pid_ns(ns);
293291

294292
out_free:
295-
spin_lock_irq(&pidmap_lock);
293+
spin_lock(&pidmap_lock);
296294
while (++i <= ns->level) {
297295
upid = pid->numbers + i;
298296
idr_remove(&upid->ns->idr, upid->nr);
@@ -302,17 +300,17 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
302300
if (ns->pid_allocated == PIDNS_ADDING)
303301
idr_set_cursor(&ns->idr, 0);
304302

305-
spin_unlock_irq(&pidmap_lock);
303+
spin_unlock(&pidmap_lock);
306304

307305
kmem_cache_free(ns->pid_cachep, pid);
308306
return ERR_PTR(retval);
309307
}
310308

311309
void disable_pid_allocation(struct pid_namespace *ns)
312310
{
313-
spin_lock_irq(&pidmap_lock);
311+
spin_lock(&pidmap_lock);
314312
ns->pid_allocated &= ~PIDNS_ADDING;
315-
spin_unlock_irq(&pidmap_lock);
313+
spin_unlock(&pidmap_lock);
316314
}
317315

318316
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
@@ -339,17 +337,23 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
339337
*/
340338
void attach_pid(struct task_struct *task, enum pid_type type)
341339
{
342-
struct pid *pid = *task_pid_ptr(task, type);
340+
struct pid *pid;
341+
342+
lockdep_assert_held_write(&tasklist_lock);
343+
344+
pid = *task_pid_ptr(task, type);
343345
hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
344346
}
345347

346-
static void __change_pid(struct task_struct *task, enum pid_type type,
347-
struct pid *new)
348+
static void __change_pid(struct pid **pids, struct task_struct *task,
349+
enum pid_type type, struct pid *new)
348350
{
349-
struct pid **pid_ptr = task_pid_ptr(task, type);
350-
struct pid *pid;
351+
struct pid **pid_ptr, *pid;
351352
int tmp;
352353

354+
lockdep_assert_held_write(&tasklist_lock);
355+
356+
pid_ptr = task_pid_ptr(task, type);
353357
pid = *pid_ptr;
354358

355359
hlist_del_rcu(&task->pid_links[type]);
@@ -364,18 +368,19 @@ static void __change_pid(struct task_struct *task, enum pid_type type,
364368
if (pid_has_task(pid, tmp))
365369
return;
366370

367-
free_pid(pid);
371+
WARN_ON(pids[type]);
372+
pids[type] = pid;
368373
}
369374

370-
void detach_pid(struct task_struct *task, enum pid_type type)
375+
void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type)
371376
{
372-
__change_pid(task, type, NULL);
377+
__change_pid(pids, task, type, NULL);
373378
}
374379

375-
void change_pid(struct task_struct *task, enum pid_type type,
380+
void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type,
376381
struct pid *pid)
377382
{
378-
__change_pid(task, type, pid);
383+
__change_pid(pids, task, type, pid);
379384
attach_pid(task, type);
380385
}
381386

@@ -386,6 +391,8 @@ void exchange_tids(struct task_struct *left, struct task_struct *right)
386391
struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
387392
struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
388393

394+
lockdep_assert_held_write(&tasklist_lock);
395+
389396
/* Swap the single entry tid lists */
390397
hlists_swap_heads_rcu(head1, head2);
391398

@@ -403,6 +410,7 @@ void transfer_pid(struct task_struct *old, struct task_struct *new,
403410
enum pid_type type)
404411
{
405412
WARN_ON_ONCE(type == PIDTYPE_PID);
413+
lockdep_assert_held_write(&tasklist_lock);
406414
hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
407415
}
408416

0 commit comments

Comments (0)