Commit 0b0894f

Merge tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fix from Borislav Petkov:

 "Fix task exposure order when forking tasks"

* tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix yet more sched_fork() races
2 parents 6e8e752 + b1e8206 commit 0b0894f
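
The race being fixed is an ordering problem: a freshly forked task must be fully set up by the scheduler before it becomes reachable through the pid hash or a pidfd. As a rough userspace analogy only (invented names such as toy_task and publish_task, not kernel code), the sketch below finishes writing an object's fields and only then publishes it through a release store, so a concurrent reader never observes a half-configured object:

/* Toy analogy of the fix: finish configuring an object before making it
 * visible to other threads. Invented names; not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct toy_task {
	int weight;	/* stands in for scheduler state set up at fork */
	int group;	/* stands in for p->sched_task_group            */
};

static _Atomic(struct toy_task *) visible_task;	/* "pid hash" stand-in */

static void publish_task(struct toy_task *t)
{
	/* Release store: every earlier field write is ordered before the
	 * pointer becomes non-NULL for acquiring readers. */
	atomic_store_explicit(&visible_task, t, memory_order_release);
}

static void *reader(void *arg)
{
	struct toy_task *t;

	(void)arg;
	/* Spin until the task is published, then read its fields. */
	while (!(t = atomic_load_explicit(&visible_task, memory_order_acquire)))
		;
	printf("observed weight=%d group=%d\n", t->weight, t->group);
	return NULL;
}

int main(void)
{
	static struct toy_task child;
	pthread_t thr;

	pthread_create(&thr, NULL, reader, NULL);

	/* Configure first (the analogue of sched_cgroup_fork() running
	 * before the task is added to the pid hash)... */
	child.weight = 1024;
	child.group = 7;
	/* ...and only then expose the task to the "syscall" side. */
	publish_task(&child);

	pthread_join(thr, NULL);
	return 0;
}

(Builds as ordinary userspace C with cc -pthread.)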

3 files changed (+35 -16 lines)

include/linux/sched/task.h

Lines changed: 2 additions & 2 deletions

@@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p,
-			    struct kernel_clone_args *kargs);
+extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+extern void sched_post_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);

kernel/fork.c

Lines changed: 12 additions & 1 deletion

@@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_put_pidfd;
 
+	/*
+	 * Now that the cgroups are pinned, re-clone the parent cgroup and put
+	 * the new task on the correct runqueue. All this *before* the task
+	 * becomes visible.
+	 *
+	 * This isn't part of ->can_fork() because while the re-cloning is
+	 * cgroup specific, it unconditionally needs to place the task on a
+	 * runqueue.
+	 */
+	sched_cgroup_fork(p, args);
+
 	/*
 	 * From this point on we must avoid any synchronous user-space
 	 * communication until we take the tasklist-lock. In particular, we do
@@ -2375,7 +2386,7 @@ static __latent_entropy struct task_struct *copy_process(
 	fd_install(pidfd, pidfile);
 
 	proc_fork_connector(p);
-	sched_post_fork(p, args);
+	sched_post_fork(p);
 	cgroup_post_fork(p, args);
 	perf_event_fork(p);
 
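Taken together with the header change above, the fork.c hunks give the new ordering: sched_cgroup_fork() does the cgroup-aware group selection and runqueue placement while the child is still private to copy_process(), and only sched_post_fork() (reduced to the uclamp tail, see kernel/sched/core.c below) runs after the task has become visible. A compressed, compilable sketch of that sequence, with invented toy_* stubs standing in for the real helpers:

/* Ordering sketch only: invented stub helpers, not the kernel implementation. */
#include <stdio.h>

struct toy_task { int placed; int visible; };
struct toy_clone_args { int cgroup; };

static void toy_sched_fork(struct toy_task *p)
{
	p->placed = p->visible = 0;
	puts("sched_fork():        basic scheduler state");
}

static void toy_sched_cgroup_fork(struct toy_task *p, struct toy_clone_args *args)
{
	/* The new step: pick the task group / runqueue from the pinned
	 * cgroup *before* anyone else can see the task. */
	p->placed = args->cgroup;
	puts("sched_cgroup_fork(): task group + runqueue chosen");
}

static void toy_expose_task(struct toy_task *p)
{
	/* Stand-in for adding the task to the pid hash / installing the pidfd. */
	p->visible = 1;
	puts("task becomes visible (pid hash, pidfd)");
}

static void toy_sched_post_fork(struct toy_task *p)
{
	(void)p;
	puts("sched_post_fork():   only the uclamp tail remains here");
}

int main(void)
{
	struct toy_task child;
	struct toy_clone_args args = { .cgroup = 1 };

	toy_sched_fork(&child);
	toy_sched_cgroup_fork(&child, &args);	/* before visibility */
	toy_expose_task(&child);
	toy_sched_post_fork(&child);		/* after visibility  */
	return 0;
}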

kernel/sched/core.c

Lines changed: 21 additions & 13 deletions

@@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-static void set_load_weight(struct task_struct *p)
+static void set_load_weight(struct task_struct *p, bool update_load)
 {
-	bool update_load = !(READ_ONCE(p->__state) & TASK_NEW);
 	int prio = p->static_prio - MAX_RT_PRIO;
 	struct load_weight *load = &p->se.load;
 
@@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 			p->static_prio = NICE_TO_PRIO(0);
 
 		p->prio = p->normal_prio = p->static_prio;
-		set_load_weight(p);
+		set_load_weight(p, false);
 
 		/*
 		 * We don't need the reset flag anymore after the fork. It has
@@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	init_entity_runnable_average(&p->se);
 
+
 #ifdef CONFIG_SCHED_INFO
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
-void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 {
 	unsigned long flags;
-#ifdef CONFIG_CGROUP_SCHED
-	struct task_group *tg;
-#endif
 
+	/*
+	 * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
+	 * required yet, but lockdep gets upset if rules are violated.
+	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
-			  struct task_group, css);
-	p->sched_task_group = autogroup_task_group(p, tg);
+	if (1) {
+		struct task_group *tg;
+		tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
+				  struct task_group, css);
+		tg = autogroup_task_group(p, tg);
+		p->sched_task_group = tg;
+	}
 #endif
 	rseq_migrate(p);
 	/*
@@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
 
+void sched_post_fork(struct task_struct *p)
+{
 	uclamp_post_fork(p);
 }
 
@@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		put_prev_task(rq, p);
 
 	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p);
+	set_load_weight(p, true);
 	old_prio = p->prio;
 	p->prio = effective_prio(p);
 
@@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p,
 	 */
 	p->rt_priority = attr->sched_priority;
 	p->normal_prio = normal_prio(p);
-	set_load_weight(p);
+	set_load_weight(p, true);
 }
 
 /*
@@ -9446,7 +9454,7 @@ void __init sched_init(void)
 #endif
 	}
 
-	set_load_weight(&init_task);
+	set_load_weight(&init_task, false);
 
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
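
The set_load_weight() part of the change is a small interface refactor visible in the hunks above: the update_load flag, previously derived inside the function from READ_ONCE(p->__state) & TASK_NEW, is now passed explicitly, with sched_fork() and sched_init() passing false and set_user_nice()/__setscheduler_params() passing true. A minimal standalone illustration of that pattern, using invented names (recalc_weight, struct obj) rather than the kernel code:

/* Illustration of hoisting a state-derived flag into an explicit
 * parameter; invented names, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct obj { int state; int weight; int smoothed; };

#define STATE_NEW 0x1

/* Before: the function guessed from obj->state whether to update the
 * smoothed value, which only works if state is already meaningful. */
static void recalc_weight_old(struct obj *o, int raw)
{
	bool update = !(o->state & STATE_NEW);

	o->weight = raw;
	if (update)
		o->smoothed = (o->smoothed + raw) / 2;
}

/* After: the caller states its intent directly, so the function no
 * longer depends on where in the object's life cycle it is called. */
static void recalc_weight(struct obj *o, int raw, bool update)
{
	o->weight = raw;
	if (update)
		o->smoothed = (o->smoothed + raw) / 2;
}

int main(void)
{
	struct obj o = { .state = STATE_NEW, .smoothed = 100 };

	recalc_weight_old(&o, 40);	/* old style: flag inferred from state */
	recalc_weight(&o, 50, false);	/* fork-style path: no update          */
	recalc_weight(&o, 80, true);	/* renice-style path: do update        */
	printf("weight=%d smoothed=%d\n", o.weight, o.smoothed);
	return 0;
}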
