@@ -1796,13 +1796,28 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
1796
1796
*/
1797
1797
static inline bool is_cpu_allowed (struct task_struct * p , int cpu )
1798
1798
{
1799
+ /* When not in the task's cpumask, no point in looking further. */
1799
1800
if (!cpumask_test_cpu (cpu , p -> cpus_ptr ))
1800
1801
return false;
1801
1802
1802
- if (is_per_cpu_kthread (p ) || is_migration_disabled (p ))
1803
+ /* migrate_disabled() must be allowed to finish. */
1804
+ if (is_migration_disabled (p ))
1803
1805
return cpu_online (cpu );
1804
1806
1805
- return cpu_active (cpu );
1807
+ /* Non kernel threads are not allowed during either online or offline. */
1808
+ if (!(p -> flags & PF_KTHREAD ))
1809
+ return cpu_active (cpu );
1810
+
1811
+ /* KTHREAD_IS_PER_CPU is always allowed. */
1812
+ if (kthread_is_per_cpu (p ))
1813
+ return cpu_online (cpu );
1814
+
1815
+ /* Regular kernel threads don't get to stay during offline. */
1816
+ if (cpu_rq (cpu )-> balance_push )
1817
+ return false;
1818
+
1819
+ /* But are allowed during online. */
1820
+ return cpu_online (cpu );
1806
1821
}
1807
1822
1808
1823
/*
@@ -2327,7 +2342,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
2327
2342
2328
2343
if (p -> flags & PF_KTHREAD || is_migration_disabled (p )) {
2329
2344
/*
2330
- * Kernel threads are allowed on online && !active CPUs.
2345
+ * Kernel threads are allowed on online && !active CPUs,
2346
+ * however, during cpu-hot-unplug, even these might get pushed
2347
+ * away if not KTHREAD_IS_PER_CPU.
2331
2348
*
2332
2349
* Specifically, migration_disabled() tasks must not fail the
2333
2350
* cpumask_any_and_distribute() pick below, esp. so on
@@ -2371,16 +2388,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
2371
2388
2372
2389
__do_set_cpus_allowed (p , new_mask , flags );
2373
2390
2374
- if (p -> flags & PF_KTHREAD ) {
2375
- /*
2376
- * For kernel threads that do indeed end up on online &&
2377
- * !active we want to ensure they are strict per-CPU threads.
2378
- */
2379
- WARN_ON (cpumask_intersects (new_mask , cpu_online_mask ) &&
2380
- !cpumask_intersects (new_mask , cpu_active_mask ) &&
2381
- p -> nr_cpus_allowed != 1 );
2382
- }
2383
-
2384
2391
return affine_move_task (rq , p , & rf , dest_cpu , flags );
2385
2392
2386
2393
out :
@@ -3121,6 +3128,13 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
3121
3128
3122
3129
static inline bool ttwu_queue_cond (int cpu , int wake_flags )
3123
3130
{
3131
+ /*
3132
+ * Do not complicate things with the async wake_list while the CPU is
3133
+ * in hotplug state.
3134
+ */
3135
+ if (!cpu_active (cpu ))
3136
+ return false;
3137
+
3124
3138
/*
3125
3139
* If the CPU does not share cache, then queue the task on the
3126
3140
* remote rqs wakelist to avoid accessing remote data.
@@ -7276,8 +7290,14 @@ static void balance_push(struct rq *rq)
7276
7290
/*
7277
7291
* Both the cpu-hotplug and stop task are in this case and are
7278
7292
* required to complete the hotplug process.
7293
+ *
7294
+ * XXX: the idle task does not match kthread_is_per_cpu() due to
7295
+ * histerical raisins.
7279
7296
*/
7280
- if (is_per_cpu_kthread (push_task ) || is_migration_disabled (push_task )) {
7297
+ if (rq -> idle == push_task ||
7298
+ ((push_task -> flags & PF_KTHREAD ) && kthread_is_per_cpu (push_task )) ||
7299
+ is_migration_disabled (push_task )) {
7300
+
7281
7301
/*
7282
7302
* If this is the idle task on the outgoing CPU try to wake
7283
7303
* up the hotplug control thread which might wait for the
@@ -7309,7 +7329,7 @@ static void balance_push(struct rq *rq)
7309
7329
/*
7310
7330
* At this point need_resched() is true and we'll take the loop in
7311
7331
* schedule(). The next pick is obviously going to be the stop task
7312
- * which is_per_cpu_kthread () and will push this task away.
7332
+ * which kthread_is_per_cpu () and will push this task away.
7313
7333
*/
7314
7334
raw_spin_lock (& rq -> lock );
7315
7335
}
@@ -7320,10 +7340,13 @@ static void balance_push_set(int cpu, bool on)
7320
7340
struct rq_flags rf ;
7321
7341
7322
7342
rq_lock_irqsave (rq , & rf );
7323
- if (on )
7343
+ rq -> balance_push = on ;
7344
+ if (on ) {
7345
+ WARN_ON_ONCE (rq -> balance_callback );
7324
7346
rq -> balance_callback = & balance_push_callback ;
7325
- else
7347
+ } else if ( rq -> balance_callback == & balance_push_callback ) {
7326
7348
rq -> balance_callback = NULL ;
7349
+ }
7327
7350
rq_unlock_irqrestore (rq , & rf );
7328
7351
}
7329
7352
@@ -7441,6 +7464,10 @@ int sched_cpu_activate(unsigned int cpu)
7441
7464
struct rq * rq = cpu_rq (cpu );
7442
7465
struct rq_flags rf ;
7443
7466
7467
+ /*
7468
+ * Make sure that when the hotplug state machine does a roll-back
7469
+ * we clear balance_push. Ideally that would happen earlier...
7470
+ */
7444
7471
balance_push_set (cpu , false);
7445
7472
7446
7473
#ifdef CONFIG_SCHED_SMT
@@ -7483,17 +7510,27 @@ int sched_cpu_deactivate(unsigned int cpu)
7483
7510
int ret ;
7484
7511
7485
7512
set_cpu_active (cpu , false);
7513
+
7514
+ /*
7515
+ * From this point forward, this CPU will refuse to run any task that
7516
+ * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively
7517
+ * push those tasks away until this gets cleared, see
7518
+ * sched_cpu_dying().
7519
+ */
7520
+ balance_push_set (cpu , true);
7521
+
7486
7522
/*
7487
- * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU
7488
- * users of this state to go away such that all new such users will
7489
- * observe it.
7523
+ * We've cleared cpu_active_mask / set balance_push, wait for all
7524
+ * preempt-disabled and RCU users of this state to go away such that
7525
+ * all new such users will observe it.
7526
+ *
7527
+ * Specifically, we rely on ttwu to no longer target this CPU, see
7528
+ * ttwu_queue_cond() and is_cpu_allowed().
7490
7529
*
7491
7530
* Do sync before park smpboot threads to take care the rcu boost case.
7492
7531
*/
7493
7532
synchronize_rcu ();
7494
7533
7495
- balance_push_set (cpu , true);
7496
-
7497
7534
rq_lock_irqsave (rq , & rf );
7498
7535
if (rq -> rd ) {
7499
7536
update_rq_clock (rq );
@@ -7574,6 +7611,25 @@ static void calc_load_migrate(struct rq *rq)
7574
7611
atomic_long_add (delta , & calc_load_tasks );
7575
7612
}
7576
7613
7614
+ static void dump_rq_tasks (struct rq * rq , const char * loglvl )
7615
+ {
7616
+ struct task_struct * g , * p ;
7617
+ int cpu = cpu_of (rq );
7618
+
7619
+ lockdep_assert_held (& rq -> lock );
7620
+
7621
+ printk ("%sCPU%d enqueued tasks (%u total):\n" , loglvl , cpu , rq -> nr_running );
7622
+ for_each_process_thread (g , p ) {
7623
+ if (task_cpu (p ) != cpu )
7624
+ continue ;
7625
+
7626
+ if (!task_on_rq_queued (p ))
7627
+ continue ;
7628
+
7629
+ printk ("%s\tpid: %d, name: %s\n" , loglvl , p -> pid , p -> comm );
7630
+ }
7631
+ }
7632
+
7577
7633
int sched_cpu_dying (unsigned int cpu )
7578
7634
{
7579
7635
struct rq * rq = cpu_rq (cpu );
@@ -7583,9 +7639,18 @@ int sched_cpu_dying(unsigned int cpu)
7583
7639
sched_tick_stop (cpu );
7584
7640
7585
7641
rq_lock_irqsave (rq , & rf );
7586
- BUG_ON (rq -> nr_running != 1 || rq_has_pinned_tasks (rq ));
7642
+ if (rq -> nr_running != 1 || rq_has_pinned_tasks (rq )) {
7643
+ WARN (true, "Dying CPU not properly vacated!" );
7644
+ dump_rq_tasks (rq , KERN_WARNING );
7645
+ }
7587
7646
rq_unlock_irqrestore (rq , & rf );
7588
7647
7648
+ /*
7649
+ * Now that the CPU is offline, make sure we're welcome
7650
+ * to new tasks once we come back up.
7651
+ */
7652
+ balance_push_set (cpu , false);
7653
+
7589
7654
calc_load_migrate (rq );
7590
7655
update_max_interval ();
7591
7656
nohz_balance_exit_idle (rq );
0 commit comments