@@ -154,6 +154,9 @@ struct worker_pool {
 
 	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
 
+	/* The current concurrency level. */
+	atomic_t		nr_running;
+
 	struct list_head	worklist;	/* L: list of pending works */
 
 	int			nr_workers;	/* L: total number of workers */
@@ -177,19 +180,12 @@ struct worker_pool {
 	struct hlist_node	hash_node;	/* PL: unbound_pool_hash node */
 	int			refcnt;		/* PL: refcnt for unbound pools */
 
-	/*
-	 * The current concurrency level. As it's likely to be accessed
-	 * from other CPUs during try_to_wake_up(), put it in a separate
-	 * cacheline.
-	 */
-	atomic_t		nr_running ____cacheline_aligned_in_smp;
-
 	/*
 	 * Destruction of pool is RCU protected to allow dereferences
 	 * from get_work_pool().
 	 */
 	struct rcu_head		rcu;
-} ____cacheline_aligned_in_smp;
+};
 
 /*
  * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
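(Note on the two struct hunks above: ____cacheline_aligned_in_smp, dropped here, pads and aligns a field to a cache-line boundary so that remote CPUs writing it do not false-share with neighbouring pool fields. The rest of this series makes nr_running effectively CPU-local, which is presumably why the separate cacheline is no longer considered worth the space. A minimal userspace sketch of the same idea, using C11 alignas and an assumed 64-byte line size; the struct and field names below are illustrative, not kernel code.)

#include <stdalign.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Toy stand-in for worker_pool: nr_running gets its own (assumed
 * 64-byte) cache line so a remote writer of nr_running does not
 * invalidate the line holding the read-mostly neighbours. */
struct pool_like {
	unsigned long watchdog_ts;		/* read-mostly neighbour */
	alignas(64) atomic_int nr_running;	/* hot, written remotely */
};

int main(void)
{
	printf("nr_running offset: %zu, struct size: %zu\n",
	       offsetof(struct pool_like, nr_running),
	       sizeof(struct pool_like));
	return 0;
}

(On a typical build this prints an offset of 64 and a size of 128: the counter sits alone on its line, at the cost of padding, which is the trade-off the patch removes.)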
@@ -868,8 +864,17 @@ void wq_worker_running(struct task_struct *task)
 
 	if (!worker->sleeping)
 		return;
+
+	/*
+	 * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
+	 * and the nr_running increment below, we may ruin the nr_running reset
+	 * and leave with an unexpected pool->nr_running == 1 on the newly unbound
+	 * pool. Protect against such race.
+	 */
+	preempt_disable();
 	if (!(worker->flags & WORKER_NOT_RUNNING))
 		atomic_inc(&worker->pool->nr_running);
+	preempt_enable();
 	worker->sleeping = 0;
 }
 
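(The comment added to wq_worker_running() above describes a narrow window: if the worker is preempted after seeing WORKER_NOT_RUNNING clear but before incrementing nr_running, unbind_workers() can run in between, set WORKER_UNBOUND and zap nr_running, and the late increment then leaves the unbound pool with nr_running == 1. Below is a hedged userspace analogue, not kernel code: the mutex stands in for the preempt-disabled region, which works as a model because in the kernel both paths execute on the same CPU.)

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int nr_running;			/* models pool->nr_running */
static atomic_bool unbound;			/* models WORKER_NOT_RUNNING */
static pthread_mutex_t no_preempt = PTHREAD_MUTEX_INITIALIZER;

static void worker_running(void)		/* models wq_worker_running() */
{
	pthread_mutex_lock(&no_preempt);	/* ~ preempt_disable() */
	if (!atomic_load(&unbound))
		atomic_fetch_add(&nr_running, 1);
	pthread_mutex_unlock(&no_preempt);	/* ~ preempt_enable() */
}

static void *unbind(void *arg)			/* models unbind_workers() */
{
	pthread_mutex_lock(&no_preempt);
	atomic_store(&unbound, true);
	atomic_store(&nr_running, 0);		/* the reset must stay last */
	pthread_mutex_unlock(&no_preempt);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, unbind, NULL);
	worker_running();
	pthread_join(t, NULL);
	/* With the exclusion in place, an unbound pool never ends at 1 here. */
	printf("unbound=%d nr_running=%d\n",
	       atomic_load(&unbound), atomic_load(&nr_running));
	return 0;
}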
@@ -878,8 +883,7 @@ void wq_worker_running(struct task_struct *task)
  * @task: task going to sleep
  *
  * This function is called from schedule() when a busy worker is
- * going to sleep. Preemption needs to be disabled to protect ->sleeping
- * assignment.
+ * going to sleep.
  */
 void wq_worker_sleeping(struct task_struct *task)
 {
@@ -903,6 +907,16 @@ void wq_worker_sleeping(struct task_struct *task)
 	worker->sleeping = 1;
 	raw_spin_lock_irq(&pool->lock);
 
+	/*
+	 * Recheck in case unbind_workers() preempted us. We don't
+	 * want to decrement nr_running after the worker is unbound
+	 * and nr_running has been reset.
+	 */
+	if (worker->flags & WORKER_NOT_RUNNING) {
+		raw_spin_unlock_irq(&pool->lock);
+		return;
+	}
+
 	/*
 	 * The counterpart of the following dec_and_test, implied mb,
 	 * worklist not empty test sequence is in insert_work().
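(The recheck added to wq_worker_sleeping() above is the other half of the same race: once unbind_workers() has reset nr_running, a worker that was unbound just before going to sleep must not decrement the counter. Since both the reset and this path run under pool->lock, rechecking WORKER_NOT_RUNNING under the lock suffices. A hedged userspace analogue of that recheck-under-lock pattern follows; the names are illustrative, not kernel code.)

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static int nr_running = 1;		/* models pool->nr_running */
static bool unbound;			/* models WORKER_NOT_RUNNING */
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;

static void worker_sleeping(void)	/* models wq_worker_sleeping() */
{
	pthread_mutex_lock(&pool_lock);
	if (unbound) {			/* the recheck added by the patch */
		pthread_mutex_unlock(&pool_lock);
		return;
	}
	nr_running--;			/* only while still bound */
	pthread_mutex_unlock(&pool_lock);
}

static void unbind(void)		/* models unbind_workers() */
{
	pthread_mutex_lock(&pool_lock);
	unbound = true;
	nr_running = 0;			/* reset that must not be undone */
	pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
	unbind();
	worker_sleeping();		/* bails out; counter stays 0, not -1 */
	printf("nr_running=%d\n", nr_running);
	return 0;
}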
@@ -1531,7 +1545,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
  * @work: work to queue
  *
  * We queue the work to a specific CPU, the caller must ensure it
- * can't go away.
+ * can't go away. Callers that fail to ensure that the specified
+ * CPU cannot go away will execute on a randomly chosen CPU.
  *
  * Return: %false if @work was already on a queue, %true otherwise.
  */
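(The reworded queue_work_on() comment above still leaves CPU-hotplug safety to the caller. One way a caller could satisfy that requirement is to hold the CPU hotplug read lock across the queueing; the sketch below is a hedged illustration with a made-up work item and helper name, not something this patch adds.)

#include <linux/cpu.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	/* Runs on the CPU it was queued on while that CPU stays online. */
}
static DECLARE_WORK(my_work, my_work_fn);

static bool queue_on_cpu_pinned(int cpu)
{
	bool queued = false;

	cpus_read_lock();		/* keep @cpu from being unplugged */
	if (cpu_online(cpu))
		queued = queue_work_on(cpu, system_wq, &my_work);
	cpus_read_unlock();
	return queued;
}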
@@ -1811,14 +1826,8 @@ static void worker_enter_idle(struct worker *worker)
 	if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
 		mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
 
-	/*
-	 * Sanity check nr_running. Because unbind_workers() releases
-	 * pool->lock between setting %WORKER_UNBOUND and zapping
-	 * nr_running, the warning may trigger spuriously. Check iff
-	 * unbind is not in progress.
-	 */
-	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
-		     pool->nr_workers == pool->nr_idle &&
+	/* Sanity check nr_running. */
+	WARN_ON_ONCE(pool->nr_workers == pool->nr_idle &&
 		     atomic_read(&pool->nr_running));
 }
 
@@ -4979,38 +4988,22 @@ static void unbind_workers(int cpu)
 		/*
 		 * We've blocked all attach/detach operations. Make all workers
 		 * unbound and set DISASSOCIATED. Before this, all workers
-		 * except for the ones which are still executing works from
-		 * before the last CPU down must be on the cpu. After
-		 * this, they may become diasporas.
+		 * must be on the cpu. After this, they may become diasporas.
+		 * And the preemption disabled section in their sched callbacks
+		 * are guaranteed to see WORKER_UNBOUND since the code here
+		 * is on the same cpu.
 		 */
 		for_each_pool_worker(worker, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
 
-		raw_spin_unlock_irq(&pool->lock);
-
-		for_each_pool_worker(worker, pool) {
-			kthread_set_per_cpu(worker->task, -1);
-			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
-		}
-
-		mutex_unlock(&wq_pool_attach_mutex);
-
 		/*
-		 * Call schedule() so that we cross rq->lock and thus can
-		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
-		 * This is necessary as scheduler callbacks may be invoked
-		 * from other cpus.
-		 */
-		schedule();
-
-		/*
-		 * Sched callbacks are disabled now. Zap nr_running.
-		 * After this, nr_running stays zero and need_more_worker()
-		 * and keep_working() are always true as long as the
-		 * worklist is not empty. This pool now behaves as an
-		 * unbound (in terms of concurrency management) pool which
+		 * The handling of nr_running in sched callbacks are disabled
+		 * now. Zap nr_running. After this, nr_running stays zero and
+		 * need_more_worker() and keep_working() are always true as
+		 * long as the worklist is not empty. This pool now behaves as
+		 * an unbound (in terms of concurrency management) pool which
 		 * are served by workers tied to the pool.
 		 */
 		atomic_set(&pool->nr_running, 0);
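(The rewritten comment above relies on nr_running staying zero making need_more_worker() and keep_working() depend only on whether the worklist is empty. For reference, those helpers look roughly like this in this version of kernel/workqueue.c; the shape is paraphrased and should be treated as approximate.)

/* true when concurrency management sees no worker currently running */
static bool __need_more_worker(struct worker_pool *pool)
{
	return !atomic_read(&pool->nr_running);
}

/* wake another worker: work is pending and nobody counts as running */
static bool need_more_worker(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) && __need_more_worker(pool);
}

/* the running worker should keep going: work is pending and at most
 * one worker (itself) counts as running */
static bool keep_working(struct worker_pool *pool)
{
	return !list_empty(&pool->worklist) &&
		atomic_read(&pool->nr_running) <= 1;
}

(With nr_running pinned at zero, both conditions reduce to !list_empty(&pool->worklist), which is exactly the "always true as long as the worklist is not empty" behaviour the comment claims.)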
@@ -5020,9 +5013,16 @@ static void unbind_workers(int cpu)
 		 * worker blocking could lead to lengthy stalls. Kick off
 		 * unbound chain execution of currently pending work items.
 		 */
-		raw_spin_lock_irq(&pool->lock);
 		wake_up_worker(pool);
+
 		raw_spin_unlock_irq(&pool->lock);
+
+		for_each_pool_worker(worker, pool) {
+			kthread_set_per_cpu(worker->task, -1);
+			WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
+		}
+
+		mutex_unlock(&wq_pool_attach_mutex);
 	}
 }
 
@@ -5058,17 +5058,6 @@ static void rebind_workers(struct worker_pool *pool)
 	for_each_pool_worker(worker, pool) {
 		unsigned int worker_flags = worker->flags;
 
-		/*
-		 * A bound idle worker should actually be on the runqueue
-		 * of the associated CPU for local wake-ups targeting it to
-		 * work. Kick all idle workers so that they migrate to the
-		 * associated CPU. Doing this in the same loop as
-		 * replacing UNBOUND with REBOUND is safe as no worker will
-		 * be bound before @pool->lock is released.
-		 */
-		if (worker_flags & WORKER_IDLE)
-			wake_up_process(worker->task);
-
 		/*
 		 * We want to clear UNBOUND but can't directly call
 		 * worker_clr_flags() or adjust nr_running. Atomically