Commit 134874e

workqueue: Allow cancel_work_sync() and disable_work() from atomic contexts on BH work items
Now that work_grab_pending() can always grab the PENDING bit without sleeping, the only thing that prevents allowing cancel_work_sync() of a BH work item from an atomic context is the flushing of the in-flight instance.

When we're flushing a BH work item for cancel_work_sync(), we know that the work item is not queued and must be executing in a BH context, which means that it's safe to busy-wait for its completion from a non-hardirq atomic context.

This patch updates __flush_work() so that it busy-waits when flushing a BH work item for cancel_work_sync(). might_sleep() is pushed from start_flush_work() to its callers - when operating on a BH work item, __cancel_work_sync() now enforces !in_hardirq() instead of might_sleep(). This allows cancel_work_sync() and disable_work() to be called from non-hardirq atomic contexts on BH work items.

v3: In __flush_work(), test WORK_OFFQ_BH to tell whether a work item being canceled can be busy-waited instead of making start_flush_work() return the pool. (Lai)

v2: Lai pointed out that __flush_work() was accessing pool->flags outside the RCU critical section protecting the pool pointer. Fix it by testing and remembering the result inside the RCU critical section.

Signed-off-by: Tejun Heo <[email protected]>
Reviewed-by: Lai Jiangshan <[email protected]>
Parent: 456a78e

1 file changed (+55, -19 lines)
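
To make concrete what this change permits, here is a minimal sketch (not part of the commit) of the newly allowed call pattern: a hypothetical driver queues a work item on a BH workqueue and later cancels it synchronously from a timer callback, i.e. from a non-hardirq atomic context. Only the workqueue and timer APIs (system_bh_wq, INIT_WORK, queue_work, cancel_work_sync, timer_setup) are real; the foo_* names are invented for illustration.

/* Illustrative sketch only, not from this commit. */
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

static struct work_struct foo_poll_work;
static struct timer_list foo_timer;

static void foo_poll_fn(struct work_struct *work)
{
	/*
	 * Runs in BH (softirq) context because the item is queued on a
	 * BH workqueue.
	 */
}

static void foo_timeout(struct timer_list *t)
{
	/*
	 * Timer callbacks execute in atomic context. Before this
	 * commit, cancel_work_sync() here would trip might_sleep().
	 * It is now permitted because foo_poll_work was last queued on
	 * a BH workqueue, so __flush_work() busy-waits instead of
	 * sleeping. Calling this from hardirq context remains a bug
	 * and triggers WARN_ON_ONCE(in_hardirq()).
	 */
	cancel_work_sync(&foo_poll_work);
}

static int __init foo_init(void)
{
	INIT_WORK(&foo_poll_work, foo_poll_fn);
	timer_setup(&foo_timer, foo_timeout, 0);
	queue_work(system_bh_wq, &foo_poll_work);
	mod_timer(&foo_timer, jiffies + HZ);
	return 0;
}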

kernel/workqueue.c

@@ -4105,8 +4105,6 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	struct pool_workqueue *pwq;
 	struct workqueue_struct *wq;
 
-	might_sleep();
-
 	rcu_read_lock();
 	pool = get_work_pool(work);
 	if (!pool) {
@@ -4158,20 +4156,49 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 static bool __flush_work(struct work_struct *work, bool from_cancel)
 {
 	struct wq_barrier barr;
+	unsigned long data;
 
 	if (WARN_ON(!wq_online))
 		return false;
 
 	if (WARN_ON(!work->func))
 		return false;
 
-	if (start_flush_work(work, &barr, from_cancel)) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
-		return true;
-	} else {
+	if (!start_flush_work(work, &barr, from_cancel))
 		return false;
+
+	/*
+	 * start_flush_work() returned %true. If @from_cancel is set, we know
+	 * that @work must have been executing during start_flush_work() and
+	 * can't currently be queued. Its data must contain OFFQ bits. If @work
+	 * was queued on a BH workqueue, we also know that it was running in the
+	 * BH context and thus can be busy-waited.
+	 */
+	data = *work_data_bits(work);
+	if (from_cancel &&
+	    !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
+		/*
+		 * On RT, prevent a live lock when %current preempted soft
+		 * interrupt processing or prevents ksoftirqd from running by
+		 * keeping flipping BH. If the BH work item runs on a different
+		 * CPU then this has no effect other than doing the BH
+		 * disable/enable dance for nothing. This is copied from
+		 * kernel/softirq.c::tasklet_unlock_spin_wait().
+		 */
+		while (!try_wait_for_completion(&barr.done)) {
+			if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+				local_bh_disable();
+				local_bh_enable();
+			} else {
+				cpu_relax();
+			}
+		}
+	} else {
+		wait_for_completion(&barr.done);
 	}
+
+	destroy_work_on_stack(&barr.work);
+	return true;
 }
 
 /**
@@ -4187,6 +4214,7 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
  */
 bool flush_work(struct work_struct *work)
 {
+	might_sleep();
 	return __flush_work(work, false);
 }
 EXPORT_SYMBOL_GPL(flush_work);
@@ -4276,6 +4304,11 @@ static bool __cancel_work_sync(struct work_struct *work, u32 cflags)
 
 	ret = __cancel_work(work, cflags | WORK_CANCEL_DISABLE);
 
+	if (*work_data_bits(work) & WORK_OFFQ_BH)
+		WARN_ON_ONCE(in_hardirq());
+	else
+		might_sleep();
+
 	/*
 	 * Skip __flush_work() during early boot when we know that @work isn't
 	 * executing. This allows canceling during early boot.
@@ -4302,19 +4335,19 @@ EXPORT_SYMBOL(cancel_work);
  * cancel_work_sync - cancel a work and wait for it to finish
  * @work: the work to cancel
  *
- * Cancel @work and wait for its execution to finish. This function
- * can be used even if the work re-queues itself or migrates to
- * another workqueue. On return from this function, @work is
- * guaranteed to be not pending or executing on any CPU.
+ * Cancel @work and wait for its execution to finish. This function can be used
+ * even if the work re-queues itself or migrates to another workqueue. On return
+ * from this function, @work is guaranteed to be not pending or executing on any
+ * CPU as long as there aren't racing enqueues.
  *
- * cancel_work_sync(&delayed_work->work) must not be used for
- * delayed_work's. Use cancel_delayed_work_sync() instead.
+ * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
+ * Use cancel_delayed_work_sync() instead.
 *
- * The caller must ensure that the workqueue on which @work was last
- * queued can't be destroyed before this function returns.
+ * Must be called from a sleepable context if @work was last queued on a non-BH
+ * workqueue. Can also be called from non-hardirq atomic contexts including BH
+ * if @work was last queued on a BH workqueue.
 *
- * Return:
- * %true if @work was pending, %false otherwise.
+ * Returns %true if @work was pending, %false otherwise.
 */
 bool cancel_work_sync(struct work_struct *work)
 {
@@ -4384,8 +4417,11 @@ EXPORT_SYMBOL_GPL(disable_work);
 * Similar to disable_work() but also wait for @work to finish if currently
 * executing.
 *
- * Must be called from a sleepable context. Returns %true if @work was pending,
- * %false otherwise.
+ * Must be called from a sleepable context if @work was last queued on a non-BH
+ * workqueue. Can also be called from non-hardirq atomic contexts including BH
+ * if @work was last queued on a BH workqueue.
+ *
+ * Returns %true if @work was pending, %false otherwise.
 */
 bool disable_work_sync(struct work_struct *work)
 {
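
The same relaxation applies to disable_work_sync(), per the updated comment in the last hunk. As a hypothetical usage sketch (again not from the commit), a driver could now quiesce a BH work item from another BH context, for example a teardown path running in softirq. Only disable_work_sync(), enable_work(), queue_work() and system_bh_wq are real APIs; the bar_* names are invented.

/* Illustrative sketch only, not from this commit. */
#include <linux/workqueue.h>

static struct work_struct bar_rx_work;

/* May be called from BH context, but not from hardirq. */
static void bar_stop_rx(void)
{
	/*
	 * Increments the disable count and waits for any in-flight
	 * execution. Because bar_rx_work was last queued on a BH
	 * workqueue, the flush busy-waits; on PREEMPT_RT the wait loop
	 * flips local_bh_disable()/local_bh_enable() to avoid
	 * live-locking a preempted softirq.
	 */
	disable_work_sync(&bar_rx_work);
}

static void bar_start_rx(void)
{
	/* Drop the disable count and requeue on the BH workqueue. */
	enable_work(&bar_rx_work);
	queue_work(system_bh_wq, &bar_rx_work);
}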
