Skip to content

Commit f501513

Browse files
herbertx authored and gregkh committed
padata: Replace delayed timer with immediate workqueue in padata_reorder
[ Upstream commit 6fc4dbc ] The function padata_reorder will use a timer when it cannot progress while completed jobs are outstanding (pd->reorder_objects > 0). This is suboptimal as if we do end up using the timer then it would have introduced a gratuitous delay of one second. In fact we can easily distinguish between whether completed jobs are outstanding and whether we can make progress. All we have to do is look at the next pqueue list. This patch does that by replacing pd->processed with pd->cpu so that the next pqueue is more accessible. A work queue is used instead of the original try_again to avoid hogging the CPU. Note that we don't bother removing the work queue in padata_flush_queues because the whole premise is broken. You cannot flush async crypto requests so it makes no sense to even try. A subsequent patch will fix it by replacing it with a ref counting scheme. Signed-off-by: Herbert Xu <[email protected]> [dj: - adjust context - corrected setup_timer -> timer_setup to delete hunk - skip padata_flush_queues() hunk, function already removed in 4.9] Signed-off-by: Daniel Jordan <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent 251716d commit f501513

File tree

2 files changed

+22
-86
lines changed

2 files changed

+22
-86
lines changed

include/linux/padata.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <linux/workqueue.h>
2525
#include <linux/spinlock.h>
2626
#include <linux/list.h>
27-
#include <linux/timer.h>
2827
#include <linux/notifier.h>
2928
#include <linux/kobject.h>
3029

@@ -85,18 +84,14 @@ struct padata_serial_queue {
8584
* @serial: List to wait for serialization after reordering.
8685
* @pwork: work struct for parallelization.
8786
* @swork: work struct for serialization.
88-
* @pd: Backpointer to the internal control structure.
8987
* @work: work struct for parallelization.
90-
* @reorder_work: work struct for reordering.
9188
* @num_obj: Number of objects that are processed by this cpu.
9289
* @cpu_index: Index of the cpu.
9390
*/
9491
struct padata_parallel_queue {
9592
struct padata_list parallel;
9693
struct padata_list reorder;
97-
struct parallel_data *pd;
9894
struct work_struct work;
99-
struct work_struct reorder_work;
10095
atomic_t num_obj;
10196
int cpu_index;
10297
};
@@ -122,10 +117,10 @@ struct padata_cpumask {
122117
* @reorder_objects: Number of objects waiting in the reorder queues.
123118
* @refcnt: Number of objects holding a reference on this parallel_data.
124119
* @max_seq_nr: Maximal used sequence number.
120+
* @cpu: Next CPU to be processed.
125121
* @cpumask: The cpumasks in use for parallel and serial workers.
122+
* @reorder_work: work struct for reordering.
126123
* @lock: Reorder lock.
127-
* @processed: Number of already processed objects.
128-
* @timer: Reorder timer.
129124
*/
130125
struct parallel_data {
131126
struct padata_instance *pinst;
@@ -134,10 +129,10 @@ struct parallel_data {
134129
atomic_t reorder_objects;
135130
atomic_t refcnt;
136131
atomic_t seq_nr;
132+
int cpu;
137133
struct padata_cpumask cpumask;
134+
struct work_struct reorder_work;
138135
spinlock_t lock ____cacheline_aligned;
139-
unsigned int processed;
140-
struct timer_list timer;
141136
};
142137

143138
/**

kernel/padata.c

Lines changed: 18 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -166,23 +166,12 @@ EXPORT_SYMBOL(padata_do_parallel);
166166
*/
167167
static struct padata_priv *padata_get_next(struct parallel_data *pd)
168168
{
169-
int cpu, num_cpus;
170-
unsigned int next_nr, next_index;
171169
struct padata_parallel_queue *next_queue;
172170
struct padata_priv *padata;
173171
struct padata_list *reorder;
172+
int cpu = pd->cpu;
174173

175-
num_cpus = cpumask_weight(pd->cpumask.pcpu);
176-
177-
/*
178-
* Calculate the percpu reorder queue and the sequence
179-
* number of the next object.
180-
*/
181-
next_nr = pd->processed;
182-
next_index = next_nr % num_cpus;
183-
cpu = padata_index_to_cpu(pd, next_index);
184174
next_queue = per_cpu_ptr(pd->pqueue, cpu);
185-
186175
reorder = &next_queue->reorder;
187176

188177
spin_lock(&reorder->lock);
@@ -193,7 +182,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
193182
list_del_init(&padata->list);
194183
atomic_dec(&pd->reorder_objects);
195184

196-
pd->processed++;
185+
pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
186+
false);
197187

198188
spin_unlock(&reorder->lock);
199189
goto out;
@@ -216,6 +206,7 @@ static void padata_reorder(struct parallel_data *pd)
216206
struct padata_priv *padata;
217207
struct padata_serial_queue *squeue;
218208
struct padata_instance *pinst = pd->pinst;
209+
struct padata_parallel_queue *next_queue;
219210

220211
/*
221212
* We need to ensure that only one cpu can work on dequeueing of
@@ -247,7 +238,6 @@ static void padata_reorder(struct parallel_data *pd)
247238
* so exit immediately.
248239
*/
249240
if (PTR_ERR(padata) == -ENODATA) {
250-
del_timer(&pd->timer);
251241
spin_unlock_bh(&pd->lock);
252242
return;
253243
}
@@ -266,70 +256,29 @@ static void padata_reorder(struct parallel_data *pd)
266256

267257
/*
268258
* The next object that needs serialization might have arrived to
269-
* the reorder queues in the meantime, we will be called again
270-
* from the timer function if no one else cares for it.
259+
* the reorder queues in the meantime.
271260
*
272-
* Ensure reorder_objects is read after pd->lock is dropped so we see
273-
* an increment from another task in padata_do_serial. Pairs with
261+
* Ensure reorder queue is read after pd->lock is dropped so we see
262+
* new objects from another task in padata_do_serial. Pairs with
274263
* smp_mb__after_atomic in padata_do_serial.
275264
*/
276265
smp_mb();
277-
if (atomic_read(&pd->reorder_objects)
278-
&& !(pinst->flags & PADATA_RESET))
279-
mod_timer(&pd->timer, jiffies + HZ);
280-
else
281-
del_timer(&pd->timer);
282266

283-
return;
267+
next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
268+
if (!list_empty(&next_queue->reorder.list))
269+
queue_work(pinst->wq, &pd->reorder_work);
284270
}
285271

286272
static void invoke_padata_reorder(struct work_struct *work)
287273
{
288-
struct padata_parallel_queue *pqueue;
289274
struct parallel_data *pd;
290275

291276
local_bh_disable();
292-
pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
293-
pd = pqueue->pd;
277+
pd = container_of(work, struct parallel_data, reorder_work);
294278
padata_reorder(pd);
295279
local_bh_enable();
296280
}
297281

298-
static void padata_reorder_timer(unsigned long arg)
299-
{
300-
struct parallel_data *pd = (struct parallel_data *)arg;
301-
unsigned int weight;
302-
int target_cpu, cpu;
303-
304-
cpu = get_cpu();
305-
306-
/* We don't lock pd here to not interfere with parallel processing
307-
* padata_reorder() calls on other CPUs. We just need any CPU out of
308-
* the cpumask.pcpu set. It would be nice if it's the right one but
309-
* it doesn't matter if we're off to the next one by using an outdated
310-
* pd->processed value.
311-
*/
312-
weight = cpumask_weight(pd->cpumask.pcpu);
313-
target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
314-
315-
/* ensure to call the reorder callback on the correct CPU */
316-
if (cpu != target_cpu) {
317-
struct padata_parallel_queue *pqueue;
318-
struct padata_instance *pinst;
319-
320-
/* The timer function is serialized wrt itself -- no locking
321-
* needed.
322-
*/
323-
pinst = pd->pinst;
324-
pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
325-
queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
326-
} else {
327-
padata_reorder(pd);
328-
}
329-
330-
put_cpu();
331-
}
332-
333282
static void padata_serial_worker(struct work_struct *serial_work)
334283
{
335284
struct padata_serial_queue *squeue;
@@ -383,9 +332,8 @@ void padata_do_serial(struct padata_priv *padata)
383332

384333
cpu = get_cpu();
385334

386-
/* We need to run on the same CPU padata_do_parallel(.., padata, ..)
387-
* was called on -- or, at least, enqueue the padata object into the
388-
* correct per-cpu queue.
335+
/* We need to enqueue the padata object into the correct
336+
* per-cpu queue.
389337
*/
390338
if (cpu != padata->cpu) {
391339
reorder_via_wq = 1;
@@ -395,26 +343,20 @@ void padata_do_serial(struct padata_priv *padata)
395343
pqueue = per_cpu_ptr(pd->pqueue, cpu);
396344

397345
spin_lock(&pqueue->reorder.lock);
398-
atomic_inc(&pd->reorder_objects);
399346
list_add_tail(&padata->list, &pqueue->reorder.list);
347+
atomic_inc(&pd->reorder_objects);
400348
spin_unlock(&pqueue->reorder.lock);
401349

402350
/*
403-
* Ensure the atomic_inc of reorder_objects above is ordered correctly
351+
* Ensure the addition to the reorder list is ordered correctly
404352
* with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
405353
* in padata_reorder.
406354
*/
407355
smp_mb__after_atomic();
408356

409357
put_cpu();
410358

411-
/* If we're running on the wrong CPU, call padata_reorder() via a
412-
* kernel worker.
413-
*/
414-
if (reorder_via_wq)
415-
queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
416-
else
417-
padata_reorder(pd);
359+
padata_reorder(pd);
418360
}
419361
EXPORT_SYMBOL(padata_do_serial);
420362

@@ -470,14 +412,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
470412
continue;
471413
}
472414

473-
pqueue->pd = pd;
474415
pqueue->cpu_index = cpu_index;
475416
cpu_index++;
476417

477418
__padata_list_init(&pqueue->reorder);
478419
__padata_list_init(&pqueue->parallel);
479420
INIT_WORK(&pqueue->work, padata_parallel_worker);
480-
INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
481421
atomic_set(&pqueue->num_obj, 0);
482422
}
483423
}
@@ -505,12 +445,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
505445

506446
padata_init_pqueues(pd);
507447
padata_init_squeues(pd);
508-
setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
509448
atomic_set(&pd->seq_nr, -1);
510449
atomic_set(&pd->reorder_objects, 0);
511450
atomic_set(&pd->refcnt, 1);
512451
pd->pinst = pinst;
513452
spin_lock_init(&pd->lock);
453+
pd->cpu = cpumask_first(pcpumask);
454+
INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
514455

515456
return pd;
516457

0 commit comments

Comments
 (0)