Skip to content

Commit d119357

Browse files
committed
rcu-tasks: Treat only synchronous grace periods urgently
The performance requirements on RCU Tasks, and in particular on RCU Tasks Trace, have evolved over time as the workloads have evolved. The current implementation is designed to provide low grace-period latencies, and also to accommodate short-duration floods of callbacks. However, current workloads can also provide a constant background callback-queuing rate of a few hundred call_rcu_tasks_trace() invocations per second. This results in continuous back-to-back RCU Tasks Trace grace periods, which in turn can consume the better part of 10% of a CPU. One could take the attitude that there are several tens of other CPUs on the systems running such workloads, but energy efficiency is a thing. On these systems, although asynchronous grace-period requests happen every few milliseconds, synchronous grace-period requests are quite rare. This commit therefore arranges for grace periods to be initiated immediately in response to calls to synchronize_rcu_tasks*() and also to calls to synchronize_rcu_mult() that are passed one of the call_rcu_tasks*() functions. These are recognized by the tell-tale wakeme_after_rcu callback function. In other cases, callbacks are gathered up for up to about 250 milliseconds before a grace period is initiated. This results in more than an order of magnitude reduction in RCU Tasks Trace grace periods, with corresponding reduction in consumption of CPU time. Reported-by: Alexei Starovoitov <[email protected]> Reported-by: Martin KaFai Lau <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 06c2afb commit d119357

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

kernel/rcu/tasks.h

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
2525
* @cblist: Callback list.
2626
* @lock: Lock protecting per-CPU callback list.
2727
* @rtp_jiffies: Jiffies counter value for statistics.
28+
* @lazy_timer: Timer to unlazify callbacks.
29+
* @urgent_gp: Number of additional non-lazy grace periods.
2830
* @rtp_n_lock_retries: Rough lock-contention statistic.
2931
* @rtp_work: Work queue for invoking callbacks.
3032
* @rtp_irq_work: IRQ work queue for deferred wakeups.
@@ -38,6 +40,8 @@ struct rcu_tasks_percpu {
3840
raw_spinlock_t __private lock;
3941
unsigned long rtp_jiffies;
4042
unsigned long rtp_n_lock_retries;
43+
struct timer_list lazy_timer;
44+
unsigned int urgent_gp;
4145
struct work_struct rtp_work;
4246
struct irq_work rtp_irq_work;
4347
struct rcu_head barrier_q_head;
@@ -51,7 +55,6 @@ struct rcu_tasks_percpu {
5155
* @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
5256
* @cbs_gbl_lock: Lock protecting callback list.
5357
* @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
54-
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
5558
* @gp_func: This flavor's grace-period-wait function.
5659
* @gp_state: Grace period's most recent state transition (debugging).
5760
* @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
@@ -61,6 +64,8 @@ struct rcu_tasks_percpu {
6164
* @tasks_gp_seq: Number of grace periods completed since boot.
6265
* @n_ipis: Number of IPIs sent to encourage grace periods to end.
6366
* @n_ipis_fails: Number of IPI-send failures.
67+
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
68+
* @lazy_jiffies: Number of jiffies to allow callbacks to be lazy.
6469
* @pregp_func: This flavor's pre-grace-period function (optional).
6570
* @pertask_func: This flavor's per-task scan function (optional).
6671
* @postscan_func: This flavor's post-task scan function (optional).
@@ -92,6 +97,7 @@ struct rcu_tasks {
9297
unsigned long n_ipis;
9398
unsigned long n_ipis_fails;
9499
struct task_struct *kthread_ptr;
100+
unsigned long lazy_jiffies;
95101
rcu_tasks_gp_func_t gp_func;
96102
pregp_func_t pregp_func;
97103
pertask_func_t pertask_func;
@@ -127,6 +133,7 @@ static struct rcu_tasks rt_name = \
127133
.gp_func = gp, \
128134
.call_func = call, \
129135
.rtpcpu = &rt_name ## __percpu, \
136+
.lazy_jiffies = DIV_ROUND_UP(HZ, 4), \
130137
.name = n, \
131138
.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
132139
.percpu_enqueue_lim = 1, \
@@ -276,6 +283,33 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
276283
data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
277284
}
278285

286+
// Compute wakeup time for lazy callback timer.
287+
static unsigned long rcu_tasks_lazy_time(struct rcu_tasks *rtp)
288+
{
289+
return jiffies + rtp->lazy_jiffies;
290+
}
291+
292+
// Timer handler that unlazifies lazy callbacks.
293+
static void call_rcu_tasks_generic_timer(struct timer_list *tlp)
294+
{
295+
unsigned long flags;
296+
bool needwake = false;
297+
struct rcu_tasks *rtp;
298+
struct rcu_tasks_percpu *rtpcp = from_timer(rtpcp, tlp, lazy_timer);
299+
300+
rtp = rtpcp->rtpp;
301+
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
302+
if (!rcu_segcblist_empty(&rtpcp->cblist) && rtp->lazy_jiffies) {
303+
if (!rtpcp->urgent_gp)
304+
rtpcp->urgent_gp = 1;
305+
needwake = true;
306+
mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
307+
}
308+
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
309+
if (needwake)
310+
rcuwait_wake_up(&rtp->cbs_wait);
311+
}
312+
279313
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
280314
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
281315
{
@@ -292,6 +326,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
292326
{
293327
int chosen_cpu;
294328
unsigned long flags;
329+
bool havekthread = smp_load_acquire(&rtp->kthread_ptr);
295330
int ideal_cpu;
296331
unsigned long j;
297332
bool needadjust = false;
@@ -321,7 +356,15 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
321356
cblist_init_generic(rtp);
322357
raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
323358
}
324-
needwake = rcu_segcblist_empty(&rtpcp->cblist);
359+
needwake = func == wakeme_after_rcu;
360+
if (havekthread && !timer_pending(&rtpcp->lazy_timer)) {
361+
if (rtp->lazy_jiffies)
362+
mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
363+
else
364+
needwake = rcu_segcblist_empty(&rtpcp->cblist);
365+
}
366+
if (needwake)
367+
rtpcp->urgent_gp = 3;
325368
rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
326369
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
327370
if (unlikely(needadjust)) {
@@ -415,9 +458,14 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
415458
}
416459
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
417460
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
418-
if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
461+
if (rtpcp->urgent_gp > 0 && rcu_segcblist_pend_cbs(&rtpcp->cblist)) {
462+
if (rtp->lazy_jiffies)
463+
rtpcp->urgent_gp--;
419464
needgpcb |= 0x3;
420-
if (!rcu_segcblist_empty(&rtpcp->cblist))
465+
} else if (rcu_segcblist_empty(&rtpcp->cblist)) {
466+
rtpcp->urgent_gp = 0;
467+
}
468+
if (rcu_segcblist_ready_cbs(&rtpcp->cblist))
421469
needgpcb |= 0x1;
422470
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
423471
}
@@ -549,11 +597,19 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
549597
// RCU-tasks kthread that detects grace periods and invokes callbacks.
550598
static int __noreturn rcu_tasks_kthread(void *arg)
551599
{
600+
int cpu;
552601
struct rcu_tasks *rtp = arg;
553602

603+
for_each_possible_cpu(cpu) {
604+
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
605+
606+
timer_setup(&rtpcp->lazy_timer, call_rcu_tasks_generic_timer, 0);
607+
rtpcp->urgent_gp = 1;
608+
}
609+
554610
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
555611
housekeeping_affine(current, HK_TYPE_RCU);
556-
WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
612+
smp_store_release(&rtp->kthread_ptr, current); // Let GPs start!
557613

558614
/*
559615
* Each pass through the following loop makes one check for
@@ -635,23 +691,32 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
635691
{
636692
int cpu;
637693
bool havecbs = false;
694+
bool haveurgent = false;
695+
bool haveurgentcbs = false;
638696

639697
for_each_possible_cpu(cpu) {
640698
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
641699

642-
if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
700+
if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)))
643701
havecbs = true;
702+
if (data_race(rtpcp->urgent_gp))
703+
haveurgent = true;
704+
if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)) && data_race(rtpcp->urgent_gp))
705+
haveurgentcbs = true;
706+
if (havecbs && haveurgent && haveurgentcbs)
644707
break;
645-
}
646708
}
647-
pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
709+
pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c%c%c l:%lu %s\n",
648710
rtp->kname,
649711
tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
650712
jiffies - data_race(rtp->gp_jiffies),
651713
data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
652714
data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
653715
".k"[!!data_race(rtp->kthread_ptr)],
654716
".C"[havecbs],
717+
".u"[haveurgent],
718+
".U"[haveurgentcbs],
719+
rtp->lazy_jiffies,
655720
s);
656721
}
657722
#endif // #ifndef CONFIG_TINY_RCU

0 commit comments

Comments
 (0)