Commit e30960e

rcu: Let non-offloaded idle CPUs with callbacks defer tick
When a CPU goes idle, rcu_needs_cpu() is invoked to determine whether or not RCU needs the scheduler-clock tick to keep interrupting. Right now, RCU keeps the tick on for a given idle CPU if there are any non-offloaded callbacks queued on that CPU.

But if all of these callbacks are waiting for a grace period to finish, there is no point in scheduling a tick before that grace period has any reasonable chance of completing. This commit therefore delays the tick in the case where all the callbacks are waiting for a specific grace period to elapse. In theory, this should result in a 50-70% reduction in RCU-induced scheduling-clock ticks on mostly-idle CPUs. In practice, TBD.

Signed-off-by: Paul E. McKenney <[email protected]>
Cc: Peter Zijlstra <[email protected]>
1 parent 54b55b5 commit e30960e

4 files changed: +43 -15 lines changed
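
The headline change is the signature of rcu_needs_cpu(), which now threads a wake-up deadline back to the nohz idle code. As a rough sketch of the contract the new signature gives its caller (an illustration written for this page, not code from the commit): a nonzero return means the tick must stay on for the next jiffy, while a zero return hands back, via *nextevt, the earliest time in the same clock base as basemono at which RCU may need this CPU again, with KTIME_MAX meaning no RCU constraint at all.

/*
 * Hypothetical caller, for illustration only; tick_nohz_next_event()
 * in kernel/time/tick-sched.c is the real consumer of this interface.
 */
static u64 idle_tick_deadline_example(u64 basemono)
{
        u64 next_rcu;

        if (rcu_needs_cpu(basemono, &next_rcu))
                return basemono + TICK_NSEC;    /* RCU needs the very next tick */
        return next_rcu;                        /* deferred RCU deadline, or KTIME_MAX */
}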

include/linux/rcutiny.h

Lines changed: 1 addition & 1 deletion
@@ -133,7 +133,7 @@ static inline void rcu_softirq_qs(void)
 		rcu_tasks_qs(current, (preempt)); \
 	} while (0)
 
-static inline int rcu_needs_cpu(void)
+static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
 	return 0;
 }

include/linux/rcutree.h

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 
 void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
-int rcu_needs_cpu(void);
+int rcu_needs_cpu(u64 basemono, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
 /*

kernel/rcu/tree.c

Lines changed: 35 additions & 9 deletions
@@ -676,12 +676,40 @@ void __rcu_irq_enter_check_tick(void)
  * scheduler-clock interrupt.
  *
  * Just check whether or not this CPU has non-offloaded RCU callbacks
- * queued.
+ * queued that need immediate attention.
  */
-int rcu_needs_cpu(void)
+int rcu_needs_cpu(u64 basemono, u64 *nextevt)
 {
-	return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist) &&
-		!rcu_rdp_is_offloaded(this_cpu_ptr(&rcu_data));
+	unsigned long j;
+	unsigned long jlast;
+	unsigned long jwait;
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+	struct rcu_segcblist *rsclp = &rdp->cblist;
+
+	// Disabled, empty, or offloaded means nothing to do.
+	if (!rcu_segcblist_is_enabled(rsclp) ||
+	    rcu_segcblist_empty(rsclp) || rcu_rdp_is_offloaded(rdp)) {
+		*nextevt = KTIME_MAX;
+		return 0;
+	}
+
+	// Callbacks ready to invoke or that have not already been
+	// assigned a grace period need immediate attention.
+	if (!rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ||
+	    !rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL))
+		return 1;
+
+	// There are callbacks waiting for some later grace period.
+	// Wait for about a grace period or two since the last tick, at which
+	// point there is high probability that this CPU will need to do some
+	// work for RCU.
+	j = jiffies;
+	jlast = __this_cpu_read(rcu_data.last_sched_clock);
+	jwait = READ_ONCE(jiffies_till_first_fqs) + READ_ONCE(jiffies_till_next_fqs) + 1;
+	if (time_after(j, jlast + jwait))
+		return 1;
+	*nextevt = basemono + TICK_NSEC * (jlast + jwait - j);
+	return 0;
 }
 
 /*
@@ -2324,11 +2352,9 @@ void rcu_sched_clock_irq(int user)
 {
 	unsigned long j;
 
-	if (IS_ENABLED(CONFIG_PROVE_RCU)) {
-		j = jiffies;
-		WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
-		__this_cpu_write(rcu_data.last_sched_clock, j);
-	}
+	j = jiffies;
+	WARN_ON_ONCE(time_before(j, __this_cpu_read(rcu_data.last_sched_clock)));
+	__this_cpu_write(rcu_data.last_sched_clock, j);
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	lockdep_assert_irqs_disabled();
 	raw_cpu_inc(rcu_data.ticks_this_gp);
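
To put hypothetical numbers on the deferral window computed above (all values here are illustrative assumptions, not taken from the commit): with jiffies_till_first_fqs and jiffies_till_next_fqs both set to 3 jiffies, jwait works out to 3 + 3 + 1 = 7 jiffies. If the last scheduler-clock tick on this CPU landed 2 jiffies ago, the function reports that the tick can be deferred by five tick periods:

        /* Illustrative values only; the real ones come from the fqs module parameters. */
        jwait = 3 + 3 + 1;                      /* 7-jiffy window */
        /* j - jlast == 2, so jlast + jwait - j == 5 */
        *nextevt = basemono + TICK_NSEC * 5;    /* about 5 ms out at HZ=1000 */

Only once roughly a grace period or two has gone by without a tick (the time_after() check) does the function insist on an immediate tick again.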

kernel/time/tick-sched.c

Lines changed: 6 additions & 4 deletions
@@ -784,7 +784,7 @@ static inline bool local_timer_softirq_pending(void)
 
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	u64 basemono, next_tick, delta, expires;
+	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 	unsigned long basejiff;
 	unsigned int seq;
 
@@ -807,7 +807,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 * minimal delta which brings us back to this place
 	 * immediately. Lather, rinse and repeat...
 	 */
-	if (rcu_needs_cpu() || arch_needs_cpu() ||
+	if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
 	    irq_work_needs_cpu() || local_timer_softirq_pending()) {
 		next_tick = basemono + TICK_NSEC;
 	} else {
@@ -818,8 +818,10 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 		 * disabled this also looks at the next expiring
 		 * hrtimer.
 		 */
-		next_tick = get_next_timer_interrupt(basejiff, basemono);
-		ts->next_timer = next_tick;
+		next_tmr = get_next_timer_interrupt(basejiff, basemono);
+		ts->next_timer = next_tmr;
+		/* Take the next rcu event into account */
+		next_tick = next_rcu < next_tmr ? next_rcu : next_tmr;
 	}
 
 	/*
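
One detail worth noting about the hunk above: because rcu_needs_cpu() is the first operand of the || chain, it always runs, and every path on which it returns 0 stores to *nextevt first, either KTIME_MAX when RCU has nothing queued or the deferred deadline otherwise. The ternary in the else branch therefore never reads an uninitialized next_rcu, and whenever RCU imposes no constraint the KTIME_MAX value loses the comparison and next_tick falls back to the plain timer-wheel expiry.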
