Skip to content

Commit 7d0c9c5

Browse files
committed
rcu-tasks: Avoid IPIing userspace/idle tasks if kernel is so built
Systems running CPU-bound real-time tasks do not want IPIs sent to CPUs executing nohz_full userspace tasks. Battery-powered systems don't want IPIs sent to idle CPUs in low-power mode. Unfortunately, RCU tasks trace can and will send such IPIs in some cases. Both of these situations occur only when the target CPU is in RCU dyntick-idle mode, in other words, when RCU is not watching the target CPU. This suggests that CPUs in dyntick-idle mode should use memory barriers in outermost invocations of rcu_read_lock_trace() and rcu_read_unlock_trace(), which would allow the RCU tasks trace grace period to directly read out the target CPU's read-side state. One challenge is that RCU tasks trace is not targeting a specific CPU, but rather a task. And that task could switch from one CPU to another at any time. This commit therefore uses try_invoke_on_locked_down_task() and checks for task_curr() in trc_inspect_reader_notrunning(). When this condition holds, the target task is running and cannot move. If CONFIG_TASKS_TRACE_RCU_READ_MB=y, the new rcu_dynticks_zero_in_eqs() function can be used to check if the specified integer (in this case, t->trc_reader_nesting) is zero while the target CPU remains in that same dyntick-idle sojourn. If so, the target task is in a quiescent state. If not, trc_read_check_handler() must indicate failure so that the grace-period kthread can take appropriate action or retry after an appropriate delay, as the case may be. With this change, given CONFIG_TASKS_TRACE_RCU_READ_MB=y, if a given CPU remains idle or a given task continues executing in nohz_full mode, the RCU tasks trace grace-period kthread will detect this without the need to send an IPI. Suggested-by: Mathieu Desnoyers <[email protected]> Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 9ae58d7 commit 7d0c9c5

File tree

5 files changed

+72
-10
lines changed

5 files changed

+72
-10
lines changed

kernel/rcu/rcu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
501501
#endif
502502

503503
#ifdef CONFIG_TINY_RCU
504+
static inline bool rcu_dynticks_zero_in_eqs(int cpu, int *vp) { return false; }
504505
static inline unsigned long rcu_get_gp_seq(void) { return 0; }
505506
static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
506507
static inline unsigned long
@@ -510,6 +511,7 @@ static inline void show_rcu_gp_kthreads(void) { }
510511
static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
511512
static inline void rcu_fwd_progress_check(unsigned long j) { }
512513
#else /* #ifdef CONFIG_TINY_RCU */
514+
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp);
513515
unsigned long rcu_get_gp_seq(void);
514516
unsigned long rcu_exp_batches_completed(void);
515517
unsigned long srcu_batches_completed(struct srcu_struct *sp);

kernel/rcu/tasks.h

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -806,22 +806,38 @@ static void trc_read_check_handler(void *t_in)
806806
/*
 * Callback function for the scheduler to check a locked-down task.
 * Returns true if the task's RCU-tasks-trace read-side state could be
 * inspected (and, if needed, set up to report a later quiescent state),
 * false if the caller must fall back to the heavyweight (IPI) path.
 */
static bool trc_inspect_reader(struct task_struct *t, void *arg)
{
	int cpu = task_cpu(t);
	bool in_qs = false;

	if (task_curr(t)) {
		// If no chance of heavyweight readers, do it the hard way.
		if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
			return false;

		// If heavyweight readers are enabled on the remote task,
		// we can inspect its state despite the fact that it is
		// currently running.  However, we cannot safely change
		// its state.
		if (!rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
			return false; // No quiescent state, do it the hard way.
		in_qs = true;
	} else {
		// Task is not running, so its ->trc_reader_nesting is stable.
		in_qs = likely(!t->trc_reader_nesting);
	}

	// Mark as checked.  Because this is called from the grace-period
	// kthread, also remove the task from the holdout list.
	t->trc_reader_checked = true;
	trc_del_holdout(t);

	if (in_qs)
		return true;  // Already in quiescent state, done!!!

	// The task is in a read-side critical section, so set up its
	// state so that it will awaken the grace-period kthread upon exit
	// from that critical section.
	atomic_inc(&trc_n_readers_need_end); // One more to wait on.
	WARN_ON_ONCE(t->trc_reader_special.b.need_qs);
	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
	return true;
}
827843

kernel/rcu/tree.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ static void rcu_dynticks_eqs_enter(void)
252252
* critical sections, and we also must force ordering with the
253253
* next idle sojourn.
254254
*/
255+
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
255256
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
256257
// RCU is no longer watching. Better be in extended quiescent state!
257258
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
@@ -278,6 +279,7 @@ static void rcu_dynticks_eqs_exit(void)
278279
*/
279280
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
280281
// RCU is now watching. Better not be in an extended quiescent state!
282+
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
281283
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
282284
!(seq & RCU_DYNTICK_CTRL_CTR));
283285
if (seq & RCU_DYNTICK_CTRL_MASK) {
@@ -349,6 +351,28 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
349351
return snap != rcu_dynticks_snap(rdp);
350352
}
351353

354+
/*
 * Return true if the referenced integer is zero while the specified
 * CPU remains within a single extended quiescent state.
 *
 * Note the asymmetry: the snapshot clears RCU_DYNTICK_CTRL_CTR but the
 * final comparison does not, so if the CPU was not in an extended
 * quiescent state to begin with (CTR bit set), the comparison fails and
 * this function returns false, as required.
 */
bool rcu_dynticks_zero_in_eqs(int cpu, int *vp)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	int snap;

	// If not quiescent, force back to earlier extended quiescent state.
	snap = atomic_read(&rdp->dynticks) & ~(RCU_DYNTICK_CTRL_MASK |
					       RCU_DYNTICK_CTRL_CTR);

	smp_rmb(); // Order ->dynticks and *vp reads.
	if (READ_ONCE(*vp))
		return false;  // Non-zero, so report failure.
	smp_rmb(); // Order *vp read and ->dynticks re-read.

	// If still in the same extended quiescent state, we are good!
	return snap == (atomic_read(&rdp->dynticks) & ~RCU_DYNTICK_CTRL_MASK);
}
375+
352376
/*
353377
* Set the special (bottom) bit of the specified CPU so that it
354378
* will take special action (such as flushing its TLB) on the

kernel/rcu/tree.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,8 @@ static void rcu_bind_gp_kthread(void);
454454
static bool rcu_nohz_full_cpu(void);
455455
static void rcu_dynticks_task_enter(void);
456456
static void rcu_dynticks_task_exit(void);
457+
static void rcu_dynticks_task_trace_enter(void);
458+
static void rcu_dynticks_task_trace_exit(void);
457459

458460
/* Forward declarations for tree_stall.h */
459461
static void record_gp_stall_check_time(void);

kernel/rcu/tree_plugin.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2552,3 +2552,21 @@ static void rcu_dynticks_task_exit(void)
25522552
WRITE_ONCE(current->rcu_tasks_idle_cpu, -1);
25532553
#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
25542554
}
2555+
2556+
/*
 * Enable heavyweight RCU tasks trace readers for this task on entry
 * to idle or to nohz_full userspace execution.  With heavyweight
 * readers enabled, the grace-period kthread can sample this task's
 * read-side state remotely instead of sending an IPI.
 */
static void rcu_dynticks_task_trace_enter(void)
{
#if defined(CONFIG_TASKS_RCU_TRACE)
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = true;
#endif /* #if defined(CONFIG_TASKS_RCU_TRACE) */
}
2564+
2565+
/*
 * Disable heavyweight RCU tasks trace readers for this task on exit
 * from idle or from nohz_full userspace execution, reverting to the
 * normal lightweight read-side markers.
 */
static void rcu_dynticks_task_trace_exit(void)
{
#if defined(CONFIG_TASKS_RCU_TRACE)
	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
		current->trc_reader_special.b.need_mb = false;
#endif /* #if defined(CONFIG_TASKS_RCU_TRACE) */
}

0 commit comments

Comments
 (0)