Skip to content

Commit d5f177d

Browse files
committed
rcu-tasks: Add an RCU Tasks Trace to simplify protection of tracing hooks
Because RCU does not watch exception early-entry/late-exit, idle-loop, or CPU-hotplug execution, protection of tracing and BPF operations is needlessly complicated. This commit therefore adds a variant of Tasks RCU that: o Has explicit read-side markers to allow finite grace periods in the face of in-kernel loops for PREEMPT=n builds. These markers are rcu_read_lock_trace() and rcu_read_unlock_trace(). o Protects code in the idle loop, exception entry/exit, and CPU-hotplug code paths. In this respect, RCU Tasks Trace is similar to SRCU, but with lighter-weight readers. o Avoids expensive read-side instructions, having overhead similar to that of Preemptible RCU. There are of course downsides: o The grace-period code can send IPIs to CPUs, even when those CPUs are in the idle loop or in nohz_full userspace. This is mitigated by later commits. o It is necessary to scan the full tasklist, much as for Tasks RCU. o There is a single callback queue guarded by a single lock, again, much as for Tasks RCU. However, those early use cases that request multiple grace periods in quick succession are expected to do so from a single task, which makes the single lock almost irrelevant. If needed, multiple callback queues can be provided using any number of schemes. Perhaps most important, this variant of RCU does not affect the vanilla flavors, rcu_preempt and rcu_sched. The fact that RCU Tasks Trace readers can operate from idle, offline, and exception entry/exit in no way enables rcu_preempt and rcu_sched readers to do so. The memory ordering was outlined here: https://lore.kernel.org/lkml/20200319034030.GX3199@paulmck-ThinkPad-P72/ This effort benefited greatly from off-list discussions of BPF requirements with Alexei Starovoitov and Andrii Nakryiko. At least some of the on-list discussions are captured in the Link: tags below. In addition, KCSAN was quite helpful in finding some early bugs. 
Link: https://lore.kernel.org/lkml/[email protected]/ Link: https://lore.kernel.org/lkml/[email protected]/ Link: https://lore.kernel.org/lkml/[email protected]/ Cc: Alexei Starovoitov <[email protected]> Cc: Andrii Nakryiko <[email protected]> [ paulmck: Apply feedback from Steve Rostedt and Joel Fernandes. ] [ paulmck: Decrement trc_n_readers_need_end upon IPI failure. ] [ paulmck: Fix locking issue reported by rcutorture. ] Signed-off-by: Paul E. McKenney <[email protected]>
1 parent d01aa26 commit d5f177d

File tree

6 files changed

+467
-5
lines changed

6 files changed

+467
-5
lines changed

include/linux/rcupdate_trace.h

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/* SPDX-License-Identifier: GPL-2.0+ */
2+
/*
3+
* Read-Copy Update mechanism for mutual exclusion, adapted for tracing.
4+
*
5+
* Copyright (C) 2020 Paul E. McKenney.
6+
*/
7+
8+
#ifndef __LINUX_RCUPDATE_TRACE_H
9+
#define __LINUX_RCUPDATE_TRACE_H
10+
11+
#include <linux/sched.h>
12+
#include <linux/rcupdate.h>
13+
14+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
15+
16+
extern struct lockdep_map rcu_trace_lock_map;
17+
18+
/*
 * CONFIG_DEBUG_LOCK_ALLOC variant: returns whatever lock_is_held() reports
 * for rcu_trace_lock_map, the lockdep map declared just above.
 */
static inline int rcu_read_lock_trace_held(void)
19+
{
20+
return lock_is_held(&rcu_trace_lock_map);
21+
}
22+
23+
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
24+
25+
/*
 * Variant used when CONFIG_DEBUG_LOCK_ALLOC is not set: unconditionally
 * returns 1, so callers always see "held" in this configuration.
 */
static inline int rcu_read_lock_trace_held(void)
26+
{
27+
return 1;
28+
}
29+
30+
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
31+
32+
#ifdef CONFIG_TASKS_TRACE_RCU
33+
34+
void rcu_read_unlock_trace_special(struct task_struct *t);
35+
36+
/**
37+
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
38+
*
39+
* When synchronize_rcu_tasks_trace() is invoked by one task, then that task
40+
* is guaranteed to block until all other tasks exit their read-side
41+
* critical sections. Similarly, if call_rcu_tasks_trace() is invoked on one
42+
* task while other tasks are within RCU read-side critical sections,
43+
* invocation of the corresponding RCU callback is deferred until after
44+
* all the other tasks exit their critical sections.
45+
*
46+
* For more details, please see the documentation for rcu_read_lock().
47+
*/
48+
static inline void rcu_read_lock_trace(void)
49+
{
50+
struct task_struct *t = current;
51+
52+
// Increment the current task's reader nesting depth. The load and the
// store are separate marked accesses (READ_ONCE/WRITE_ONCE), not a
// single atomic read-modify-write.
WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
53+
// Record the acquisition against rcu_trace_lock_map for lockdep.
rcu_lock_acquire(&rcu_trace_lock_map);
54+
}
55+
56+
/**
57+
* rcu_read_unlock_trace - mark end of RCU-trace read-side critical section
58+
*
59+
* Pairs with a preceding call to rcu_read_lock_trace(), and nesting is
60+
* allowed. Invoking a rcu_read_unlock_trace() when there is no matching
61+
* rcu_read_lock_trace() is verboten, and will result in lockdep complaints.
62+
*
63+
* For more details, please see the documentation for rcu_read_unlock().
64+
*/
65+
static inline void rcu_read_unlock_trace(void)
66+
{
67+
int nesting;
68+
struct task_struct *t = current;
69+
70+
// Drop the lockdep record first, mirroring rcu_read_lock_trace(), which
// takes it last.
rcu_lock_release(&rcu_trace_lock_map);
71+
// Compute the decremented nesting depth and publish it with a marked store.
nesting = READ_ONCE(t->trc_reader_nesting) - 1;
72+
WRITE_ONCE(t->trc_reader_nesting, nesting);
73+
// Fast path: return unless trc_reader_need_end is set AND this was the
// outermost unlock (nesting reached zero).
// NOTE(review): presumably the grace-period code sets trc_reader_need_end;
// the writer is not visible in this header -- confirm.
if (likely(!READ_ONCE(t->trc_reader_need_end)) || nesting)
74+
return; // We assume shallow reader nesting.
75+
// Outermost unlock with trc_reader_need_end set: hand off to the
// out-of-line helper declared earlier in this header.
rcu_read_unlock_trace_special(t);
76+
}
77+
78+
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
79+
void synchronize_rcu_tasks_trace(void);
80+
void rcu_barrier_tasks_trace(void);
81+
82+
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
83+
84+
#endif /* __LINUX_RCUPDATE_TRACE_H */

include/linux/sched.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,14 @@ struct task_struct {
724724
struct list_head rcu_tasks_holdout_list;
725725
#endif /* #ifdef CONFIG_TASKS_RCU */
726726

727+
#ifdef CONFIG_TASKS_TRACE_RCU
728+
int trc_reader_nesting;
729+
int trc_ipi_to_cpu;
730+
bool trc_reader_need_end;
731+
bool trc_reader_checked;
732+
struct list_head trc_holdout_list;
733+
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
734+
727735
struct sched_info sched_info;
728736

729737
struct list_head tasks;

init/init_task.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ struct task_struct init_task
141141
.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list),
142142
.rcu_tasks_idle_cpu = -1,
143143
#endif
144+
#ifdef CONFIG_TASKS_TRACE_RCU
145+
.trc_reader_nesting = 0,
146+
.trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list),
147+
#endif
144148
#ifdef CONFIG_CPUSETS
145149
.mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq),
146150
#endif

kernel/fork.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1683,6 +1683,10 @@ static inline void rcu_copy_process(struct task_struct *p)
16831683
INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
16841684
p->rcu_tasks_idle_cpu = -1;
16851685
#endif /* #ifdef CONFIG_TASKS_RCU */
1686+
#ifdef CONFIG_TASKS_TRACE_RCU
1687+
p->trc_reader_nesting = 0;
1688+
INIT_LIST_HEAD(&p->trc_holdout_list);
1689+
#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
16861690
}
16871691

16881692
struct pid *pidfd_pid(const struct file *file)

kernel/rcu/Kconfig

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ config TREE_SRCU
7171
This option selects the full-fledged version of SRCU.
7272

7373
config TASKS_RCU_GENERIC
74-
def_bool TASKS_RCU || TASKS_RUDE_RCU
74+
def_bool TASKS_RCU || TASKS_RUDE_RCU || TASKS_TRACE_RCU
7575
select SRCU
7676
help
7777
This option enables generic infrastructure code supporting
@@ -93,6 +93,15 @@ config TASKS_RUDE_RCU
9393
switches on all online CPUs, including idle ones, so use
9494
with caution.
9595

96+
config TASKS_TRACE_RCU
97+
def_bool 0
98+
help
99+
This option enables a task-based RCU implementation that uses
100+
explicit rcu_read_lock_trace() read-side markers, and allows
101+
these readers to appear in the idle loop as well as on the CPU
102+
hotplug code paths. It can force IPIs on online CPUs, including
103+
idle ones, so use with caution.
104+
96105
config RCU_STALL_COMMON
97106
def_bool TREE_RCU
98107
help

0 commit comments

Comments
 (0)