.. _rcu_barrier:

RCU and Unloadable Modules
==========================

[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]

How can an RCU writer possibly determine when all readers are finished,
given that readers might well leave absolutely no trace of their
presence? There is a synchronize_rcu() primitive that blocks until all
pre-existing readers have completed. An updater wishing to delete an
element p from a linked list might do the following, while holding an
appropriate lock, of course::

	list_del_rcu(p);
	synchronize_rcu();
	kfree(p);

But the above code cannot be used in IRQ context -- the call_rcu()
primitive must be used instead. This primitive takes a pointer to an
rcu_head struct placed within the RCU-protected data structure and
another pointer to a function that may be invoked later to free that
structure. Code to delete an element p from the linked list from IRQ
context might then be as follows::

	list_del_rcu(p);
	call_rcu(&p->rcu, p_callback);

Since call_rcu() never blocks, this code can safely be used from within
IRQ context. The function p_callback() might be defined as follows::

	static void p_callback(struct rcu_head *rp)
	{
		struct pstruct *p = container_of(rp, struct pstruct, rcu);

		kfree(p);
	}


Unloading Modules That Use call_rcu()
-------------------------------------

But what if p_callback is defined in an unloadable module?

If we unload the module while some RCU callbacks are pending, the CPUs
executing these callbacks are going to be severely confused. Therefore,
we need a way to wait for all outstanding RCU callbacks to complete.

One might be tempted to try several back-to-back synchronize_rcu()
calls, but this is still not guaranteed to work. If there is a very
heavy RCU-callback load, then some of the callbacks might be deferred
in order to allow other processing to proceed. Such deferral is required
in realtime kernels in order to avoid excessive scheduling latencies.

rcu_barrier()
-------------

We instead need the rcu_barrier() primitive. Rather than waiting for
a grace period to elapse, rcu_barrier() waits for all outstanding RCU
callbacks to complete. Please note that rcu_barrier() does **not** imply
synchronize_rcu(); in particular, if there are no RCU callbacks queued
anywhere, rcu_barrier() is within its rights to return immediately,
without waiting for a grace period to elapse.
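
The typical unload-time pattern is therefore: first prevent any new RCU
callbacks from being posted, then invoke rcu_barrier(), and only then
allow the module to be unloaded. The following minimal sketch shows that
pattern in a hypothetical module; the names foo, foo_retire(), and
foo_reclaim() are illustrative only and do not come from any in-tree
code::

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {				/* hypothetical RCU-protected element */
		int data;
		struct rcu_head rcu;
	};

	/* RCU callback defined in this module, so it must not run after unload. */
	static void foo_reclaim(struct rcu_head *rp)
	{
		kfree(container_of(rp, struct foo, rcu));
	}

	/* Updaters elsewhere in the module retire elements like this. */
	static void foo_retire(struct foo *p)
	{
		call_rcu(&p->rcu, foo_reclaim);
	}

	static void __exit foo_exit(void)
	{
		/*
		 * By this point the module must no longer be posting callbacks
		 * (lists torn down, timers cancelled, and so on).  rcu_barrier()
		 * then waits for every already-queued foo_reclaim() invocation,
		 * after which the module text may safely disappear.
		 */
		rcu_barrier();
	}
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");

Note that rcu_barrier() waits only for callbacks that have already been
posted, which is why the module must stop posting new ones first.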

There is also an srcu_barrier() function for SRCU, and you of course
must match the flavor of rcu_barrier() with that of call_rcu(). If your
module uses multiple flavors of call_rcu(), then it must also use multiple
flavors of rcu_barrier() when unloading that module. For example, if
it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
srcu_struct_2, then the following three lines of code will be required
when unloading::

	1 rcu_barrier();
	2 srcu_barrier(&srcu_struct_1);
	3 srcu_barrier(&srcu_struct_2);

The rcutorture module makes use of rcu_barrier() in its exit function
as follows::

	 1 static void
	 2 rcu_torture_cleanup(void)
	 3 {
	 4   int i;
	 5
	 6   fullstop = 1;
	 7   if (shuffler_task != NULL) {
	 8     VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
	 9     kthread_stop(shuffler_task);
	10   }
	11   shuffler_task = NULL;
	12
	13   if (writer_task != NULL) {
	14     VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
	15     kthread_stop(writer_task);
	16   }
	17   writer_task = NULL;
	18
	19   if (reader_tasks != NULL) {
	20     for (i = 0; i < nrealreaders; i++) {
	21       if (reader_tasks[i] != NULL) {
	22         VERBOSE_PRINTK_STRING(
	23           "Stopping rcu_torture_reader task");
	24         kthread_stop(reader_tasks[i]);
	25       }
	26       reader_tasks[i] = NULL;
	27     }
	28     kfree(reader_tasks);
	29     reader_tasks = NULL;
	30   }
	31   rcu_torture_current = NULL;
	32
	33   if (fakewriter_tasks != NULL) {
	34     for (i = 0; i < nfakewriters; i++) {
	35       if (fakewriter_tasks[i] != NULL) {
	36         VERBOSE_PRINTK_STRING(
	37           "Stopping rcu_torture_fakewriter task");
	38         kthread_stop(fakewriter_tasks[i]);
	39       }
	40       fakewriter_tasks[i] = NULL;
	41     }
	42     kfree(fakewriter_tasks);
	43     fakewriter_tasks = NULL;
	44   }
	45
	46   if (stats_task != NULL) {
	47     VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
	48     kthread_stop(stats_task);
	49   }
	50   stats_task = NULL;
	51
	52   /* Wait for all RCU callbacks to fire. */
	53   rcu_barrier();
	54
	55   rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
	56
	57   if (cur_ops->cleanup != NULL)
	58     cur_ops->cleanup();
	59   if (atomic_read(&n_rcu_torture_error))
	60     rcu_torture_print_module_parms("End of test: FAILURE");
	61   else
	62     rcu_torture_print_module_parms("End of test: SUCCESS");
	63 }

Line 6 sets a global variable that prevents any RCU callbacks from
re-posting themselves. This will not be necessary in most cases, since
RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
module is an exception to this rule, and therefore needs to set this
global variable.

Lines 7-50 stop all the kernel tasks associated with the rcutorture
module. Therefore, once execution reaches line 53, no more rcutorture
RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
for any pre-existing callbacks to complete.

Then lines 55-62 print status and do operation-specific cleanup, and
then return, permitting the module-unload operation to be completed.

.. _rcubarrier_quiz_1:

Quick Quiz #1:
	Is there any other situation where rcu_barrier() might
	be required?

:ref:`Answer to Quick Quiz #1 <answer_rcubarrier_quiz_1>`

Your module might have additional complications. For example, if your
module invokes call_rcu() from timers, you will need to first cancel all
the timers, and only then invoke rcu_barrier() to wait for any remaining
RCU callbacks to complete.
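
For example, continuing the hypothetical foo module from the earlier
sketch (foo_timer, foo_timer_handler(), and foo_stale are again made-up
names), the exit handler must kill the timer before invoking
rcu_barrier(), because a still-armed timer could post a new callback
after rcu_barrier() has already returned::

	static struct timer_list foo_timer;
	static struct foo *foo_stale;		/* set elsewhere in the module */

	/* timer_setup()/mod_timer() in the init path are omitted here */
	static void foo_timer_handler(struct timer_list *unused)
	{
		struct foo *p = xchg(&foo_stale, NULL);

		if (p)
			call_rcu(&p->rcu, foo_reclaim);	/* posted from timer context */
	}

	/* Revised exit handler for the timer-using variant of the module. */
	static void __exit foo_exit(void)
	{
		/* The handler does not re-arm itself, so del_timer_sync() suffices. */
		del_timer_sync(&foo_timer);	/* 1: stop the source of new callbacks */
		rcu_barrier();			/* 2: wait for callbacks already queued */
		/* 3: only now is it safe for foo_reclaim()'s code to be unloaded */
	}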

Of course, if your module uses call_rcu(), you will need to invoke
rcu_barrier() before unloading. Similarly, if your module uses
call_srcu(), you will need to invoke srcu_barrier() before unloading,
and on the same srcu_struct structure. If your module uses call_rcu()
**and** call_srcu(), then you will need to invoke rcu_barrier() **and**
srcu_barrier().


Implementing rcu_barrier()
--------------------------

Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
that RCU callbacks are never reordered once queued on one of the per-CPU
queues. His implementation queues an RCU callback on each of the per-CPU
callback queues, and then waits until they have all started executing, at
which point, all earlier RCU callbacks are guaranteed to have completed.

The original code for rcu_barrier() was as follows::

	 1 void rcu_barrier(void)
	 2 {
	 3   BUG_ON(in_interrupt());
	 4   /* Take cpucontrol mutex to protect against CPU hotplug */
	 5   mutex_lock(&rcu_barrier_mutex);
	 6   init_completion(&rcu_barrier_completion);
	 7   atomic_set(&rcu_barrier_cpu_count, 0);
	 8   on_each_cpu(rcu_barrier_func, NULL, 0, 1);
	 9   wait_for_completion(&rcu_barrier_completion);
	10   mutex_unlock(&rcu_barrier_mutex);
	11 }

Line 3 verifies that the caller is in process context, and lines 5 and 10
use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
global completion and counters at a time, which are initialized on lines
6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
shown below. Note that the final "1" in on_each_cpu()'s argument list
ensures that all the calls to rcu_barrier_func() will have completed
before on_each_cpu() returns. Line 9 then waits for the completion.

This code was rewritten in 2008 and several times thereafter, but this
still gives the general idea.

The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
to post an RCU callback, as follows::

	 1 static void rcu_barrier_func(void *notused)
	 2 {
	 3   int cpu = smp_processor_id();
	 4   struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
	 5   struct rcu_head *head;
	 6
	 7   head = &rdp->barrier;
	 8   atomic_inc(&rcu_barrier_cpu_count);
	 9   call_rcu(head, rcu_barrier_callback);
	10 }

Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
which contains the struct rcu_head that is needed for the later call to
call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line 8
increments the global counter. This counter will later be decremented
by rcu_barrier_callback(). Line 9 then registers the rcu_barrier_callback()
on the current CPU's queue.

The rcu_barrier_callback() function simply atomically decrements the
rcu_barrier_cpu_count variable and finalizes the completion when it
reaches zero, as follows::

	1 static void rcu_barrier_callback(struct rcu_head *notused)
	2 {
	3   if (atomic_dec_and_test(&rcu_barrier_cpu_count))
	4     complete(&rcu_barrier_completion);
	5 }

.. _rcubarrier_quiz_2:

Quick Quiz #2:
	What happens if CPU 0's rcu_barrier_func() executes
	immediately (thus incrementing rcu_barrier_cpu_count to the
	value one), but the other CPUs' rcu_barrier_func() invocations
	are delayed for a full grace period? Couldn't this result in
	rcu_barrier() returning prematurely?

:ref:`Answer to Quick Quiz #2 <answer_rcubarrier_quiz_2>`

The current rcu_barrier() implementation is more complex, due to the need
to avoid disturbing idle CPUs (especially on battery-powered systems)
and the need to minimally disturb non-idle CPUs in real-time systems.
However, the code above illustrates the concepts.


rcu_barrier() Summary
---------------------

The rcu_barrier() primitive has seen relatively little use, since most
code using RCU is in the core kernel rather than in modules. However, if
you are using RCU from an unloadable module, you need to use rcu_barrier()
so that your module may be safely unloaded.


Answers to Quick Quizzes
------------------------

.. _answer_rcubarrier_quiz_1:

Quick Quiz #1:
	Is there any other situation where rcu_barrier() might
	be required?

Answer: Interestingly enough, rcu_barrier() was not originally
	implemented for module unloading. Nikita Danilov was using
	RCU in a filesystem, which resulted in a similar situation at
	filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
	in response, so that Nikita could invoke it during the
	filesystem-unmount process.

	Much later, yours truly hit the RCU module-unload problem when
	implementing rcutorture, and found that rcu_barrier() solves
	this problem as well.

:ref:`Back to Quick Quiz #1 <rcubarrier_quiz_1>`

.. _answer_rcubarrier_quiz_2:

Quick Quiz #2:
	What happens if CPU 0's rcu_barrier_func() executes
	immediately (thus incrementing rcu_barrier_cpu_count to the
	value one), but the other CPUs' rcu_barrier_func() invocations
	are delayed for a full grace period? Couldn't this result in
	rcu_barrier() returning prematurely?

Answer: This cannot happen. The reason is that on_each_cpu() has its last
	argument, the wait flag, set to "1".

	is to add an rcu_read_lock() before line 8 of rcu_barrier()
	and an rcu_read_unlock() after line 8 of this same function. If
	you can think of a better change, please let me know!
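
	As an illustration only (this is a sketch of that suggestion, not
	code that was ever merged), the change would look like this when
	applied to the original rcu_barrier() implementation quoted
	earlier::

		void rcu_barrier(void)
		{
			BUG_ON(in_interrupt());
			/* Take cpucontrol mutex to protect against CPU hotplug */
			mutex_lock(&rcu_barrier_mutex);
			init_completion(&rcu_barrier_completion);
			atomic_set(&rcu_barrier_cpu_count, 0);
			rcu_read_lock();	/* added: no grace period can complete... */
			on_each_cpu(rcu_barrier_func, NULL, 0, 1);
			rcu_read_unlock();	/* ...until every CPU has queued its callback */
			wait_for_completion(&rcu_barrier_completion);
			mutex_unlock(&rcu_barrier_mutex);
		}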

:ref:`Back to Quick Quiz #2 <rcubarrier_quiz_2>`