
Commit 6c325f4

Merge branch 'net_sched-fix-races-with-RCU-callbacks'
Cong Wang says:

====================
net_sched: fix races with RCU callbacks

Recently, the RCU callbacks used in TC filters and TC actions keep
drawing my attention; they introduce at least 4 race condition bugs:

1. A simple one fixed by Daniel:

   commit c78e174
   Author: Daniel Borkmann <[email protected]>
   Date:   Wed May 20 17:13:33 2015 +0200

       net: sched: fix call_rcu() race on classifier module unloads

2. A very nasty one fixed by me:

   commit 1697c4b
   Author: Cong Wang <[email protected]>
   Date:   Mon Sep 11 16:33:32 2017 -0700

       net_sched: carefully handle tcf_block_put()

3. Two more bugs found by Chris:
   https://patchwork.ozlabs.org/patch/826696/
   https://patchwork.ozlabs.org/patch/826695/

Usually RCU callbacks are simple; for TC filters and actions, however, they
are complex, because at least TC actions could be destroyed together with
the TC filter in one callback. And RCU callbacks are invoked in BH context,
so without locking they also run in parallel. All of this contributes to
these nasty bugs.

Alternatively, we could also:

a) Introduce a spinlock to serialize these RCU callbacks. But as I said in
   commit 1697c4b ("net_sched: carefully handle tcf_block_put()"), it is
   very hard to do because of tcf_chain_dump(). Potentially we would need
   a lot of work to make it possible (if not impossible).

b) Just get rid of these RCU callbacks, because they are not necessary at
   all; the callers of these call_rcu() are all on slow paths and hold the
   RTNL lock, so blocking is allowed in their contexts. However, David and
   Eric dislike adding synchronize_rcu() here.

As suggested by Paul, we can defer the work to a workqueue and regain the
permission to hold RTNL without any performance impact. However, in
tcf_block_put() we could deadlock when flushing the workqueue while holding
the RTNL lock; the trick here is to defer the work itself to the workqueue
and make it queued after all other works, so that we keep the same ordering
and avoid any use-after-free. Please see the first patch for details.

Patch 1 introduces the infrastructure, patches 2-12 move each tc filter to
the new tc filter workqueue, patch 13 adds an assertion to catch potential
bugs like this, patch 14 closes another RCU callback race, and patches 15
and 16 add new test cases.
====================

Reported-by: Chris Mi <[email protected]>
Cc: Daniel Borkmann <[email protected]>
Cc: Jiri Pirko <[email protected]>
Cc: John Fastabend <[email protected]>
Cc: Jamal Hadi Salim <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Signed-off-by: Cong Wang <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
2 parents 8c83c88 + 31c2611 commit 6c325f4
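The per-classifier conversion that patches 2-12 apply is the same everywhere. The following condensed sketch (using a hypothetical foo_filter, not code from this commit) shows the shape of it: the rcu_head is folded into a union with a work_struct, the RCU callback only queues work on the ordered tc filter workqueue, and the work handler does the actual teardown under RTNL.

/* Condensed sketch of the pattern used by each classifier patch below.
 * "foo_filter" and the foo_* helpers are hypothetical; only
 * tcf_queue_work() (added in patch 1) is real infrastructure.
 */
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <net/pkt_cls.h>

struct foo_filter {
	struct tcf_exts exts;
	union {			/* never live at the same time */
		struct work_struct work;
		struct rcu_head rcu;
	};
};

/* Runs on the ordered tc filter workqueue, in process context. */
static void foo_delete_filter_work(struct work_struct *work)
{
	struct foo_filter *f = container_of(work, struct foo_filter, work);

	rtnl_lock();		/* allowed here, unlike in the RCU callback */
	tcf_exts_destroy(&f->exts);
	rtnl_unlock();
	kfree(f);
}

/* RCU callback: still BH context, so it only hands off to the workqueue. */
static void foo_delete_filter(struct rcu_head *head)
{
	struct foo_filter *f = container_of(head, struct foo_filter, rcu);

	INIT_WORK(&f->work, foo_delete_filter_work);
	tcf_queue_work(&f->work);
}

The union is safe because the rcu_head is only used until the RCU callback has run; from that point on only the work_struct is live.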

19 files changed, 367 insertions(+), 57 deletions(-)


include/net/pkt_cls.h

Lines changed: 3 additions & 0 deletions
@@ -2,6 +2,7 @@
 #define __NET_PKT_CLS_H
 
 #include <linux/pkt_cls.h>
+#include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
@@ -17,6 +18,8 @@ struct tcf_walker {
 int register_tcf_proto_ops(struct tcf_proto_ops *ops);
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
 
+bool tcf_queue_work(struct work_struct *work);
+
 #ifdef CONFIG_NET_CLS
 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
 				bool create);

include/net/sch_generic.h

Lines changed: 2 additions & 0 deletions
@@ -10,6 +10,7 @@
 #include <linux/dynamic_queue_limits.h>
 #include <linux/list.h>
 #include <linux/refcount.h>
+#include <linux/workqueue.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -271,6 +272,7 @@ struct tcf_chain {
 
 struct tcf_block {
 	struct list_head chain_list;
+	struct work_struct work;
 };
 
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)

net/sched/act_sample.c

Lines changed: 1 addition & 0 deletions
@@ -264,6 +264,7 @@ static int __init sample_init_module(void)
 
 static void __exit sample_cleanup_module(void)
 {
+	rcu_barrier();
 	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
 }
 

net/sched/cls_api.c

Lines changed: 52 additions & 17 deletions
@@ -77,6 +77,8 @@ int register_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(register_tcf_proto_ops);
 
+static struct workqueue_struct *tc_filter_wq;
+
 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 {
 	struct tcf_proto_ops *t;
@@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 	 * tcf_proto_ops's destroy() handler.
 	 */
 	rcu_barrier();
+	flush_workqueue(tc_filter_wq);
 
 	write_lock(&cls_mod_lock);
 	list_for_each_entry(t, &tcf_proto_base, head) {
@@ -100,6 +103,12 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
+bool tcf_queue_work(struct work_struct *work)
+{
+	return queue_work(tc_filter_wq, work);
+}
+EXPORT_SYMBOL(tcf_queue_work);
+
 /* Select new prio value from the range, managed by kernel. */
 
 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
@@ -266,23 +275,30 @@ int tcf_block_get(struct tcf_block **p_block,
 }
 EXPORT_SYMBOL(tcf_block_get);
 
-void tcf_block_put(struct tcf_block *block)
+static void tcf_block_put_final(struct work_struct *work)
 {
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
 	struct tcf_chain *chain, *tmp;
 
-	if (!block)
-		return;
-
-	/* XXX: Standalone actions are not allowed to jump to any chain, and
-	 * bound actions should be all removed after flushing. However,
-	 * filters are destroyed in RCU callbacks, we have to hold the chains
-	 * first, otherwise we would always race with RCU callbacks on this list
-	 * without proper locking.
-	 */
+	/* At this point, all the chains should have refcnt == 1. */
+	rtnl_lock();
+	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+		tcf_chain_put(chain);
+	rtnl_unlock();
+	kfree(block);
+}
 
-	/* Wait for existing RCU callbacks to cool down. */
-	rcu_barrier();
+/* XXX: Standalone actions are not allowed to jump to any chain, and bound
+ * actions should be all removed after flushing. However, filters are destroyed
+ * in RCU callbacks, we have to hold the chains first, otherwise we would
+ * always race with RCU callbacks on this list without proper locking.
+ */
+static void tcf_block_put_deferred(struct work_struct *work)
+{
+	struct tcf_block *block = container_of(work, struct tcf_block, work);
+	struct tcf_chain *chain;
 
+	rtnl_lock();
 	/* Hold a refcnt for all chains, except 0, in case they are gone. */
 	list_for_each_entry(chain, &block->chain_list, list)
 		if (chain->index)
@@ -292,13 +308,27 @@ void tcf_block_put(struct tcf_block *block)
 	list_for_each_entry(chain, &block->chain_list, list)
 		tcf_chain_flush(chain);
 
-	/* Wait for RCU callbacks to release the reference count. */
+	INIT_WORK(&block->work, tcf_block_put_final);
+	/* Wait for RCU callbacks to release the reference count and make
+	 * sure their works have been queued before this.
+	 */
 	rcu_barrier();
+	tcf_queue_work(&block->work);
+	rtnl_unlock();
+}
 
-	/* At this point, all the chains should have refcnt == 1. */
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
-		tcf_chain_put(chain);
-	kfree(block);
+void tcf_block_put(struct tcf_block *block)
+{
+	if (!block)
+		return;
+
+	INIT_WORK(&block->work, tcf_block_put_deferred);
+	/* Wait for existing RCU callbacks to cool down, make sure their works
+	 * have been queued before this. We can not flush pending works here
+	 * because we are holding the RTNL lock.
+	 */
+	rcu_barrier();
+	tcf_queue_work(&block->work);
 }
 EXPORT_SYMBOL(tcf_block_put);
 
@@ -879,6 +909,7 @@ void tcf_exts_destroy(struct tcf_exts *exts)
 #ifdef CONFIG_NET_CLS_ACT
 	LIST_HEAD(actions);
 
+	ASSERT_RTNL();
 	tcf_exts_to_list(exts, &actions);
 	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
 	kfree(exts->actions);
@@ -1030,6 +1061,10 @@ EXPORT_SYMBOL(tcf_exts_get_dev);
 
 static int __init tc_filter_init(void)
 {
+	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
+	if (!tc_filter_wq)
+		return -ENOMEM;
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
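Why this avoids both the deadlock and the use-after-free may be easier to see as a timeline. tcf_block_put() cannot flush_workqueue() while holding RTNL, since queued works themselves take RTNL; instead it pushes the block teardown onto the same ordered queue, and each rcu_barrier() guarantees that the works queued by the preceding RCU callbacks are already ahead of it. A rough sketch of the resulting ordering (comments only, assuming the ordered tc_filter_wq created in tc_filter_init() above):

/* Illustrative ordering sketch, not code from this commit.
 *
 *   tcf_block_put()                           [caller holds RTNL]
 *     rcu_barrier()       -> RCU callbacks of already-deleted filters have
 *                            run, i.e. their works are already queued
 *     tcf_queue_work(tcf_block_put_deferred)  -> queued behind them
 *
 *   tc_filter_wq then executes, in queueing order:
 *     ... earlier per-filter works ...        (take RTNL, free filters)
 *     tcf_block_put_deferred():
 *         flush all chains  -> remaining filters go through call_rcu()
 *         rcu_barrier()     -> those RCU callbacks queue their works
 *         tcf_queue_work(tcf_block_put_final) -> queued behind them
 *     ... remaining per-filter works ...
 *     tcf_block_put_final(): drop chain refs, kfree(block)
 *
 * Because the workqueue is ordered (single-threaded), the block is always
 * freed after every filter work that could still reference its chains.
 */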

net/sched/cls_basic.c

Lines changed: 17 additions & 3 deletions
@@ -34,7 +34,10 @@ struct basic_filter {
 	struct tcf_result res;
 	struct tcf_proto *tp;
 	struct list_head link;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -82,15 +85,26 @@ static int basic_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void basic_delete_filter(struct rcu_head *head)
+static void basic_delete_filter_work(struct work_struct *work)
 {
-	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+	struct basic_filter *f = container_of(work, struct basic_filter, work);
 
+	rtnl_lock();
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
+	rtnl_unlock();
+
 	kfree(f);
 }
 
+static void basic_delete_filter(struct rcu_head *head)
+{
+	struct basic_filter *f = container_of(head, struct basic_filter, rcu);
+
+	INIT_WORK(&f->work, basic_delete_filter_work);
+	tcf_queue_work(&f->work);
+}
+
 static void basic_destroy(struct tcf_proto *tp)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);

net/sched/cls_bpf.c

Lines changed: 17 additions & 2 deletions
@@ -49,7 +49,10 @@ struct cls_bpf_prog {
 	struct sock_filter *bpf_ops;
 	const char *bpf_name;
 	struct tcf_proto *tp;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
@@ -257,9 +260,21 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
 	kfree(prog);
 }
 
+static void cls_bpf_delete_prog_work(struct work_struct *work)
+{
+	struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work);
+
+	rtnl_lock();
+	__cls_bpf_delete_prog(prog);
+	rtnl_unlock();
+}
+
 static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
 {
-	__cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
+	struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu);
+
+	INIT_WORK(&prog->work, cls_bpf_delete_prog_work);
+	tcf_queue_work(&prog->work);
 }
 
 static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)

net/sched/cls_cgroup.c

Lines changed: 18 additions & 4 deletions
@@ -23,7 +23,10 @@ struct cls_cgroup_head {
 	struct tcf_exts exts;
 	struct tcf_ematch_tree ematches;
 	struct tcf_proto *tp;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
@@ -57,15 +60,26 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
 	[TCA_CGROUP_EMATCHES]	= { .type = NLA_NESTED },
 };
 
+static void cls_cgroup_destroy_work(struct work_struct *work)
+{
+	struct cls_cgroup_head *head = container_of(work,
+						    struct cls_cgroup_head,
+						    work);
+	rtnl_lock();
+	tcf_exts_destroy(&head->exts);
+	tcf_em_tree_destroy(&head->ematches);
+	kfree(head);
+	rtnl_unlock();
+}
+
 static void cls_cgroup_destroy_rcu(struct rcu_head *root)
 {
 	struct cls_cgroup_head *head = container_of(root,
 						    struct cls_cgroup_head,
 						    rcu);
 
-	tcf_exts_destroy(&head->exts);
-	tcf_em_tree_destroy(&head->ematches);
-	kfree(head);
+	INIT_WORK(&head->work, cls_cgroup_destroy_work);
+	tcf_queue_work(&head->work);
 }
 
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,

net/sched/cls_flow.c

Lines changed: 16 additions & 3 deletions
@@ -57,7 +57,10 @@ struct flow_filter {
 	u32 divisor;
 	u32 baseclass;
 	u32 hashrnd;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static inline u32 addr_fold(void *addr)
@@ -369,14 +372,24 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
 	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
 };
 
-static void flow_destroy_filter(struct rcu_head *head)
+static void flow_destroy_filter_work(struct work_struct *work)
 {
-	struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+	struct flow_filter *f = container_of(work, struct flow_filter, work);
 
+	rtnl_lock();
 	del_timer_sync(&f->perturb_timer);
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
 	kfree(f);
+	rtnl_unlock();
+}
+
+static void flow_destroy_filter(struct rcu_head *head)
+{
+	struct flow_filter *f = container_of(head, struct flow_filter, rcu);
+
+	INIT_WORK(&f->work, flow_destroy_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static int flow_change(struct net *net, struct sk_buff *in_skb,

net/sched/cls_flower.c

Lines changed: 16 additions & 3 deletions
@@ -87,7 +87,10 @@ struct cls_fl_filter {
 	struct list_head list;
 	u32 handle;
 	u32 flags;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 	struct net_device *hw_dev;
 };
 
@@ -215,12 +218,22 @@ static int fl_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void fl_destroy_filter(struct rcu_head *head)
+static void fl_destroy_filter_work(struct work_struct *work)
 {
-	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+	struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work);
 
+	rtnl_lock();
 	tcf_exts_destroy(&f->exts);
 	kfree(f);
+	rtnl_unlock();
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	INIT_WORK(&f->work, fl_destroy_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)

net/sched/cls_fw.c

Lines changed: 16 additions & 3 deletions
@@ -46,7 +46,10 @@ struct fw_filter {
 #endif /* CONFIG_NET_CLS_IND */
 	struct tcf_exts exts;
 	struct tcf_proto *tp;
-	struct rcu_head rcu;
+	union {
+		struct work_struct work;
+		struct rcu_head rcu;
+	};
 };
 
 static u32 fw_hash(u32 handle)
@@ -119,12 +122,22 @@ static int fw_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void fw_delete_filter(struct rcu_head *head)
+static void fw_delete_filter_work(struct work_struct *work)
 {
-	struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+	struct fw_filter *f = container_of(work, struct fw_filter, work);
 
+	rtnl_lock();
 	tcf_exts_destroy(&f->exts);
 	kfree(f);
+	rtnl_unlock();
+}
+
+static void fw_delete_filter(struct rcu_head *head)
+{
+	struct fw_filter *f = container_of(head, struct fw_filter, rcu);
+
+	INIT_WORK(&f->work, fw_delete_filter_work);
+	tcf_queue_work(&f->work);
 }
 
 static void fw_destroy(struct tcf_proto *tp)
