Skip to content

Commit d06f925

Browse files
vladimirolteankuba-moo
authored andcommitted
net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses
When using the felix driver (the only one which supports UC filtering and MC filtering) as a DSA master for a random other DSA switch, one can see the following stack trace when the downstream switch ports join a VLAN-aware bridge: ============================= WARNING: suspicious RCU usage ----------------------------- net/8021q/vlan_core.c:238 suspicious rcu_dereference_protected() usage! stack backtrace: Workqueue: dsa_ordered dsa_slave_switchdev_event_work Call trace: lockdep_rcu_suspicious+0x170/0x210 vlan_for_each+0x8c/0x188 dsa_slave_sync_uc+0x128/0x178 __hw_addr_sync_dev+0x138/0x158 dsa_slave_set_rx_mode+0x58/0x70 __dev_set_rx_mode+0x88/0xa8 dev_uc_add+0x74/0xa0 dsa_port_bridge_host_fdb_add+0xec/0x180 dsa_slave_switchdev_event_work+0x7c/0x1c8 process_one_work+0x290/0x568 What it's saying is that vlan_for_each() expects rtnl_lock() context and it's not getting it, when it's called from the DSA master's ndo_set_rx_mode(). The caller of that - dsa_slave_set_rx_mode() - is the slave DSA interface's dsa_port_bridge_host_fdb_add() which comes from the deferred dsa_slave_switchdev_event_work(). We went to great lengths to avoid the rtnl_lock() context in that call path in commit 0faf890 ("net: dsa: drop rtnl_lock from dsa_slave_switchdev_event_work"), and calling rtnl_lock() is simply not an option due to the possibility of deadlocking when calling dsa_flush_workqueue() from the call paths that do hold rtnl_lock() - basically all of them. So, when the DSA master calls vlan_for_each() from its ndo_set_rx_mode(), the state of the 8021q driver on this device is really not protected from concurrent access by anything. Looking at net/8021q/, I don't think that vlan_info->vid_list was particularly designed with RCU traversal in mind, so introducing an RCU read-side form of vlan_for_each() - vlan_for_each_rcu() - won't be so easy, and it also wouldn't be exactly what we need anyway. In general I believe that the solution isn't in net/8021q/ anyway; vlan_for_each() is not cut out for this task. DSA doesn't need rtnl_lock() to be held per se - since it's not a netdev state change that we're blocking, but rather, just concurrent additions/removals to a VLAN list. We don't even need sleepable context - the callback of vlan_for_each() just schedules deferred work. The proposed escape is to remove the dependency on vlan_for_each() and to open-code a non-sleepable, rtnl-free alternative to that, based on copies of the VLAN list modified from .ndo_vlan_rx_add_vid() and .ndo_vlan_rx_kill_vid(). Fixes: 64fdc5f ("net: dsa: sync unicast and multicast addresses for VLAN filters too") Signed-off-by: Vladimir Oltean <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent eaaacb0 commit d06f925

File tree

5 files changed

+74
-31
lines changed

5 files changed

+74
-31
lines changed

include/net/dsa.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,9 +314,17 @@ struct dsa_port {
314314
struct list_head fdbs;
315315
struct list_head mdbs;
316316

317-
/* List of VLANs that CPU and DSA ports are members of. */
318317
struct mutex vlans_lock;
319-
struct list_head vlans;
318+
union {
319+
/* List of VLANs that CPU and DSA ports are members of.
320+
* Access to this is serialized by the sleepable @vlans_lock.
321+
*/
322+
struct list_head vlans;
323+
/* List of VLANs that user ports are members of.
324+
* Access to this is serialized by netif_addr_lock_bh().
325+
*/
326+
struct list_head user_vlans;
327+
};
320328
};
321329

322330
/* TODO: ideally DSA ports would have a single dp->link_dp member,

net/dsa/dsa.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1106,7 +1106,7 @@ static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
11061106
mutex_init(&dp->vlans_lock);
11071107
INIT_LIST_HEAD(&dp->fdbs);
11081108
INIT_LIST_HEAD(&dp->mdbs);
1109-
INIT_LIST_HEAD(&dp->vlans);
1109+
INIT_LIST_HEAD(&dp->vlans); /* also initializes &dp->user_vlans */
11101110
INIT_LIST_HEAD(&dp->list);
11111111
list_add_tail(&dp->list, &dst->ports);
11121112

net/dsa/slave.c

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "master.h"
2828
#include "netlink.h"
2929
#include "slave.h"
30+
#include "switch.h"
3031
#include "tag.h"
3132

3233
struct dsa_switchdev_event_work {
@@ -161,15 +162,36 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev,
161162
return 0;
162163
}
163164

164-
static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid,
165-
void *arg)
165+
static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid)
166166
{
167167
struct dsa_host_vlan_rx_filtering_ctx *ctx = arg;
168168

169169
return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event,
170170
ctx->addr, vid);
171171
}
172172

173+
static int dsa_slave_vlan_for_each(struct net_device *dev,
174+
int (*cb)(void *arg, int vid), void *arg)
175+
{
176+
struct dsa_port *dp = dsa_slave_to_port(dev);
177+
struct dsa_vlan *v;
178+
int err;
179+
180+
lockdep_assert_held(&dev->addr_list_lock);
181+
182+
err = cb(arg, 0);
183+
if (err)
184+
return err;
185+
186+
list_for_each_entry(v, &dp->user_vlans, list) {
187+
err = cb(arg, v->vid);
188+
if (err)
189+
return err;
190+
}
191+
192+
return 0;
193+
}
194+
173195
static int dsa_slave_sync_uc(struct net_device *dev,
174196
const unsigned char *addr)
175197
{
@@ -180,18 +202,14 @@ static int dsa_slave_sync_uc(struct net_device *dev,
180202
.addr = addr,
181203
.event = DSA_UC_ADD,
182204
};
183-
int err;
184205

185206
dev_uc_add(master, addr);
186207

187208
if (!dsa_switch_supports_uc_filtering(dp->ds))
188209
return 0;
189210

190-
err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
191-
if (err)
192-
return err;
193-
194-
return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
211+
return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
212+
&ctx);
195213
}
196214

197215
static int dsa_slave_unsync_uc(struct net_device *dev,
@@ -204,18 +222,14 @@ static int dsa_slave_unsync_uc(struct net_device *dev,
204222
.addr = addr,
205223
.event = DSA_UC_DEL,
206224
};
207-
int err;
208225

209226
dev_uc_del(master, addr);
210227

211228
if (!dsa_switch_supports_uc_filtering(dp->ds))
212229
return 0;
213230

214-
err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
215-
if (err)
216-
return err;
217-
218-
return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
231+
return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
232+
&ctx);
219233
}
220234

221235
static int dsa_slave_sync_mc(struct net_device *dev,
@@ -228,18 +242,14 @@ static int dsa_slave_sync_mc(struct net_device *dev,
228242
.addr = addr,
229243
.event = DSA_MC_ADD,
230244
};
231-
int err;
232245

233246
dev_mc_add(master, addr);
234247

235248
if (!dsa_switch_supports_mc_filtering(dp->ds))
236249
return 0;
237250

238-
err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
239-
if (err)
240-
return err;
241-
242-
return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
251+
return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
252+
&ctx);
243253
}
244254

245255
static int dsa_slave_unsync_mc(struct net_device *dev,
@@ -252,18 +262,14 @@ static int dsa_slave_unsync_mc(struct net_device *dev,
252262
.addr = addr,
253263
.event = DSA_MC_DEL,
254264
};
255-
int err;
256265

257266
dev_mc_del(master, addr);
258267

259268
if (!dsa_switch_supports_mc_filtering(dp->ds))
260269
return 0;
261270

262-
err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
263-
if (err)
264-
return err;
265-
266-
return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
271+
return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
272+
&ctx);
267273
}
268274

269275
void dsa_slave_sync_ha(struct net_device *dev)
@@ -1759,6 +1765,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
17591765
struct netlink_ext_ack extack = {0};
17601766
struct dsa_switch *ds = dp->ds;
17611767
struct netdev_hw_addr *ha;
1768+
struct dsa_vlan *v;
17621769
int ret;
17631770

17641771
/* User port... */
@@ -1782,8 +1789,17 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
17821789
!dsa_switch_supports_mc_filtering(ds))
17831790
return 0;
17841791

1792+
v = kzalloc(sizeof(*v), GFP_KERNEL);
1793+
if (!v) {
1794+
ret = -ENOMEM;
1795+
goto rollback;
1796+
}
1797+
17851798
netif_addr_lock_bh(dev);
17861799

1800+
v->vid = vid;
1801+
list_add_tail(&v->list, &dp->user_vlans);
1802+
17871803
if (dsa_switch_supports_mc_filtering(ds)) {
17881804
netdev_for_each_synced_mc_addr(ha, dev) {
17891805
dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD,
@@ -1803,6 +1819,12 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
18031819
dsa_flush_workqueue();
18041820

18051821
return 0;
1822+
1823+
rollback:
1824+
dsa_port_host_vlan_del(dp, &vlan);
1825+
dsa_port_vlan_del(dp, &vlan);
1826+
1827+
return ret;
18061828
}
18071829

18081830
static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
@@ -1816,6 +1838,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
18161838
};
18171839
struct dsa_switch *ds = dp->ds;
18181840
struct netdev_hw_addr *ha;
1841+
struct dsa_vlan *v;
18191842
int err;
18201843

18211844
err = dsa_port_vlan_del(dp, &vlan);
@@ -1832,6 +1855,15 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
18321855

18331856
netif_addr_lock_bh(dev);
18341857

1858+
v = dsa_vlan_find(&dp->user_vlans, &vlan);
1859+
if (!v) {
1860+
netif_addr_unlock_bh(dev);
1861+
return -ENOENT;
1862+
}
1863+
1864+
list_del(&v->list);
1865+
kfree(v);
1866+
18351867
if (dsa_switch_supports_mc_filtering(ds)) {
18361868
netdev_for_each_synced_mc_addr(ha, dev) {
18371869
dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL,

net/dsa/switch.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -673,8 +673,8 @@ static bool dsa_port_host_vlan_match(struct dsa_port *dp,
673673
return false;
674674
}
675675

676-
static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
677-
const struct switchdev_obj_port_vlan *vlan)
676+
struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
677+
const struct switchdev_obj_port_vlan *vlan)
678678
{
679679
struct dsa_vlan *v;
680680

net/dsa/switch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ struct dsa_notifier_master_state_info {
111111
bool operational;
112112
};
113113

114+
struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
115+
const struct switchdev_obj_port_vlan *vlan);
116+
114117
int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
115118
int dsa_broadcast(unsigned long e, void *v);
116119

0 commit comments

Comments
 (0)