Skip to content

Commit d2fddbd

Browse files
Cosmin Ratiuklassert
authored andcommitted
bonding: Fix multiple long standing offload races
Refactor the bonding ipsec offload operations to fix a number of long-standing control plane races between state migration and user deletion and a few other issues. xfrm state deletion can happen concurrently with bond_change_active_slave() operation. This manifests itself as a bond_ipsec_del_sa() call with x->lock held, followed by a bond_ipsec_free_sa() a bit later from a wq. The alternate path of these calls coming from xfrm_dev_state_flush() can't happen, as that needs the RTNL lock and bond_change_active_slave() already holds it. 1. bond_ipsec_del_sa_all() might call xdo_dev_state_delete() a second time on an xfrm state that was concurrently killed. This is bad. 2. bond_ipsec_add_sa_all() can add a state on the new device, but pending bond_ipsec_free_sa() calls from the old device will then hit the WARN_ON() and then, worse, call xdo_dev_state_free() on the new device without a corresponding xdo_dev_state_delete(). 3. Resolve a sleeping in atomic context introduced by the mentioned "Fixes" commit. bond_ipsec_del_sa_all() and bond_ipsec_add_sa_all() now acquire x->lock and check for x->km.state to help with problems 1 and 2. And since xso.real_dev is now a private pointer managed by the bonding driver in xfrm state, make better use of it to fully fix problems 1 and 2. In bond_ipsec_del_sa_all(), set xso.real_dev to NULL while holding both the mutex and x->lock, which makes sure that neither bond_ipsec_del_sa() nor bond_ipsec_free_sa() could run concurrently. Fix problem 3 by moving the list cleanup (which requires the mutex) from bond_ipsec_del_sa() (called from atomic context) to bond_ipsec_free_sa() Finally, simplify bond_ipsec_del_sa() and bond_ipsec_free_sa() by using xso->real_dev directly, since it's now protected by locks and can be trusted to always reflect the offload device. Fixes: 2aeeef9 ("bonding: change ipsec_lock from spin lock to mutex") Signed-off-by: Cosmin Ratiu <[email protected]> Reviewed-by: Leon Romanovsky <[email protected]> Reviewed-by: Nikolay Aleksandrov <[email protected]> Reviewed-by: Hangbin Liu <[email protected]> Tested-by: Hangbin Liu <[email protected]> Signed-off-by: Steffen Klassert <[email protected]>
1 parent fd4e41e commit d2fddbd

File tree

2 files changed

+41
-48
lines changed

2 files changed

+41
-48
lines changed

drivers/net/bonding/bond_main.c

Lines changed: 36 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,20 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
545545
slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
546546
continue;
547547
}
548+
549+
spin_lock_bh(&ipsec->xs->lock);
550+
/* xs might have been killed by the user during the migration
551+
* to the new dev, but bond_ipsec_del_sa() should have done
552+
* nothing, as xso.real_dev is NULL.
553+
* Delete it from the device we just added it to. The pending
554+
* bond_ipsec_free_sa() call will do the rest of the cleanup.
555+
*/
556+
if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
557+
real_dev->xfrmdev_ops->xdo_dev_state_delete)
558+
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
559+
ipsec->xs);
548560
ipsec->xs->xso.real_dev = real_dev;
561+
spin_unlock_bh(&ipsec->xs->lock);
549562
}
550563
out:
551564
mutex_unlock(&bond->ipsec_lock);
@@ -560,48 +573,20 @@ static void bond_ipsec_del_sa(struct net_device *bond_dev,
560573
struct xfrm_state *xs)
561574
{
562575
struct net_device *real_dev;
563-
netdevice_tracker tracker;
564-
struct bond_ipsec *ipsec;
565-
struct bonding *bond;
566-
struct slave *slave;
567576

568-
if (!bond_dev)
577+
if (!bond_dev || !xs->xso.real_dev)
569578
return;
570579

571-
rcu_read_lock();
572-
bond = netdev_priv(bond_dev);
573-
slave = rcu_dereference(bond->curr_active_slave);
574-
real_dev = slave ? slave->dev : NULL;
575-
netdev_hold(real_dev, &tracker, GFP_ATOMIC);
576-
rcu_read_unlock();
577-
578-
if (!slave)
579-
goto out;
580-
581-
if (!xs->xso.real_dev)
582-
goto out;
583-
584-
WARN_ON(xs->xso.real_dev != real_dev);
580+
real_dev = xs->xso.real_dev;
585581

586582
if (!real_dev->xfrmdev_ops ||
587583
!real_dev->xfrmdev_ops->xdo_dev_state_delete ||
588584
netif_is_bond_master(real_dev)) {
589585
slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__);
590-
goto out;
586+
return;
591587
}
592588

593589
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs);
594-
out:
595-
netdev_put(real_dev, &tracker);
596-
mutex_lock(&bond->ipsec_lock);
597-
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
598-
if (ipsec->xs == xs) {
599-
list_del(&ipsec->list);
600-
kfree(ipsec);
601-
break;
602-
}
603-
}
604-
mutex_unlock(&bond->ipsec_lock);
605590
}
606591

607592
static void bond_ipsec_del_sa_all(struct bonding *bond)
@@ -629,9 +614,15 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
629614
__func__);
630615
continue;
631616
}
617+
618+
spin_lock_bh(&ipsec->xs->lock);
632619
ipsec->xs->xso.real_dev = NULL;
633-
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
634-
ipsec->xs);
620+
/* Don't double delete states killed by the user. */
621+
if (ipsec->xs->km.state != XFRM_STATE_DEAD)
622+
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
623+
ipsec->xs);
624+
spin_unlock_bh(&ipsec->xs->lock);
625+
635626
if (real_dev->xfrmdev_ops->xdo_dev_state_free)
636627
real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev,
637628
ipsec->xs);
@@ -643,34 +634,33 @@ static void bond_ipsec_free_sa(struct net_device *bond_dev,
643634
struct xfrm_state *xs)
644635
{
645636
struct net_device *real_dev;
646-
netdevice_tracker tracker;
637+
struct bond_ipsec *ipsec;
647638
struct bonding *bond;
648-
struct slave *slave;
649639

650640
if (!bond_dev)
651641
return;
652642

653-
rcu_read_lock();
654643
bond = netdev_priv(bond_dev);
655-
slave = rcu_dereference(bond->curr_active_slave);
656-
real_dev = slave ? slave->dev : NULL;
657-
netdev_hold(real_dev, &tracker, GFP_ATOMIC);
658-
rcu_read_unlock();
659-
660-
if (!slave)
661-
goto out;
662644

645+
mutex_lock(&bond->ipsec_lock);
663646
if (!xs->xso.real_dev)
664647
goto out;
665648

666-
WARN_ON(xs->xso.real_dev != real_dev);
649+
real_dev = xs->xso.real_dev;
667650

668651
xs->xso.real_dev = NULL;
669-
if (real_dev && real_dev->xfrmdev_ops &&
652+
if (real_dev->xfrmdev_ops &&
670653
real_dev->xfrmdev_ops->xdo_dev_state_free)
671654
real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev, xs);
672655
out:
673-
netdev_put(real_dev, &tracker);
656+
list_for_each_entry(ipsec, &bond->ipsec_list, list) {
657+
if (ipsec->xs == xs) {
658+
list_del(&ipsec->list);
659+
kfree(ipsec);
660+
break;
661+
}
662+
}
663+
mutex_unlock(&bond->ipsec_lock);
674664
}
675665

676666
/**

include/net/xfrm.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,11 @@ struct xfrm_dev_offload {
154154
*/
155155
struct net_device *dev;
156156
netdevice_tracker dev_tracker;
157-
/* This is a private pointer used by the bonding driver.
158-
* Device drivers should not use it.
157+
/* This is a private pointer used by the bonding driver (and eventually
158+
* should be moved there). Device drivers should not use it.
159+
* Protected by xfrm_state.lock AND bond.ipsec_lock in most cases,
160+
* except in the .xdo_dev_state_del() flow, where only xfrm_state.lock
161+
* is held.
159162
*/
160163
struct net_device *real_dev;
161164
unsigned long offload_handle;

0 commit comments

Comments
 (0)