Skip to content

Commit c3ddbad

Browse files
kyrtapzjcaamano
authored andcommitted
Prevent UDN deletion race condition by checking controller state
Adds GetActiveNetwork method to verify network controllers are stopped before allowing UDN/CUDN deletion, preventing race conditions in a scenario where a network is recreated before the cluster-manager controller stopped running. Signed-off-by: Patryk Diak <[email protected]>
1 parent 8f7fc5a commit c3ddbad

File tree

6 files changed

+49
-1
lines changed

6 files changed

+49
-1
lines changed

go-controller/pkg/clustermanager/clustermanager.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ func NewClusterManager(
146146
ovnClient.UserDefinedNetworkClient,
147147
wf.UserDefinedNetworkInformer(), wf.ClusterUserDefinedNetworkInformer(),
148148
udntemplate.RenderNetAttachDefManifest,
149+
cm.networkManager.Interface(),
149150
wf.PodCoreInformer(),
150151
wf.NamespaceInformer(),
151152
cm.recorder,

go-controller/pkg/clustermanager/userdefinednetwork/controller.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
userdefinednetworkinformer "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/informers/externalversions/userdefinednetwork/v1"
3939
userdefinednetworklister "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/listers/userdefinednetwork/v1"
4040
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics"
41+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager"
4142
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
4243
)
4344

@@ -72,6 +73,8 @@ type Controller struct {
7273
// trying to create an object with the same name.
7374
createNetworkLock sync.Mutex
7475

76+
networkManager networkmanager.Interface
77+
7578
udnClient userdefinednetworkclientset.Interface
7679
udnLister userdefinednetworklister.UserDefinedNetworkLister
7780
cudnLister userdefinednetworklister.ClusterUserDefinedNetworkLister
@@ -93,6 +96,7 @@ func New(
9396
udnInformer userdefinednetworkinformer.UserDefinedNetworkInformer,
9497
cudnInformer userdefinednetworkinformer.ClusterUserDefinedNetworkInformer,
9598
renderNadFn RenderNetAttachDefManifest,
99+
networkManager networkmanager.Interface,
96100
podInformer corev1informer.PodInformer,
97101
namespaceInformer corev1informer.NamespaceInformer,
98102
eventRecorder record.EventRecorder,
@@ -108,6 +112,7 @@ func New(
108112
renderNadFn: renderNadFn,
109113
podInformer: podInformer,
110114
namespaceInformer: namespaceInformer,
115+
networkManager: networkManager,
111116
networkInUseRequeueInterval: defaultNetworkInUseCheckInterval,
112117
namespaceTracker: map[string]sets.Set[string]{},
113118
eventRecorder: eventRecorder,
@@ -410,6 +415,12 @@ func (c *Controller) syncUserDefinedNetwork(udn *userdefinednetworkv1.UserDefine
410415
return nil, fmt.Errorf("failed to delete NetworkAttachmentDefinition [%s/%s]: %w", udn.Namespace, udn.Name, err)
411416
}
412417

418+
// Ensure that the network controller is stopped(GetActiveNetwork returns nil) before allowing the UDN
419+
// to be removed.
420+
if c.networkManager.GetActiveNetwork(util.GenerateUDNNetworkName(udn.Namespace, udn.Name)) != nil {
421+
return nil, &networkInUseError{err: fmt.Errorf("cannot remove UDN, controller for network %s is still running", util.GenerateUDNNetworkName(udn.Namespace, udn.Name))}
422+
}
423+
413424
controllerutil.RemoveFinalizer(udn, template.FinalizerUserDefinedNetwork)
414425
udn, err := c.udnClient.K8sV1().UserDefinedNetworks(udn.Namespace).Update(context.Background(), udn, metav1.UpdateOptions{})
415426
if err != nil {
@@ -582,6 +593,12 @@ func (c *Controller) syncClusterUDN(cudn *userdefinednetworkv1.ClusterUserDefine
582593
return nil, errors.Join(errs...)
583594
}
584595

596+
// Ensure that the network controller is stopped(GetActiveNetwork returns nil) before allowing the cUDN
597+
// to be removed.
598+
if c.networkManager.GetActiveNetwork(util.GenerateCUDNNetworkName(cudn.Name)) != nil {
599+
return nil, &networkInUseError{err: fmt.Errorf("cannot remove cluster UDN, controller for network %s is still running", util.GenerateCUDNNetworkName(cudn.Name))}
600+
}
601+
585602
var err error
586603
controllerutil.RemoveFinalizer(cudn, template.FinalizerUserDefinedNetwork)
587604
cudn, err = c.udnClient.K8sV1().ClusterUserDefinedNetworks().Update(context.Background(), cudn, metav1.UpdateOptions{})

go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import (
2626
udnclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned"
2727
udnfakeclient "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1/apis/clientset/versioned/fake"
2828
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory"
29+
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager"
30+
nmtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/networkmanager"
2931
ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
3032
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
3133

@@ -59,9 +61,11 @@ var _ = Describe("User Defined Network Controller", func() {
5961
Expect(err).NotTo(HaveOccurred())
6062
Expect(f.Start()).To(Succeed())
6163

64+
networkManager, err := networkmanager.NewForCluster(&nmtest.FakeControllerManager{}, f, cs, nil)
65+
Expect(err).NotTo(HaveOccurred())
6266
return New(cs.NetworkAttchDefClient, f.NADInformer(),
6367
cs.UserDefinedNetworkClient, f.UserDefinedNetworkInformer(), f.ClusterUserDefinedNetworkInformer(),
64-
renderNADStub, f.PodCoreInformer(), f.NamespaceInformer(), nil,
68+
renderNADStub, networkManager.Interface(), f.PodCoreInformer(), f.NamespaceInformer(), nil,
6569
)
6670
}
6771

go-controller/pkg/networkmanager/api.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ type Interface interface {
3939
// GetNetwork returns the network of the given name or nil if unknown
4040
GetNetwork(name string) util.NetInfo
4141

42+
// GetActiveNetwork returns the NetInfo currently held by the controller for the given network.
43+
// This may differ from the NetInfo returned by GetNetwork which reflects the API state.
44+
// Returns nil if there is no running controller for the provided network.
45+
GetActiveNetwork(network string) util.NetInfo
46+
4247
// DoWithLock takes care of locking and unlocking while iterating over all role primary user defined networks.
4348
DoWithLock(f func(network util.NetInfo) error) error
4449
GetActiveNetworkNamespaces(networkName string) ([]string, error)
@@ -204,4 +209,11 @@ func (nm defaultNetworkManager) GetActiveNetworkNamespaces(_ string) ([]string,
204209
return []string{"default"}, nil
205210
}
206211

212+
func (nm defaultNetworkManager) GetActiveNetwork(network string) util.NetInfo {
213+
if network != types.DefaultNetworkName {
214+
return nil
215+
}
216+
return &util.DefaultNetInfo{}
217+
}
218+
207219
var def Controller = &defaultNetworkManager{}

go-controller/pkg/networkmanager/nad_controller.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,3 +669,13 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf
669669

670670
return nil
671671
}
672+
673+
func (c *nadController) GetActiveNetwork(network string) util.NetInfo {
674+
c.RLock()
675+
defer c.RUnlock()
676+
state := c.networkController.getNetworkState(network)
677+
if state == nil {
678+
return nil
679+
}
680+
return state.controller
681+
}

go-controller/pkg/testing/networkmanager/fake.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,10 @@ func (fnm *FakeNetworkManager) GetNetwork(networkName string) util.NetInfo {
7878
return &util.DefaultNetInfo{}
7979
}
8080

81+
func (fnm *FakeNetworkManager) GetActiveNetwork(networkName string) util.NetInfo {
82+
return fnm.GetNetwork(networkName)
83+
}
84+
8185
func (fnm *FakeNetworkManager) GetActiveNetworkNamespaces(networkName string) ([]string, error) {
8286
namespaces := make([]string, 0)
8387
for namespaceName, primaryNAD := range fnm.PrimaryNetworks {

0 commit comments

Comments
 (0)