Skip to content

Commit e74eb40

Browse files
authored
Merge pull request #5339 from npinaeva/remove-handlers
scale test fixes: memory leak + changed ofport
2 parents fcf5be2 + 84ed994 commit e74eb40

File tree

3 files changed

+40
-9
lines changed

3 files changed

+40
-9
lines changed

go-controller/pkg/node/openflow_manager.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,12 +275,12 @@ func checkPorts(netConfigs []*bridgeUDNConfiguration, physIntf, ofPortPhys strin
275275

276276
}
277277
if netConfig.ofPortPatch != curOfportPatch {
278-
if netConfig.isDefaultNetwork() || curOfportPatch != "" {
278+
if netConfig.isDefaultNetwork() {
279279
klog.Errorf("Fatal error: patch port %s ofport changed from %s to %s",
280280
netConfig.patchPort, netConfig.ofPortPatch, curOfportPatch)
281281
os.Exit(1)
282282
} else {
283-
klog.Warningf("Patch port %s removed for existing network", netConfig.patchPort)
283+
klog.Warningf("UDN patch port %s changed for existing network from %v to %v. Expecting bridge config update.", netConfig.patchPort, netConfig.ofPortPatch, curOfportPatch)
284284
}
285285
}
286286
}

go-controller/pkg/ovn/controller/services/services_controller.go

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ type Controller struct {
168168
useTemplates bool
169169

170170
netInfo util.NetInfo
171+
172+
// handlers stored for shutdown
173+
nodeHandler cache.ResourceEventHandlerRegistration
174+
svcHandler cache.ResourceEventHandlerRegistration
175+
endpointHandler cache.ResourceEventHandlerRegistration
171176
}
172177

173178
// Run will not return until stopCh is closed. workers determines how many
@@ -180,15 +185,15 @@ func (c *Controller) Run(workers int, stopCh <-chan struct{}, wg *sync.WaitGroup
180185
// wait until we're told to stop
181186
<-stopCh
182187

183-
klog.Infof("Shutting down controller %s for network=%s", controllerName, c.netInfo.GetNetworkName())
184-
c.queue.ShutDown()
188+
c.Cleanup()
185189
}()
186190

187191
c.useLBGroups = useLBGroups
188192
c.useTemplates = useTemplates
189193
klog.Infof("Starting controller %s for network=%s", controllerName, c.netInfo.GetNetworkName())
190194

191-
nodeHandler, err := c.nodeTracker.Start(c.nodeInformer)
195+
var err error
196+
c.nodeHandler, err = c.nodeTracker.Start(c.nodeInformer)
192197
if err != nil {
193198
return err
194199
}
@@ -197,12 +202,12 @@ func (c *Controller) Run(workers int, stopCh <-chan struct{}, wg *sync.WaitGroup
197202
c.startupDoneLock.Lock()
198203
c.startupDone = false
199204
c.startupDoneLock.Unlock()
200-
if !util.WaitForHandlerSyncWithTimeout(nodeControllerName, stopCh, types.HandlerSyncTimeout, nodeHandler.HasSynced) {
205+
if !util.WaitForHandlerSyncWithTimeout(nodeControllerName, stopCh, types.HandlerSyncTimeout, c.nodeHandler.HasSynced) {
201206
return fmt.Errorf("error syncing node tracker handler")
202207
}
203208

204209
klog.Infof("Setting up event handlers for services for network=%s", c.netInfo.GetNetworkName())
205-
svcHandler, err := c.serviceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{
210+
c.svcHandler, err = c.serviceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(cache.ResourceEventHandlerFuncs{
206211
AddFunc: c.onServiceAdd,
207212
UpdateFunc: c.onServiceUpdate,
208213
DeleteFunc: c.onServiceDelete,
@@ -212,7 +217,7 @@ func (c *Controller) Run(workers int, stopCh <-chan struct{}, wg *sync.WaitGroup
212217
}
213218

214219
klog.Infof("Setting up event handlers for endpoint slices for network=%s", c.netInfo.GetNetworkName())
215-
endpointHandler, err := c.endpointSliceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(
220+
c.endpointHandler, err = c.endpointSliceInformer.Informer().AddEventHandler(factory.WithUpdateHandlingForObjReplace(
216221
// Filter out endpointslices that don't belong to this network (i.e. keep only kube-generated endpointslices if
217222
// on default network, keep only mirrored endpointslices for this network if on UDN)
218223
util.GetEndpointSlicesEventHandlerForNetwork(
@@ -227,7 +232,7 @@ func (c *Controller) Run(workers int, stopCh <-chan struct{}, wg *sync.WaitGroup
227232
}
228233

229234
klog.Infof("Waiting for service and endpoint handlers to sync for network=%s", c.netInfo.GetNetworkName())
230-
if !util.WaitForHandlerSyncWithTimeout(controllerName, stopCh, types.HandlerSyncTimeout, svcHandler.HasSynced, endpointHandler.HasSynced) {
235+
if !util.WaitForHandlerSyncWithTimeout(controllerName, stopCh, types.HandlerSyncTimeout, c.svcHandler.HasSynced, c.endpointHandler.HasSynced) {
231236
return fmt.Errorf("error syncing service and endpoint handlers")
232237
}
233238

@@ -255,6 +260,27 @@ func (c *Controller) Run(workers int, stopCh <-chan struct{}, wg *sync.WaitGroup
255260
return nil
256261
}
257262

263+
func (c *Controller) Cleanup() {
264+
klog.Infof("Shutting down controller %s for network=%s", controllerName, c.netInfo.GetNetworkName())
265+
c.queue.ShutDown()
266+
267+
if c.nodeHandler != nil {
268+
if err := c.nodeInformer.Informer().RemoveEventHandler(c.nodeHandler); err != nil {
269+
klog.Errorf("Failed to remove node handler for network %s: %v", c.netInfo.GetNetworkName(), err)
270+
}
271+
}
272+
if c.svcHandler != nil {
273+
if err := c.serviceInformer.Informer().RemoveEventHandler(c.svcHandler); err != nil {
274+
klog.Errorf("Failed to remove service handler for network %s: %v", c.netInfo.GetNetworkName(), err)
275+
}
276+
}
277+
if c.endpointHandler != nil {
278+
if err := c.endpointSliceInformer.Informer().RemoveEventHandler(c.endpointHandler); err != nil {
279+
klog.Errorf("Failed to remove endpoint handler for network %s: %v", c.netInfo.GetNetworkName(), err)
280+
}
281+
}
282+
}
283+
258284
// worker runs a worker thread that just dequeues items, processes them, and
259285
// marks them done. You may run as many of these in parallel as you wish; the
260286
// workqueue guarantees that they will not end up processing the same service

go-controller/pkg/util/multi_network.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,11 @@ func (nInfo *secondaryNetInfo) canReconcile(other NetInfo) bool {
822822
if nInfo == nil && other == nil {
823823
return true
824824
}
825+
// if network ID has changed, it means the network was re-created, and all controllers
826+
// should execute delete+create instead of update
827+
if nInfo.GetNetworkID() != types.InvalidID && other.GetNetworkID() != types.InvalidID && nInfo.GetNetworkID() != other.GetNetworkID() {
828+
return false
829+
}
825830
if nInfo.netName != other.GetNetworkName() {
826831
return false
827832
}

0 commit comments

Comments
 (0)