Skip to content

Commit 61301aa

Browse files
authored
Merge pull request #2131 from marquiz/devel/api-controller-memleak
nfd-master: fix memory leak when leader election is enabled
2 parents 2612b84 + a595439 commit 61301aa

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

pkg/nfd-master/nfd-master.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ type nfdMaster struct {
149149
updaterPool *updaterPool
150150
deniedNs
151151
config *NFDConfig
152+
153+
// isLeader indicates if this instance is the leader, changing dynamically
154+
isLeader bool
152155
}
153156

154157
// NewNfdMaster creates a new NfdMaster server instance.
@@ -305,10 +308,11 @@ func (m *nfdMaster) Run() error {
305308

306309
// Run updater that handles events from the nfd CRD API.
307310
if m.args.EnableLeaderElection {
308-
go m.nfdAPIUpdateHandlerWithLeaderElection()
311+
go m.startLeaderElectionHandler()
309312
} else {
310-
go m.nfdAPIUpdateHandler()
313+
m.isLeader = true
311314
}
315+
go m.nfdAPIUpdateHandler()
312316

313317
// Register health probe (at this point we're "ready and live")
314318
httpMux.HandleFunc("/healthz", m.Healthz)
@@ -349,6 +353,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
349353
case nodeFeatureGroupName := <-m.nfdController.updateNodeFeatureGroupChan:
350354
nodeFeatureGroup[nodeFeatureGroupName] = struct{}{}
351355
case <-rateLimit:
356+
// If we're not the leader, don't do anything, sleep a bit longer
357+
if !m.isLeader {
358+
rateLimit = time.After(5 * time.Second)
359+
break
360+
}
361+
352362
// NodeFeature
353363
errUpdateAll := false
354364
if updateAll {
@@ -1289,7 +1299,7 @@ func (m *nfdMaster) startNfdApiController() error {
12891299
return nil
12901300
}
12911301

1292-
func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
1302+
func (m *nfdMaster) startLeaderElectionHandler() {
12931303
ctx := context.Background()
12941304
lock := &resourcelock.LeaseLock{
12951305
LeaseMeta: metav1.ObjectMeta{
@@ -1310,11 +1320,15 @@ func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
13101320
RenewDeadline: m.config.LeaderElection.RenewDeadline.Duration,
13111321
Callbacks: leaderelection.LeaderCallbacks{
13121322
OnStartedLeading: func(_ context.Context) {
1313-
m.nfdAPIUpdateHandler()
1323+
m.isLeader = true
13141324
},
13151325
OnStoppedLeading: func() {
13161326
// We lost the lock.
13171327
klog.InfoS("leaderelection lock was lost")
1328+
// We stop (i.e. exit), which makes sure that in-flight
1329+
// requests/re-tries will be stopped. TODO: more graceful
1330+
// handling that does not exit the pod (set m.isLeader to false
1331+
// and flush updater queues...)
13181332
m.Stop()
13191333
},
13201334
},

0 commit comments

Comments
 (0)