Skip to content

Commit 48b7e62

Browse files
authored
Merge pull request #2136 from marquiz/release-0.16
[release-0.16] nfd-master: fix memory leak when leader election is enabled
2 parents 90e8cee + 3c81c55 commit 48b7e62

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

pkg/nfd-master/nfd-master.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ type nfdMaster struct {
161161
updaterPool *updaterPool
162162
deniedNs
163163
config *NFDConfig
164+
165+
// isLeader indicates if this instance is the leader, changing dynamically
166+
isLeader bool
164167
}
165168

166169
// NewNfdMaster creates a new NfdMaster server instance.
@@ -344,10 +347,11 @@ func (m *nfdMaster) Run() error {
344347
// Run updater that handles events from the nfd CRD API.
345348
if m.nfdController != nil {
346349
if m.args.EnableLeaderElection {
347-
go m.nfdAPIUpdateHandlerWithLeaderElection()
350+
go m.startLeaderElectionHandler()
348351
} else {
349-
go m.nfdAPIUpdateHandler()
352+
m.isLeader = true
350353
}
354+
go m.nfdAPIUpdateHandler()
351355
}
352356

353357
// Start gRPC server for liveness probe (at this point we're "live")
@@ -506,6 +510,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
506510
case nodeFeatureGroupName := <-m.nfdController.updateNodeFeatureGroupChan:
507511
nodeFeatureGroup[nodeFeatureGroupName] = struct{}{}
508512
case <-rateLimit:
513+
// If we're not the leader, don't do anything, sleep a bit longer
514+
if !m.isLeader {
515+
rateLimit = time.After(5 * time.Second)
516+
break
517+
}
518+
509519
// NodeFeature
510520
errUpdateAll := false
511521
if updateAll {
@@ -1522,7 +1532,7 @@ func (m *nfdMaster) startNfdApiController() error {
15221532
return nil
15231533
}
15241534

1525-
func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
1535+
func (m *nfdMaster) startLeaderElectionHandler() {
15261536
ctx := context.Background()
15271537
lock := &resourcelock.LeaseLock{
15281538
LeaseMeta: metav1.ObjectMeta{
@@ -1543,11 +1553,15 @@ func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
15431553
RenewDeadline: m.config.LeaderElection.RenewDeadline.Duration,
15441554
Callbacks: leaderelection.LeaderCallbacks{
15451555
OnStartedLeading: func(_ context.Context) {
1546-
m.nfdAPIUpdateHandler()
1556+
m.isLeader = true
15471557
},
15481558
OnStoppedLeading: func() {
15491559
// We lost the lock.
15501560
klog.InfoS("leaderelection lock was lost")
1561+
// We stop (i.e. exit), makes sure that in-flight
1562+
// requests/re-tries will be stopped TODO: more graceful
1563+
// handling that does not exit the pod (set m.isLeader to false
1564+
// and flush updater queue and flush updater queues...)
15511565
m.Stop()
15521566
},
15531567
},

0 commit comments

Comments
 (0)