@@ -149,6 +149,9 @@ type nfdMaster struct {
149149 updaterPool * updaterPool
150150 deniedNs
151151 config * NFDConfig
152+
153+ // isLeader reports whether this instance is currently the leader; it may change at runtime
154+ isLeader bool
152155}
153156
154157// NewNfdMaster creates a new NfdMaster server instance.
@@ -305,10 +308,11 @@ func (m *nfdMaster) Run() error {
305308
306309 // Run updater that handles events from the nfd CRD API.
307310 if m .args .EnableLeaderElection {
308- go m .nfdAPIUpdateHandlerWithLeaderElection ()
311+ go m .startLeaderElectionHandler ()
309312 } else {
310- go m . nfdAPIUpdateHandler ()
313+ m . isLeader = true
311314 }
315+ go m .nfdAPIUpdateHandler ()
312316
313317 // Register health probe (at this point we're "ready and live")
314318 httpMux .HandleFunc ("/healthz" , m .Healthz )
@@ -349,6 +353,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
349353 case nodeFeatureGroupName := <- m .nfdController .updateNodeFeatureGroupChan :
350354 nodeFeatureGroup [nodeFeatureGroupName ] = struct {}{}
351355 case <- rateLimit :
356+ // If we're not the leader, skip this round and re-arm the timer with a longer delay
357+ if ! m .isLeader {
358+ rateLimit = time .After (5 * time .Second )
359+ break
360+ }
361+
352362 // NodeFeature
353363 errUpdateAll := false
354364 if updateAll {
@@ -1289,7 +1299,7 @@ func (m *nfdMaster) startNfdApiController() error {
12891299 return nil
12901300}
12911301
1292- func (m * nfdMaster ) nfdAPIUpdateHandlerWithLeaderElection () {
1302+ func (m * nfdMaster ) startLeaderElectionHandler () {
12931303 ctx := context .Background ()
12941304 lock := & resourcelock.LeaseLock {
12951305 LeaseMeta : metav1.ObjectMeta {
@@ -1310,11 +1320,15 @@ func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
13101320 RenewDeadline : m .config .LeaderElection .RenewDeadline .Duration ,
13111321 Callbacks : leaderelection.LeaderCallbacks {
13121322 OnStartedLeading : func (_ context.Context ) {
1313- m .nfdAPIUpdateHandler ()
1323+ m .isLeader = true
13141324 },
13151325 OnStoppedLeading : func () {
13161326 // We lost the lock.
13171327 klog .InfoS ("leaderelection lock was lost" )
1328+ // We stop (i.e. exit), which makes sure that in-flight
1329+ // requests/retries are stopped. TODO: more graceful
1330+ // handling that does not exit the pod (set m.isLeader to false
1331+ // and flush the updater queues...)
13181332 m .Stop ()
13191333 },
13201334 },
0 commit comments