Skip to content

Commit bba1f03

Browse files
committed
Introduce the concept of inhibiting blockers
Inhibiting blockers block only the final reboot command, not the draining of nodes.

Signed-off-by: Georg Doser <georg.doser@nitrado.net>
1 parent 5b2d4d4 commit bba1f03

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

cmd/kured/main.go

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ var (
7070
messageTemplateDrain string
7171
messageTemplateReboot string
7272
messageTemplateUncordon string
73-
podSelectors []string
73+
blockingPodSelectors []string
74+
inhibitingPodSelectors []string
75+
inhibitingNodeAnnotations []string
7476
blockingNodeAnnotations []string
7577
rebootCommand string
7678
rebootSignal int
@@ -181,10 +183,14 @@ func main() {
181183
"message template used to notify about a node being drained")
182184
flag.StringVar(&messageTemplateReboot, "message-template-reboot", "Rebooting node %s",
183185
"message template used to notify about a node being rebooted")
184-
flag.StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
186+
flag.StringArrayVar(&blockingPodSelectors, "blocking-pod-selector", nil,
185187
"label selector identifying pods whose presence should prevent reboots")
188+
flag.StringArrayVar(&inhibitingPodSelectors, "inhibiting-pod-selector", nil,
189+
"label selector identifying pods whose presence should prevent final reboots not draining of nodes")
186190
flag.StringArrayVar(&blockingNodeAnnotations, "blocking-node-annotation", nil,
187-
"node annotation whose presence should prevent final reboots not draining")
191+
"node annotation whose presence should prevent node reboots")
192+
flag.StringArrayVar(&inhibitingNodeAnnotations, "inhibiting-node-annotation", nil,
193+
"node annotation whose presence should prevent final reboots not draining of nodes")
188194
flag.StringSliceVar(&rebootDays, "reboot-days", timewindow.EveryDay,
189195
"schedule reboot on these days")
190196
flag.StringVar(&rebootStart, "start-time", "0:00",
@@ -228,7 +234,7 @@ func main() {
228234
log.Infof("PreferNoSchedule taint: %s", preferNoScheduleTaintName)
229235

230236
// This should be printed from blocker list instead of only blocking pod selectors
231-
log.Infof("Blocking Pod Selectors: %v", podSelectors)
237+
log.Infof("Blocking Pod Selectors: %v", blockingPodSelectors)
232238

233239
log.Infof("Reboot period %v", period)
234240
log.Infof("Concurrency: %v", concurrency)
@@ -269,17 +275,19 @@ func main() {
269275
if prometheusURL != "" {
270276
blockCheckers = append(blockCheckers, blockers.NewPrometheusBlockingChecker(papi.Config{Address: prometheusURL}, alertFilter.Regexp, alertFiringOnly, alertFilterMatchOnly))
271277
}
272-
if podSelectors != nil {
273-
blockCheckers = append(blockCheckers, blockers.NewKubernetesBlockingChecker(client, nodeID, podSelectors))
278+
if blockingPodSelectors != nil {
279+
blockCheckers = append(blockCheckers, blockers.NewKubernetesBlockingChecker(client, nodeID, blockingPodSelectors))
274280
}
275281

276282
// These prevent the rebooter from rebooting the node; the node is still drained.
277283
// This is useful for cases in which you want to wait for a condition that is only met after draining the node.
278-
log.Info("Setting up rebooter blockers")
279-
var rebooterBlockCheckers []blockers.RebootBlocker
284+
var inhibitingBlockCheckers []blockers.RebootBlocker
280285
if blockingNodeAnnotations != nil {
281286
log.Info("Setup rebooter blocker for node annotations")
282-
rebooterBlockCheckers = append(rebooterBlockCheckers, blockers.NewNodeBlockingChecker(client, nodeID, blockingNodeAnnotations))
287+
inhibitingBlockCheckers = append(inhibitingBlockCheckers, blockers.NewNodeBlockingChecker(client, nodeID, blockingNodeAnnotations))
288+
}
289+
if inhibitingPodSelectors != nil {
290+
inhibitingBlockCheckers = append(inhibitingBlockCheckers, blockers.NewKubernetesBlockingChecker(client, nodeID, inhibitingPodSelectors))
283291
}
284292

285293
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
@@ -295,7 +303,7 @@ func main() {
295303
}
296304
lock := daemonsetlock.New(client, nodeID, dsNamespace, dsName, lockAnnotation, lockTTL, concurrency, lockReleaseDelay)
297305

298-
go rebootAsRequired(nodeID, rebooter, rebootChecker, blockCheckers, rebooterBlockCheckers, window, lock, client)
306+
go rebootAsRequired(nodeID, rebooter, rebootChecker, blockCheckers, inhibitingBlockCheckers, window, lock, client)
299307
go maintainRebootRequiredMetric(nodeID, rebootChecker)
300308

301309
http.Handle("/metrics", promhttp.Handler())
@@ -566,7 +574,7 @@ func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []stri
566574
}
567575
}
568576

569-
func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.Checker, blockCheckers []blockers.RebootBlocker, rebooterBlockCheckers []blockers.RebootBlocker, window *timewindow.TimeWindow, lock daemonsetlock.Lock, client *kubernetes.Clientset) {
577+
func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.Checker, blockCheckers []blockers.RebootBlocker, inhibitingBlockCheckers []blockers.RebootBlocker, window *timewindow.TimeWindow, lock daemonsetlock.Lock, client *kubernetes.Clientset) {
570578

571579
source := rand.NewSource(time.Now().UnixNano())
572580
tick := delaytick.New(source, 1*time.Minute)
@@ -703,6 +711,11 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
703711
}
704712
}
705713

714+
if blockers.RebootBlocked(inhibitingBlockCheckers...) {
715+
log.Info("Reboot required, but blocked by inhibiting blockers")
716+
continue
717+
}
718+
706719
if rebootDelay > 0 {
707720
log.Infof("Delaying reboot for %v", rebootDelay)
708721
time.Sleep(rebootDelay)
@@ -714,10 +727,6 @@ func rebootAsRequired(nodeID string, rebooter reboot.Rebooter, checker checkers.
714727
}
715728
}
716729

717-
if blockers.RebootBlocked(rebooterBlockCheckers...) {
718-
log.Info("Reboot required, but blocked by rebooter blockers")
719-
continue
720-
}
721730
log.Infof("Triggering reboot for node %v", nodeID)
722731

723732
err = rebooter.Reboot()

pkg/blockers/nodeannotation.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func NewNodeBlockingChecker(client *kubernetes.Clientset, nodename string, nodeA
3333

3434
// IsBlocked for the NodeBlockingChecker will check if an annotation on the node is preventing
3535
// the reboot. It will warn in the logs about blocking, but does not return an error.
36-
func (kb NodeBlockingChecker) IsBlocked() bool {
36+
func (kb *NodeBlockingChecker) IsBlocked() bool {
3737
node, err := kb.client.CoreV1().Nodes().Get(context.TODO(), kb.nodeName, metav1.GetOptions{})
3838
if err != nil {
3939
log.Warnf("Reboot blocked: node query error: %v", err)

0 commit comments

Comments
 (0)