@@ -51,17 +51,18 @@ const (
5151 eventReasonRemediationSkipped = "RemediationSkipped"
5252
5353 // remediation
54- eventReasonAddFinalizer = "AddFinalizer"
55- eventReasonMarkUnschedulable = "MarkUnschedulable"
56- eventReasonAddNoExecute = "AddNoExecute"
57- eventReasonAddOutOfService = "AddOutOfService"
58- eventReasonUpdateTimeAssumedRebooted = "UpdateTimeAssumedRebooted"
59- eventReasonDeleteResources = "DeleteResources"
60- eventReasonMarkSchedulable = "MarkNodeSchedulable"
61- eventReasonRemoveFinalizer = "RemoveFinalizer"
62- eventReasonRemoveNoExecute = "RemoveNoExecuteTaint"
63- eventReasonRemoveOutOfService = "RemoveOutOfService"
64- eventReasonNodeReboot = "NodeReboot"
54+ eventReasonAddFinalizer = "AddFinalizer"
55+ eventReasonMarkUnschedulable = "MarkUnschedulable"
56+ eventReasonAddNoExecute = "AddNoExecute"
57+ eventReasonAddOutOfService = "AddOutOfService"
58+ eventReasonUpdateTimeAssumedRebooted = "UpdateTimeAssumedRebooted"
59+ eventReasonDeleteResources = "DeleteResources"
60+ eventReasonMarkSchedulable = "MarkNodeSchedulable"
61+ eventReasonRemoveFinalizer = "RemoveFinalizer"
62+ eventReasonRemoveNoExecute = "RemoveNoExecuteTaint"
63+ eventReasonRemoveOutOfService = "RemoveOutOfService"
64+ eventReasonNodeReboot = "NodeReboot"
65+ eventReasonOutOfServiceTimestampExpired = "OutOfServiceTimestampExpired"
6566)
6667
6768var (
8182 Value : "nodeshutdown" ,
8283 Effect : v1 .TaintEffectNoExecute ,
8384 }
85+
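86+ // OutOfServiceTimeoutDuration is how long after Status.TimeAssumedRebooted resource deletion is awaited; once it elapses, the out-of-service taint is removed automatically if the remediation is being deleted.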
87+ OutOfServiceTimeoutDuration = 1 * time .Minute
8488)
8589
8690type conditionReason string
@@ -430,9 +434,18 @@ func (r *SelfNodeRemediationReconciler) useOutOfServiceTaint(node *v1.Node, snr
430434 isExpired , timeLeft := r .isResourceDeletionExpired (snr )
431435 if ! isExpired {
432436 return timeLeft , nil
437+ } else if snr .GetDeletionTimestamp () != nil { // Time expired and node is healthy
438+ err := r .removeOutOfServiceTaint (node )
439+ if err != nil {
440+ return 0 , err
441+ }
442+ // Emit an event about the timeout expiration
443+ events .WarningEvent (r .Recorder , node , eventReasonOutOfServiceTimestampExpired ,
444+ "Out-of-service taint automatically removed due to timeout expiration on a healthy node" )
445+ return 0 , nil
446+ } else { // if the timer is expired, but the node is still unhealthy exponential backoff is triggered
447+ return 0 , errors .New ("Not ready to delete out-of-service taint" )
433448 }
434- // if the timer is expired, exponential backoff is triggered
435- return 0 , errors .New ("Not ready to delete out-of-service taint" )
436449 }
437450
438451 if err := r .removeOutOfServiceTaint (node ); err != nil {
@@ -902,7 +915,7 @@ func (r *SelfNodeRemediationReconciler) isPodTerminating(pod *v1.Pod) bool {
902915}
903916
904917func (r * SelfNodeRemediationReconciler ) isResourceDeletionExpired (snr * v1alpha1.SelfNodeRemediation ) (bool , time.Duration ) {
905- waitTime := snr .Status .TimeAssumedRebooted .Add (300 * time . Second )
918+ waitTime := snr .Status .TimeAssumedRebooted .Add (OutOfServiceTimeoutDuration )
906919
907920 if waitTime .After (time .Now ()) {
908921 return false , 5 * time .Second
0 commit comments