Skip to content

Commit d822e97

Browse files
authored
Merge pull request kubernetes#3653 from towca/jtuznik/unready-taints
Include taints by condition when determining if a node is unready/still starting
2 parents 34a3f6d + 6a528b4 commit d822e97

File tree

3 files changed

+71
-25
lines changed

3 files changed

+71
-25
lines changed

cluster-autoscaler/clusterstate/clusterstate.go

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -860,22 +860,34 @@ func buildScaleDownStatusClusterwide(candidates map[string][]string, lastProbed
860860
return condition
861861
}
862862

863+
func hasTaint(node *apiv1.Node, taintKey string) bool {
864+
for _, taint := range node.Spec.Taints {
865+
if taint.Key == taintKey {
866+
return true
867+
}
868+
}
869+
return false
870+
}
871+
863872
func isNodeStillStarting(node *apiv1.Node) bool {
864873
for _, condition := range node.Status.Conditions {
865-
if condition.Type == apiv1.NodeReady &&
866-
condition.Status != apiv1.ConditionTrue &&
867-
condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
868-
return true
874+
if condition.Type == apiv1.NodeReady {
875+
notReady := condition.Status != apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeNotReady)
876+
if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
877+
return true
878+
}
869879
}
870-
if condition.Type == apiv1.NodeDiskPressure &&
871-
condition.Status == apiv1.ConditionTrue &&
872-
condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
873-
return true
880+
if condition.Type == apiv1.NodeDiskPressure {
881+
notReady := condition.Status == apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeDiskPressure)
882+
if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
883+
return true
884+
}
874885
}
875-
if condition.Type == apiv1.NodeNetworkUnavailable &&
876-
condition.Status == apiv1.ConditionTrue &&
877-
condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
878-
return true
886+
if condition.Type == apiv1.NodeNetworkUnavailable {
887+
notReady := condition.Status == apiv1.ConditionTrue || hasTaint(node, apiv1.TaintNodeNetworkUnavailable)
888+
if notReady && condition.LastTransitionTime.Time.Sub(node.CreationTimestamp.Time) < MaxStatusSettingDelayAfterCreation {
889+
return true
890+
}
879891
}
880892
}
881893
return false

cluster-autoscaler/clusterstate/clusterstate_test.go

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -773,27 +773,46 @@ func TestIsNodeStillStarting(t *testing.T) {
773773
desc string
774774
condition apiv1.NodeConditionType
775775
status apiv1.ConditionStatus
776+
taintKey string
776777
expectedResult bool
777778
}{
778-
{"unready", apiv1.NodeReady, apiv1.ConditionFalse, true},
779-
{"readiness unknown", apiv1.NodeReady, apiv1.ConditionUnknown, true},
780-
{"out of disk", apiv1.NodeDiskPressure, apiv1.ConditionTrue, true},
781-
{"network unavailable", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, true},
782-
{"started", apiv1.NodeReady, apiv1.ConditionTrue, false},
779+
{"unready", apiv1.NodeReady, apiv1.ConditionFalse, "", true},
780+
{"readiness unknown", apiv1.NodeReady, apiv1.ConditionUnknown, "", true},
781+
{"out of disk", apiv1.NodeDiskPressure, apiv1.ConditionTrue, "", true},
782+
{"network unavailable", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, "", true},
783+
{"started", apiv1.NodeReady, apiv1.ConditionTrue, "", false},
784+
{"unready and unready taint", apiv1.NodeReady, apiv1.ConditionFalse, apiv1.TaintNodeNotReady, true},
785+
{"readiness unknown and unready taint", apiv1.NodeReady, apiv1.ConditionUnknown, apiv1.TaintNodeNotReady, true},
786+
{"disk pressure and disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionTrue, apiv1.TaintNodeDiskPressure, true},
787+
{"network unavailable and network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionTrue, apiv1.TaintNodeNetworkUnavailable, true},
788+
{"ready but unready taint", apiv1.NodeReady, apiv1.ConditionTrue, apiv1.TaintNodeNotReady, true},
789+
{"no disk pressure but disk pressure taint", apiv1.NodeDiskPressure, apiv1.ConditionFalse, apiv1.TaintNodeDiskPressure, true},
790+
{"network available but network unavailable taint", apiv1.NodeNetworkUnavailable, apiv1.ConditionFalse, apiv1.TaintNodeNetworkUnavailable, true},
783791
}
784-
785-
now := time.Now()
786792
for _, tc := range testCases {
787-
t.Run("recent "+tc.desc, func(t *testing.T) {
793+
createTestNode := func(timeSinceCreation time.Duration) *apiv1.Node {
788794
node := BuildTestNode("n1", 1000, 1000)
789-
node.CreationTimestamp.Time = now
790-
SetNodeCondition(node, tc.condition, tc.status, now.Add(1*time.Minute))
795+
node.CreationTimestamp.Time = time.Time{}
796+
testedTime := node.CreationTimestamp.Time.Add(timeSinceCreation)
797+
798+
SetNodeCondition(node, tc.condition, tc.status, testedTime)
799+
800+
if tc.taintKey != "" {
801+
node.Spec.Taints = []apiv1.Taint{{
802+
Key: tc.taintKey,
803+
Effect: apiv1.TaintEffectNoSchedule,
804+
TimeAdded: &metav1.Time{Time: testedTime},
805+
}}
806+
}
807+
808+
return node
809+
}
810+
t.Run("recent "+tc.desc, func(t *testing.T) {
811+
node := createTestNode(1 * time.Minute)
791812
assert.Equal(t, tc.expectedResult, isNodeStillStarting(node))
792813
})
793814
t.Run("long "+tc.desc, func(t *testing.T) {
794-
node := BuildTestNode("n1", 1000, 1000)
795-
node.CreationTimestamp.Time = now
796-
SetNodeCondition(node, tc.condition, tc.status, now.Add(30*time.Minute))
815+
node := createTestNode(30 * time.Minute)
797816
// No matter what are the node's conditions, stop considering it not started after long enough.
798817
assert.False(t, isNodeStillStarting(node))
799818
})

cluster-autoscaler/utils/kubernetes/ready.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,21 @@ func GetReadinessState(node *apiv1.Node) (isNodeReady bool, lastTransitionTime t
6767
}
6868
}
6969
}
70+
71+
notReadyTaints := map[string]bool{
72+
apiv1.TaintNodeNotReady: true,
73+
apiv1.TaintNodeDiskPressure: true,
74+
apiv1.TaintNodeNetworkUnavailable: true,
75+
}
76+
for _, taint := range node.Spec.Taints {
77+
if notReadyTaints[taint.Key] {
78+
canNodeBeReady = false
79+
if taint.TimeAdded != nil && lastTransitionTime.Before(taint.TimeAdded.Time) {
80+
lastTransitionTime = taint.TimeAdded.Time
81+
}
82+
}
83+
}
84+
7085
if !readyFound {
7186
return false, time.Time{}, fmt.Errorf("readiness information not found")
7287
}

0 commit comments

Comments
 (0)