Skip to content

Commit 8383850

Browse files
authored
Merge pull request kubernetes#81416 from krzysied/node_controller_cleanup
Removing unnecessary code from node lifecycle controller
2 parents 53f4e0f + 6842e11 commit 8383850

File tree

2 files changed

+235
-45
lines changed

2 files changed

+235
-45
lines changed

pkg/controller/nodelifecycle/node_lifecycle_controller.go

Lines changed: 22 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -595,13 +595,14 @@ func (nc *Controller) doNoExecuteTaintingPass() {
595595
// Because we want to mimic NodeStatus.Condition["Ready"] we make "unreachable" and "not ready" taints mutually exclusive.
596596
taintToAdd := v1.Taint{}
597597
oppositeTaint := v1.Taint{}
598-
if condition.Status == v1.ConditionFalse {
598+
switch condition.Status {
599+
case v1.ConditionFalse:
599600
taintToAdd = *NotReadyTaintTemplate
600601
oppositeTaint = *UnreachableTaintTemplate
601-
} else if condition.Status == v1.ConditionUnknown {
602+
case v1.ConditionUnknown:
602603
taintToAdd = *UnreachableTaintTemplate
603604
oppositeTaint = *NotReadyTaintTemplate
604-
} else {
605+
default:
605606
// It seems that the Node is ready again, so there's no need to taint it.
606607
klog.V(4).Infof("Node %v was in a taint queue, but it's ready now. Ignoring taint request.", value.Value)
607608
return true, 0
@@ -720,7 +721,8 @@ func (nc *Controller) monitorNodeHealth() error {
720721
decisionTimestamp := nc.now()
721722
if currentReadyCondition != nil {
722723
// Check eviction timeout against decisionTimestamp
723-
if observedReadyCondition.Status == v1.ConditionFalse {
724+
switch observedReadyCondition.Status {
725+
case v1.ConditionFalse:
724726
if nc.useTaintBasedEvictions {
725727
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
726728
if taintutils.TaintExists(node.Spec.Taints, UnreachableTaintTemplate) {
@@ -746,8 +748,7 @@ func (nc *Controller) monitorNodeHealth() error {
746748
}
747749
}
748750
}
749-
}
750-
if observedReadyCondition.Status == v1.ConditionUnknown {
751+
case v1.ConditionUnknown:
751752
if nc.useTaintBasedEvictions {
752753
// We want to update the taint straight away if Node is already tainted with the UnreachableTaint
753754
if taintutils.TaintExists(node.Spec.Taints, NotReadyTaintTemplate) {
@@ -773,8 +774,7 @@ func (nc *Controller) monitorNodeHealth() error {
773774
}
774775
}
775776
}
776-
}
777-
if observedReadyCondition.Status == v1.ConditionTrue {
777+
case v1.ConditionTrue:
778778
if nc.useTaintBasedEvictions {
779779
removed, err := nc.markNodeAsReachable(node)
780780
if err != nil {
@@ -807,7 +807,6 @@ func (nc *Controller) monitorNodeHealth() error {
807807
// tryUpdateNodeHealth checks a given node's conditions and tries to update it. Returns grace period to
808808
// which given node is entitled, state of current and last observed Ready Condition, and an error if it occurred.
809809
func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.NodeCondition, *v1.NodeCondition, error) {
810-
var err error
811810
var gracePeriod time.Duration
812811
var observedReadyCondition v1.NodeCondition
813812
_, currentReadyCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
@@ -836,7 +835,6 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
836835
observedReadyCondition = *currentReadyCondition
837836
gracePeriod = nc.nodeMonitorGracePeriod
838837
}
839-
840838
savedNodeHealth, found := nc.nodeHealthMap[node.Name]
841839
// There are following cases to check:
842840
// - both saved and new status have no Ready Condition set - we leave everything as it is,
@@ -858,35 +856,35 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
858856
_, savedCondition = nodeutil.GetNodeCondition(savedNodeHealth.status, v1.NodeReady)
859857
savedLease = savedNodeHealth.lease
860858
}
861-
_, observedCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
859+
862860
if !found {
863861
klog.Warningf("Missing timestamp for Node %s. Assuming now as a timestamp.", node.Name)
864862
savedNodeHealth = &nodeHealthData{
865863
status: &node.Status,
866864
probeTimestamp: nc.now(),
867865
readyTransitionTimestamp: nc.now(),
868866
}
869-
} else if savedCondition == nil && observedCondition != nil {
867+
} else if savedCondition == nil && currentReadyCondition != nil {
870868
klog.V(1).Infof("Creating timestamp entry for newly observed Node %s", node.Name)
871869
savedNodeHealth = &nodeHealthData{
872870
status: &node.Status,
873871
probeTimestamp: nc.now(),
874872
readyTransitionTimestamp: nc.now(),
875873
}
876-
} else if savedCondition != nil && observedCondition == nil {
874+
} else if savedCondition != nil && currentReadyCondition == nil {
877875
klog.Errorf("ReadyCondition was removed from Status of Node %s", node.Name)
878876
// TODO: figure out what to do in this case. For now we do the same thing as above.
879877
savedNodeHealth = &nodeHealthData{
880878
status: &node.Status,
881879
probeTimestamp: nc.now(),
882880
readyTransitionTimestamp: nc.now(),
883881
}
884-
} else if savedCondition != nil && observedCondition != nil && savedCondition.LastHeartbeatTime != observedCondition.LastHeartbeatTime {
882+
} else if savedCondition != nil && currentReadyCondition != nil && savedCondition.LastHeartbeatTime != currentReadyCondition.LastHeartbeatTime {
885883
var transitionTime metav1.Time
886884
// If ReadyCondition changed since the last time we checked, we update the transition timestamp to "now",
887885
// otherwise we leave it as it is.
888-
if savedCondition.LastTransitionTime != observedCondition.LastTransitionTime {
889-
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, observedCondition)
886+
if savedCondition.LastTransitionTime != currentReadyCondition.LastTransitionTime {
887+
klog.V(3).Infof("ReadyCondition for Node %s transitioned from %v to %v", node.Name, savedCondition, currentReadyCondition)
890888
transitionTime = nc.now()
891889
} else {
892890
transitionTime = savedNodeHealth.readyTransitionTimestamp
@@ -919,31 +917,9 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
919917
if nc.now().After(savedNodeHealth.probeTimestamp.Add(gracePeriod)) {
920918
// NodeReady condition or lease was last set longer ago than gracePeriod, so
921919
// update it to Unknown (regardless of its current value) in the master.
922-
if currentReadyCondition == nil {
923-
klog.V(2).Infof("node %v is never updated by kubelet", node.Name)
924-
node.Status.Conditions = append(node.Status.Conditions, v1.NodeCondition{
925-
Type: v1.NodeReady,
926-
Status: v1.ConditionUnknown,
927-
Reason: "NodeStatusNeverUpdated",
928-
Message: fmt.Sprintf("Kubelet never posted node status."),
929-
LastHeartbeatTime: node.CreationTimestamp,
930-
LastTransitionTime: nc.now(),
931-
})
932-
} else {
933-
klog.V(4).Infof("node %v hasn't been updated for %+v. Last ready condition is: %+v",
934-
node.Name, nc.now().Time.Sub(savedNodeHealth.probeTimestamp.Time), observedReadyCondition)
935-
if observedReadyCondition.Status != v1.ConditionUnknown {
936-
currentReadyCondition.Status = v1.ConditionUnknown
937-
currentReadyCondition.Reason = "NodeStatusUnknown"
938-
currentReadyCondition.Message = "Kubelet stopped posting node status."
939-
// LastProbeTime is the last time we heard from kubelet.
940-
currentReadyCondition.LastHeartbeatTime = observedReadyCondition.LastHeartbeatTime
941-
currentReadyCondition.LastTransitionTime = nc.now()
942-
}
943-
}
944920

945-
// remaining node conditions should also be set to Unknown
946-
remainingNodeConditionTypes := []v1.NodeConditionType{
921+
nodeConditionTypes := []v1.NodeConditionType{
922+
v1.NodeReady,
947923
v1.NodeMemoryPressure,
948924
v1.NodeDiskPressure,
949925
v1.NodePIDPressure,
@@ -952,7 +928,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
952928
}
953929

954930
nowTimestamp := nc.now()
955-
for _, nodeConditionType := range remainingNodeConditionTypes {
931+
for _, nodeConditionType := range nodeConditionTypes {
956932
_, currentCondition := nodeutil.GetNodeCondition(&node.Status, nodeConditionType)
957933
if currentCondition == nil {
958934
klog.V(2).Infof("Condition %v of node %v was never updated by kubelet", nodeConditionType, node.Name)
@@ -975,10 +951,11 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
975951
}
976952
}
977953
}
954+
// We need to update currentReadyCondition due to its value potentially changed.
955+
_, currentReadyCondition = nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
978956

979-
_, currentCondition := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady)
980-
if !apiequality.Semantic.DeepEqual(currentCondition, &observedReadyCondition) {
981-
if _, err = nc.kubeClient.CoreV1().Nodes().UpdateStatus(node); err != nil {
957+
if !apiequality.Semantic.DeepEqual(currentReadyCondition, &observedReadyCondition) {
958+
if _, err := nc.kubeClient.CoreV1().Nodes().UpdateStatus(node); err != nil {
982959
klog.Errorf("Error updating node %s: %v", node.Name, err)
983960
return gracePeriod, observedReadyCondition, currentReadyCondition, err
984961
}
@@ -992,7 +969,7 @@ func (nc *Controller) tryUpdateNodeHealth(node *v1.Node) (time.Duration, v1.Node
992969
}
993970
}
994971

995-
return gracePeriod, observedReadyCondition, currentReadyCondition, err
972+
return gracePeriod, observedReadyCondition, currentReadyCondition, nil
996973
}
997974

998975
func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.NodeCondition, nodes []*v1.Node) {

pkg/controller/nodelifecycle/node_lifecycle_controller_test.go

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,3 +3057,216 @@ func TestReconcileNodeLabels(t *testing.T) {
30573057
}
30583058
}
30593059
}
3060+
3061+
func TestTryUpdateNodeHealth(t *testing.T) {
3062+
fakeNow := metav1.Date(2017, 1, 1, 12, 0, 0, 0, time.UTC)
3063+
fakeOld := metav1.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC)
3064+
evictionTimeout := 10 * time.Minute
3065+
3066+
fakeNodeHandler := &testutil.FakeNodeHandler{
3067+
Existing: []*v1.Node{
3068+
{
3069+
ObjectMeta: metav1.ObjectMeta{
3070+
Name: "node0",
3071+
CreationTimestamp: fakeNow,
3072+
},
3073+
Status: v1.NodeStatus{
3074+
Conditions: []v1.NodeCondition{
3075+
{
3076+
Type: v1.NodeReady,
3077+
Status: v1.ConditionTrue,
3078+
LastHeartbeatTime: fakeNow,
3079+
LastTransitionTime: fakeNow,
3080+
},
3081+
},
3082+
},
3083+
},
3084+
},
3085+
Clientset: fake.NewSimpleClientset(&v1.PodList{Items: []v1.Pod{*testutil.NewPod("pod0", "node0")}}),
3086+
}
3087+
3088+
nodeController, _ := newNodeLifecycleControllerFromClient(
3089+
fakeNodeHandler,
3090+
evictionTimeout,
3091+
testRateLimiterQPS,
3092+
testRateLimiterQPS,
3093+
testLargeClusterThreshold,
3094+
testUnhealthyThreshold,
3095+
testNodeMonitorGracePeriod,
3096+
testNodeStartupGracePeriod,
3097+
testNodeMonitorPeriod,
3098+
true)
3099+
nodeController.now = func() metav1.Time { return fakeNow }
3100+
nodeController.recorder = testutil.NewFakeRecorder()
3101+
3102+
getStatus := func(cond *v1.NodeCondition) *v1.ConditionStatus {
3103+
if cond == nil {
3104+
return nil
3105+
}
3106+
return &cond.Status
3107+
}
3108+
3109+
tests := []struct {
3110+
name string
3111+
node *v1.Node
3112+
}{
3113+
{
3114+
name: "Status true",
3115+
node: &v1.Node{
3116+
ObjectMeta: metav1.ObjectMeta{
3117+
Name: "node0",
3118+
CreationTimestamp: fakeNow,
3119+
},
3120+
Status: v1.NodeStatus{
3121+
Conditions: []v1.NodeCondition{
3122+
{
3123+
Type: v1.NodeReady,
3124+
Status: v1.ConditionTrue,
3125+
LastHeartbeatTime: fakeNow,
3126+
LastTransitionTime: fakeNow,
3127+
},
3128+
},
3129+
},
3130+
},
3131+
},
3132+
{
3133+
name: "Status false",
3134+
node: &v1.Node{
3135+
ObjectMeta: metav1.ObjectMeta{
3136+
Name: "node0",
3137+
CreationTimestamp: fakeNow,
3138+
},
3139+
Status: v1.NodeStatus{
3140+
Conditions: []v1.NodeCondition{
3141+
{
3142+
Type: v1.NodeReady,
3143+
Status: v1.ConditionFalse,
3144+
LastHeartbeatTime: fakeNow,
3145+
LastTransitionTime: fakeNow,
3146+
},
3147+
},
3148+
},
3149+
},
3150+
},
3151+
{
3152+
name: "Status unknown",
3153+
node: &v1.Node{
3154+
ObjectMeta: metav1.ObjectMeta{
3155+
Name: "node0",
3156+
CreationTimestamp: fakeNow,
3157+
},
3158+
Status: v1.NodeStatus{
3159+
Conditions: []v1.NodeCondition{
3160+
{
3161+
Type: v1.NodeReady,
3162+
Status: v1.ConditionUnknown,
3163+
LastHeartbeatTime: fakeNow,
3164+
LastTransitionTime: fakeNow,
3165+
},
3166+
},
3167+
},
3168+
},
3169+
},
3170+
{
3171+
name: "Status nil",
3172+
node: &v1.Node{
3173+
ObjectMeta: metav1.ObjectMeta{
3174+
Name: "node0",
3175+
CreationTimestamp: fakeNow,
3176+
},
3177+
Status: v1.NodeStatus{
3178+
Conditions: []v1.NodeCondition{},
3179+
},
3180+
},
3181+
},
3182+
{
3183+
name: "Status true - after grace period",
3184+
node: &v1.Node{
3185+
ObjectMeta: metav1.ObjectMeta{
3186+
Name: "node0",
3187+
CreationTimestamp: fakeOld,
3188+
},
3189+
Status: v1.NodeStatus{
3190+
Conditions: []v1.NodeCondition{
3191+
{
3192+
Type: v1.NodeReady,
3193+
Status: v1.ConditionTrue,
3194+
LastHeartbeatTime: fakeOld,
3195+
LastTransitionTime: fakeOld,
3196+
},
3197+
},
3198+
},
3199+
},
3200+
},
3201+
{
3202+
name: "Status false - after grace period",
3203+
node: &v1.Node{
3204+
ObjectMeta: metav1.ObjectMeta{
3205+
Name: "node0",
3206+
CreationTimestamp: fakeOld,
3207+
},
3208+
Status: v1.NodeStatus{
3209+
Conditions: []v1.NodeCondition{
3210+
{
3211+
Type: v1.NodeReady,
3212+
Status: v1.ConditionFalse,
3213+
LastHeartbeatTime: fakeOld,
3214+
LastTransitionTime: fakeOld,
3215+
},
3216+
},
3217+
},
3218+
},
3219+
},
3220+
{
3221+
name: "Status unknown - after grace period",
3222+
node: &v1.Node{
3223+
ObjectMeta: metav1.ObjectMeta{
3224+
Name: "node0",
3225+
CreationTimestamp: fakeOld,
3226+
},
3227+
Status: v1.NodeStatus{
3228+
Conditions: []v1.NodeCondition{
3229+
{
3230+
Type: v1.NodeReady,
3231+
Status: v1.ConditionUnknown,
3232+
LastHeartbeatTime: fakeOld,
3233+
LastTransitionTime: fakeOld,
3234+
},
3235+
},
3236+
},
3237+
},
3238+
},
3239+
{
3240+
name: "Status nil - after grace period",
3241+
node: &v1.Node{
3242+
ObjectMeta: metav1.ObjectMeta{
3243+
Name: "node0",
3244+
CreationTimestamp: fakeOld,
3245+
},
3246+
Status: v1.NodeStatus{
3247+
Conditions: []v1.NodeCondition{},
3248+
},
3249+
},
3250+
},
3251+
}
3252+
3253+
for _, test := range tests {
3254+
t.Run(test.name, func(t *testing.T) {
3255+
nodeController.nodeHealthMap[test.node.Name] = &nodeHealthData{
3256+
status: &test.node.Status,
3257+
probeTimestamp: test.node.CreationTimestamp,
3258+
readyTransitionTimestamp: test.node.CreationTimestamp,
3259+
}
3260+
_, _, currentReadyCondition, err := nodeController.tryUpdateNodeHealth(test.node)
3261+
if err != nil {
3262+
t.Fatalf("unexpected error: %v", err)
3263+
}
3264+
_, savedReadyCondition := nodeutil.GetNodeCondition(nodeController.nodeHealthMap[test.node.Name].status, v1.NodeReady)
3265+
savedStatus := getStatus(savedReadyCondition)
3266+
currentStatus := getStatus(currentReadyCondition)
3267+
if currentStatus != savedStatus {
3268+
t.Errorf("expected %v, got %v", savedStatus, currentStatus)
3269+
}
3270+
})
3271+
}
3272+
}

0 commit comments

Comments
 (0)