Skip to content

Commit 6fcb503

Browse files
committed
fix(cluster-autoscaler): prevent panic in SimulateNodeRemoval by handling missing node info
1 parent d9d2e0f commit 6fcb503

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

cluster-autoscaler/simulator/cluster.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ limitations under the License.
1717
package simulator
1818

1919
import (
20+
"errors"
2021
"fmt"
2122
"time"
2223

2324
apiv1 "k8s.io/api/core/v1"
25+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2426
"k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb"
2527
"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
2628
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules"
@@ -91,6 +93,8 @@ const (
9193
BlockedByPod
9294
// UnexpectedError - node can't be removed because of an unexpected error.
9395
UnexpectedError
96+
// NoNodeInfo - node can't be removed because it doesn't have any node info in the cluster snapshot.
97+
NoNodeInfo
9498
)
9599

96100
// RemovalSimulator is a helper object for simulating node removal scenarios.
@@ -151,6 +155,12 @@ func (r *RemovalSimulator) SimulateNodeRemoval(
151155
nodeInfo, err := r.clusterSnapshot.GetNodeInfo(nodeName)
152156
if err != nil {
153157
klog.Errorf("Can't retrieve node %s from snapshot, err: %v", nodeName, err)
158+
unremovableReason := UnexpectedError
159+
if errors.Is(err, clustersnapshot.ErrNodeNotFound) {
160+
unremovableReason = NoNodeInfo
161+
}
162+
unremovableNode := &UnremovableNode{Node: &apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}, Reason: unremovableReason}
163+
return nil, unremovableNode
154164
}
155165
klog.V(2).Infof("Simulating node %s removal", nodeName)
156166

cluster-autoscaler/simulator/cluster_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ func TestFindNodesToRemove(t *testing.T) {
8888
fullNode := BuildTestNode("n4", 1000, 2000000)
8989
fullNodeInfo := framework.NewTestNodeInfo(fullNode)
9090

91+
// node with no info in cluster snapshot
92+
nodeWithoutInfo := &apiv1.Node{ObjectMeta: metav1.ObjectMeta{Name: "n5"}}
93+
9194
SetNodeReadyState(emptyNode, true, time.Time{})
9295
SetNodeReadyState(drainableNode, true, time.Time{})
9396
SetNodeReadyState(nonDrainableNode, true, time.Time{})
@@ -137,6 +140,10 @@ func TestFindNodesToRemove(t *testing.T) {
137140
Node: drainableNode,
138141
PodsToReschedule: []*apiv1.Pod{pod1, pod2},
139142
}
143+
nodeWithoutInfoUnremovable := UnremovableNode{
144+
Node: nodeWithoutInfo,
145+
Reason: NoNodeInfo,
146+
}
140147

141148
clusterSnapshot := testsnapshot.NewTestSnapshotOrDie(t)
142149

@@ -240,6 +247,14 @@ func TestFindNodesToRemove(t *testing.T) {
240247
{Node: topoNode3, Reason: BlockedByPod, BlockingPod: &drain.BlockingPod{Pod: blocker2, Reason: drain.NotReplicated}},
241248
},
242249
},
250+
{
251+
name: "candidate not in clusterSnapshot should be marked unremovable",
252+
candidates: []string{nodeWithoutInfo.Name},
253+
allNodes: []*apiv1.Node{},
254+
pods: []*apiv1.Pod{},
255+
toRemove: nil,
256+
unremovable: []*UnremovableNode{&nodeWithoutInfoUnremovable},
257+
},
243258
}
244259

245260
for _, test := range tests {

0 commit comments

Comments
 (0)