cluster-autoscaler/core/utils/utils.go (12 additions, 4 deletions)
@@ -28,9 +28,17 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
 )
 
-// isVirtualNode determines if the node is created by virtual kubelet
-func isVirtualNode(node *apiv1.Node) bool {
-	return node.ObjectMeta.Labels["type"] == "virtual-kubelet"
+const (
+	// VirtualKubeletNodeLabelValue is the value of the "type" label set on virtual kubelet nodes
+	VirtualKubeletNodeLabelValue = "virtual-kubelet"
+)
+
+// isVirtualKubeletNode determines if the node is created by virtual kubelet
+func isVirtualKubeletNode(node *apiv1.Node) bool {
+	if node == nil {
Contributor:

return node != nil && node.ObjectMeta.Labels["type"] == VirtualKubeletNodeLabelValue is more concise

Contributor Author:

Thanks for the suggestion! I agree the one-liner is more concise, but I opted for the explicit if node == nil form to improve readability and maintainability. Explicit nil checks are quite common, especially when dealing with nested struct access, since they make debugging and future extension easier (e.g., adding logs or metrics inside the nil branch). Happy to change if we prefer the concise form globally.
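
For illustration, a minimal sketch of the kind of extension described above, assuming the klog logger already used across cluster-autoscaler; the variant name and the log call are hypothetical, not part of this PR:

package utils

import (
	apiv1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"
)

// isVirtualKubeletNodeVerbose is a hypothetical variant showing how the
// explicit nil branch leaves room for a log (or a metric) later on.
func isVirtualKubeletNodeVerbose(node *apiv1.Node) bool {
	if node == nil {
		// Illustrative only: surface unexpected nil nodes at high verbosity.
		klog.V(4).Info("isVirtualKubeletNode called with a nil node")
		return false
	}
	return node.ObjectMeta.Labels["type"] == VirtualKubeletNodeLabelValue
}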

+		return false
+	}
+	return node.ObjectMeta.Labels["type"] == VirtualKubeletNodeLabelValue
 }

// FilterOutNodesFromNotAutoscaledGroups return subset of input nodes for which cloud provider does not
@@ -40,7 +48,7 @@ func FilterOutNodesFromNotAutoscaledGroups(nodes []*apiv1.Node, cloudProvider cl
 
 	for _, node := range nodes {
 		// Exclude the virtual node here since it may have lots of resource and exceed the total resource limit
-		if isVirtualNode(node) {
+		if isVirtualKubeletNode(node) {
 			continue
 		}
 		nodeGroup, err := cloudProvider.NodeGroupForNode(node)
cluster-autoscaler/core/utils/utils_test.go (213 additions, 0 deletions)
@@ -17,6 +17,7 @@ limitations under the License.
package utils

import (
"errors"
"testing"
"time"

@@ -25,6 +26,8 @@ import (
"github.com/stretchr/testify/assert"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
mock_cloudprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mocks"
caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

func TestGetNodeResource(t *testing.T) {
@@ -75,6 +78,12 @@ func TestGetNodeCoresAndMemory(t *testing.T) {
cores, memory = GetNodeCoresAndMemory(nodeWithMissingCapacity)
assert.Equal(t, int64(0), cores)
assert.Equal(t, int64(0), memory)

// if the node has negative values defined in its capacity, we expect GetNodeCoresAndMemory to return 0s
nodeWithNegativeCapacity := BuildTestNode("n1", -1000, -2*MiB)
cores, memory = GetNodeCoresAndMemory(nodeWithNegativeCapacity)
assert.Equal(t, int64(0), cores)
assert.Equal(t, int64(0), memory)
}

func TestGetOldestPod(t *testing.T) {
@@ -88,3 +97,207 @@ func TestGetOldestPod(t *testing.T) {
assert.Equal(t, p1.CreationTimestamp.Time, GetOldestCreateTime([]*apiv1.Pod{p1, p2, p3}))
assert.Equal(t, p1.CreationTimestamp.Time, GetOldestCreateTime([]*apiv1.Pod{p3, p2, p1}))
}

func TestIsVirtualKubeletNode(t *testing.T) {
type args struct {
node *apiv1.Node
}
tests := []struct {
name string
args args
want bool
}{
{
name: "returns false when node is nil",
args: args{
node: nil,
},
want: false,
},
{
name: "returns false when node does not have virtual-kubelet label",
args: args{
node: &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"type": "not-virtual-kubelet",
},
},
},
},
want: false,
},
{
name: "returns true when node has virtual-kubelet label",
args: args{
node: &apiv1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"type": VirtualKubeletNodeLabelValue,
},
},
},
},
want: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, isVirtualKubeletNode(tt.args.node), "isVirtualKubeletNode(%v)", tt.args.node)
})
}
}

func TestFilterOutNodesFromNotAutoscaledGroups(t *testing.T) {
nodeInGroup := BuildTestNode("node-in-group", 1000, 2*MiB)
nodeNotInGroup := BuildTestNode("node-not-in-group", 1000, 2*MiB)
nodeError := BuildTestNode("node-error", 1000, 2*MiB)
nodeVirtual := BuildTestNode("node-virtual", 1000, 2*MiB)
nodeVirtual.Labels["type"] = VirtualKubeletNodeLabelValue

mockProvider := &mock_cloudprovider.CloudProvider{}
mockProvider.On("NodeGroupForNode", nodeInGroup).Return(new(mock_cloudprovider.NodeGroup), nil)
mockProvider.On("NodeGroupForNode", nodeNotInGroup).Return(nil, nil)
mockProvider.On("NodeGroupForNode", nodeError).Return(nil, errors.New("some error"))

t.Run("ignores virtual nodes", func(t *testing.T) {
result, err := FilterOutNodesFromNotAutoscaledGroups([]*apiv1.Node{nodeVirtual}, mockProvider)
assert.NoError(t, err)
assert.Empty(t, result)

mockProvider.AssertNotCalled(t, "NodeGroupForNode", nodeVirtual)
})

t.Run("returns error if cloud provider fails", func(t *testing.T) {
result, err := FilterOutNodesFromNotAutoscaledGroups([]*apiv1.Node{nodeError}, mockProvider)
assert.Error(t, err)
assert.Equal(t, caerrors.CloudProviderError, err.Type())
assert.Empty(t, result)
})

t.Run("filters out nodes in autoscaled groups", func(t *testing.T) {
result, err := FilterOutNodesFromNotAutoscaledGroups([]*apiv1.Node{nodeInGroup, nodeNotInGroup}, mockProvider)
assert.NoError(t, err)
assert.Len(t, result, 1)
assert.Equal(t, "node-not-in-group", result[0].Name)
})

mockProvider.AssertExpectations(t)
}

func TestHasHardInterPodAffinity(t *testing.T) {
type args struct {
affinity *apiv1.Affinity
}
tests := []struct {
name string
args args
want bool
}{
{
name: "returns false when affinity is nil",
args: args{
affinity: nil,
},
want: false,
},
{
name: "returns false when affinity is empty",
args: args{
affinity: &apiv1.Affinity{},
},
want: false,
},
{
name: "returns true when PodAffinity has RequiredDuringScheduling terms",
args: args{
affinity: &apiv1.Affinity{
PodAffinity: &apiv1.PodAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []apiv1.PodAffinityTerm{
{LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "test"}}},
},
},
},
},
want: true,
},
{
name: "returns false when PodAffinity has empty RequiredDuringScheduling",
args: args{
affinity: &apiv1.Affinity{
PodAffinity: &apiv1.PodAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []apiv1.WeightedPodAffinityTerm{
{Weight: 10, PodAffinityTerm: apiv1.PodAffinityTerm{}},
},
},
},
},
want: false,
},
{
name: "returns true when PodAntiAffinity has RequiredDuringScheduling terms",
args: args{
affinity: &apiv1.Affinity{
PodAntiAffinity: &apiv1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []apiv1.PodAffinityTerm{
{LabelSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": "test"}}},
},
},
},
},
want: true,
},
{
name: "returns false when PodAntiAffinity has empty RequiredDuringScheduling",
args: args{
affinity: &apiv1.Affinity{
PodAntiAffinity: &apiv1.PodAntiAffinity{
PreferredDuringSchedulingIgnoredDuringExecution: []apiv1.WeightedPodAffinityTerm{
{Weight: 10, PodAffinityTerm: apiv1.PodAffinityTerm{}},
},
},
},
},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, hasHardInterPodAffinity(tt.args.affinity), "hasHardInterPodAffinity(%v)", tt.args.affinity)
})
}
}

func TestGetOldestCreateTimeWithGpu(t *testing.T) {
fixedTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
p1 := BuildTestPod("p1", 500, 1000)
p1.CreationTimestamp = metav1.NewTime(fixedTime.Add(-1 * time.Minute))
p2 := BuildTestPod("p2", 500, 1000)
p2.CreationTimestamp = metav1.NewTime(fixedTime.Add(-2 * time.Minute))
RequestGpuForPod(p2, 1)
p3 := BuildTestPod("p3", 500, 1000)
p3.CreationTimestamp = metav1.NewTime(fixedTime.Add(-3 * time.Minute))
RequestGpuForPod(p3, 1)

t.Run("returns false when no pods", func(t *testing.T) {
found, _ := GetOldestCreateTimeWithGpu([]*apiv1.Pod{})
assert.False(t, found)
})

t.Run("returns false when no GPU pods", func(t *testing.T) {
found, _ := GetOldestCreateTimeWithGpu([]*apiv1.Pod{p1})
assert.False(t, found)
})

t.Run("returns creation time when single GPU pod", func(t *testing.T) {
found, ts := GetOldestCreateTimeWithGpu([]*apiv1.Pod{p2})
assert.True(t, found)
assert.Equal(t, p2.CreationTimestamp.Time, ts)
})

t.Run("returns oldest time among multiple GPU pods", func(t *testing.T) {
found, ts := GetOldestCreateTimeWithGpu([]*apiv1.Pod{p1, p2, p3})
assert.True(t, found)
assert.Equal(t, p3.CreationTimestamp.Time, ts)
})
}