
Commit 9dbdfe7

Remove provider-specific operations from the autoscaling tests and reintroduce the tests.

Note that these tests will now take more time to run, as they rely on an actual scale up and scale down to prepare the test case and to restore the cluster state.

Remove all the GKE- and GCE-specific tests, including:
- GPUs
- volumes (there is no way to provision volumes without provider-specific infrastructure)
- scale up/down from/to 0
- tests checking what happens after breaking nodes (there is no way to simulate a temporary network failure without provider assumptions)

Remove the scalability tests, which were not run and were unmaintained. Update the autoscaler version used by the tests. Update the autoscaler status parsing logic so that the tests pass with the newer autoscaler version.
1 parent cf480a3 commit 9dbdfe7
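The last point above, updating the status parsing logic, follows from newer cluster-autoscaler releases reporting their status in a structured form inside the cluster-autoscaler-status ConfigMap rather than the older free-text layout. As a rough illustration (this is not code from the commit; the "status" data key and the YAML shape are assumptions), a test could fetch and decode the status like this:

package autoscalingexample

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/yaml"
)

// getAutoscalerStatus reads the cluster-autoscaler-status ConfigMap from
// kube-system and decodes its payload into a generic map. Assumptions: the
// payload sits under the "status" data key and is YAML, which holds for
// recent autoscaler releases but not for the old free-text format.
func getAutoscalerStatus(ctx context.Context, c kubernetes.Interface) (map[string]interface{}, error) {
	cm, err := c.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{})
	if err != nil {
		// A missing ConfigMap usually means the autoscaler is not enabled;
		// the reintroduced tests skip themselves in that case.
		return nil, err
	}
	raw, ok := cm.Data["status"]
	if !ok {
		return nil, fmt.Errorf("no status key in cluster-autoscaler-status ConfigMap")
	}
	var status map[string]interface{}
	if err := yaml.Unmarshal([]byte(raw), &status); err != nil {
		return nil, fmt.Errorf("decoding autoscaler status: %w", err)
	}
	return status, nil
}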

File tree

14 files changed (+3363, -24 lines)


cluster/gce/manifests/cluster-autoscaler.manifest

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@
   "containers": [
     {
       "name": "cluster-autoscaler",
-      "image": "registry.k8s.io/autoscaling/cluster-autoscaler:v1.26.1",
+      "image": "registry.k8s.io/autoscaling/cluster-autoscaler:v1.31.1",
       "livenessProbe": {
         "httpGet": {
           "path": "/health-check",
Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package autoscaling

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2eautoscaling "k8s.io/kubernetes/test/e2e/framework/autoscaling"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega/gmeasure"
)

var _ = SIGDescribe(feature.ClusterSizeAutoscalingScaleUp, framework.WithSlow(), "Autoscaling", func() {
	f := framework.NewDefaultFramework("autoscaling")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var experiment *gmeasure.Experiment

	ginkgo.Describe("Autoscaling a service", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			// Check if Cluster Autoscaler is enabled by trying to get its ConfigMap.
			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{})
			if err != nil {
				e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled")
			}
			experiment = gmeasure.NewExperiment("Autoscaling a service")
			ginkgo.AddReportEntry(experiment.Name, experiment)
		})

		ginkgo.Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
			const nodesNum = 3 // Expect there to be 3 nodes before and after the test.

			ginkgo.BeforeEach(func(ctx context.Context) {
				nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				nodeCount := len(nodes.Items)
				if nodeCount != nodesNum {
					e2eskipper.Skipf("test expects %d schedulable nodes, found %d", nodesNum, nodeCount)
				}
				// As the last deferred cleanup, ensure that the initial state is restored.
				// AfterEach does not allow for this because it runs before other deferred
				// cleanups, which would block the cluster from returning to its initial size.
				ginkgo.DeferCleanup(func(ctx context.Context) {
					ginkgo.By("Waiting for scale down after test")
					framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, nodeCount, 15*time.Minute))
				})
			})

			ginkgo.It("takes less than 15 minutes", func(ctx context.Context) {
				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
				const timeToWait = 15 * time.Minute

				// Calculate the CPU request of the service.
				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
				// Make it so that 'nodesNum' pods fit perfectly per node.
				nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				nodeCpus := nodes.Items[0].Status.Allocatable[v1.ResourceCPU]
				nodeCPUMillis := (&nodeCpus).MilliValue()
				cpuRequestMillis := int64(nodeCPUMillis / nodesNum)

				// Start the service we want to scale and wait for it to be up and running.
				nodeMemoryBytes := nodes.Items[0].Status.Allocatable[v1.ResourceMemory]
				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes not more than 10% of node's allocatable memory.
				replicas := 1
				resourceConsumer := e2eautoscaling.NewDynamicResourceConsumer(ctx, "resource-consumer", f.Namespace.Name, e2eautoscaling.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f.ClientSet, f.ScalesGetter, e2eautoscaling.Disable, e2eautoscaling.Idle)
				ginkgo.DeferCleanup(resourceConsumer.CleanUp)
				resourceConsumer.WaitForReplicas(ctx, replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.

				// Enable Horizontal Pod Autoscaler with 50% target utilization and
				// scale up the CPU usage to trigger autoscaling to 8 pods for target to be satisfied.
				targetCPUUtilizationPercent := int32(50)
				hpa := e2eautoscaling.CreateCPUResourceHorizontalPodAutoscaler(ctx, resourceConsumer, targetCPUUtilizationPercent, 1, 10)
				ginkgo.DeferCleanup(e2eautoscaling.DeleteHorizontalPodAutoscaler, resourceConsumer, hpa.Name)
				cpuLoad := 8 * cpuRequestMillis * int64(targetCPUUtilizationPercent) / 100 // 8 pods utilized to the target level
				resourceConsumer.ConsumeCPU(int(cpuLoad))

				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
				experiment.SampleDuration("total scale-up time", func(idx int) {
					resourceConsumer.WaitForReplicas(ctx, 8, timeToWait)
				}, gmeasure.SamplingConfig{N: 1}) // Increase N to run the test more than once.
			})
		})
	})
})
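To make the sizing arithmetic in the test concrete, here is a standalone sketch with assumed numbers (a node with 4000m allocatable CPU; real clusters differ). It shows the per-pod request the test derives, why eight such pods end up needing a fourth node once system pods take their share of allocatable CPU, and how much CPU the resource consumer burns so the HPA scales to eight replicas at a 50% target:

package main

import "fmt"

func main() {
	const nodesNum = 3
	nodeCPUMillis := int64(4000) // assumed allocatable CPU per node (4 cores)

	// Each consumer pod requests a third of a node, as in the test.
	cpuRequestMillis := nodeCPUMillis / nodesNum // 1333m
	fmt.Printf("per-pod CPU request: %dm\n", cpuRequestMillis)

	// On paper three such pods fit per node (nine on three nodes), but
	// system pods also count against allocatable CPU, so in practice a node
	// holds fewer consumer pods. Eight pods therefore cannot be packed onto
	// three nodes, which is exactly the pressure the test relies on to
	// trigger a scale-up to a fourth node.

	// With a 50% utilization target, the HPA settles at 8 replicas once the
	// total consumed CPU reaches 8 * request * 50%.
	targetPercent := int64(50)
	cpuLoad := 8 * cpuRequestMillis * targetPercent / 100
	fmt.Printf("CPU load to consume: %dm\n", cpuLoad) // 5332m with these numbers
}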
