/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package autoscaling

import (
	"context"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/feature"
	"k8s.io/kubernetes/test/e2e/framework"
	e2eautoscaling "k8s.io/kubernetes/test/e2e/framework/autoscaling"
	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
	admissionapi "k8s.io/pod-security-admission/api"

	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega/gmeasure"
)

var _ = SIGDescribe(feature.ClusterSizeAutoscalingScaleUp, framework.WithSlow(), "Autoscaling", func() {
	f := framework.NewDefaultFramework("autoscaling")
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	var experiment *gmeasure.Experiment

	ginkgo.Describe("Autoscaling a service", func() {
		ginkgo.BeforeEach(func(ctx context.Context) {
			// Check if the Cluster Autoscaler is enabled by trying to get its ConfigMap.
			_, err := f.ClientSet.CoreV1().ConfigMaps("kube-system").Get(ctx, "cluster-autoscaler-status", metav1.GetOptions{})
			if err != nil {
				e2eskipper.Skipf("test expects Cluster Autoscaler to be enabled")
			}
			experiment = gmeasure.NewExperiment("Autoscaling a service")
			ginkgo.AddReportEntry(experiment.Name, experiment)
		})

		ginkgo.Context("from 1 pod and 3 nodes to 8 pods and >=4 nodes", func() {
			const nodesNum = 3 // Expect there to be 3 nodes before and after the test.

			ginkgo.BeforeEach(func(ctx context.Context) {
				nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				nodeCount := len(nodes.Items)
				if nodeCount != nodesNum {
					e2eskipper.Skipf("test expects %d schedulable nodes, found %d", nodesNum, nodeCount)
				}
				// Restore the cluster state as the last deferred cleanup.
				// AfterEach cannot be used for this because it runs before the other deferred
				// cleanups, which would block the cluster from returning to its initial size.
				ginkgo.DeferCleanup(func(ctx context.Context) {
					ginkgo.By("Waiting for scale down after test")
					framework.ExpectNoError(e2enode.WaitForReadyNodes(ctx, f.ClientSet, nodeCount, 15*time.Minute))
				})
			})

			ginkgo.It("takes less than 15 minutes", func(ctx context.Context) {
				// Measured over multiple samples, scaling takes 10 +/- 2 minutes, so 15 minutes should be fully sufficient.
				const timeToWait = 15 * time.Minute

				// Calculate the CPU request of the service.
				// This test expects that 8 pods will not fit in 'nodesNum' nodes, but will fit in >='nodesNum'+1 nodes.
				// Make it so that 'nodesNum' pods fit perfectly per node.
				nodes, err := e2enode.GetReadySchedulableNodes(ctx, f.ClientSet)
				framework.ExpectNoError(err)
				nodeCpus := nodes.Items[0].Status.Allocatable[v1.ResourceCPU]
				nodeCPUMillis := (&nodeCpus).MilliValue()
				cpuRequestMillis := int64(nodeCPUMillis / nodesNum)

				// Start the service we want to scale and wait for it to be up and running.
				nodeMemoryBytes := nodes.Items[0].Status.Allocatable[v1.ResourceMemory]
				nodeMemoryMB := (&nodeMemoryBytes).Value() / 1024 / 1024
				memRequestMB := nodeMemoryMB / 10 // Ensure each pod takes no more than 10% of the node's allocatable memory.
				replicas := 1
				resourceConsumer := e2eautoscaling.NewDynamicResourceConsumer(ctx, "resource-consumer", f.Namespace.Name, e2eautoscaling.KindDeployment, replicas, 0, 0, 0, cpuRequestMillis, memRequestMB, f.ClientSet, f.ScalesGetter, e2eautoscaling.Disable, e2eautoscaling.Idle)
				ginkgo.DeferCleanup(resourceConsumer.CleanUp)
				resourceConsumer.WaitForReplicas(ctx, replicas, 1*time.Minute) // Should finish ~immediately, so 1 minute is more than enough.

				// Enable the Horizontal Pod Autoscaler with a 50% target utilization and
				// raise the CPU usage so that 8 pods are needed to satisfy the target.
				targetCPUUtilizationPercent := int32(50)
				hpa := e2eautoscaling.CreateCPUResourceHorizontalPodAutoscaler(ctx, resourceConsumer, targetCPUUtilizationPercent, 1, 10)
				ginkgo.DeferCleanup(e2eautoscaling.DeleteHorizontalPodAutoscaler, resourceConsumer, hpa.Name)
				cpuLoad := 8 * cpuRequestMillis * int64(targetCPUUtilizationPercent) / 100 // 8 pods utilized to the target level.
				resourceConsumer.ConsumeCPU(int(cpuLoad))

				// Measure the time it takes for the service to scale to 8 pods with 50% CPU utilization each.
				experiment.SampleDuration("total scale-up time", func(idx int) {
					resourceConsumer.WaitForReplicas(ctx, 8, timeToWait)
				}, gmeasure.SamplingConfig{N: 1}) // Increase N to run the measurement more than once.
			})
		})
	})
})