From b2fafed3d1777d335dcf3c41798b8f10115a3a67 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Wed, 6 Aug 2025 19:25:30 +0000 Subject: [PATCH 1/5] Allow admission-controller to intercept pods when CPUBoost is set --- .../deploy/vpa-v1-crd-gen.yaml | 73 +++++ .../pkg/admission-controller/main.go | 11 +- .../resource/pod/patch/resource_updates.go | 106 +++++- .../pod/patch/resource_updates_test.go | 307 +++++++++++++++++- .../resource/pod/patch/util.go | 8 + .../resource/vpa/handler.go | 41 +++ .../resource/vpa/matcher.go | 2 +- .../pkg/apis/autoscaling.k8s.io/v1/types.go | 50 +++ .../pkg/features/features.go | 5 + .../pkg/features/versioned_features.go | 3 + .../pkg/utils/annotations/vpa_cpu_boost.go | 71 ++++ .../pkg/utils/test/test_vpa.go | 23 ++ 12 files changed, 692 insertions(+), 8 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go diff --git a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml index 70adb552bb39..7643d1fed571 100644 --- a/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml +++ b/vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml @@ -372,9 +372,82 @@ spec: - Auto - "Off" type: string + startupBoost: + description: |- + StartupBoost specifies the startup boost policy for the container. + This overrides any pod-level startup boost policy. + properties: + cpu: + description: CPU specifies the CPU startup boost policy. + properties: + duration: + description: |- + Duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: Factor specifies the factor to apply + to the CPU request. + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: Quantity specifies the absolute CPU + resource quantity. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + Type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". + Defaults to "Factor". + enum: + - Factor + - Quantity + type: string + type: object + type: object type: object type: array type: object + startupBoost: + description: StartupBoost specifies the startup boost policy for the + pod. + properties: + cpu: + description: CPU specifies the CPU startup boost policy. + properties: + duration: + description: |- + Duration indicates for how long to keep the pod boosted after it goes to Ready. + Defaults to 0s. + type: string + factor: + description: Factor specifies the factor to apply to the CPU + request. + format: int32 + type: integer + quantity: + anyOf: + - type: integer + - type: string + description: Quantity specifies the absolute CPU resource + quantity. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: + description: |- + Type specifies the kind of boost to apply. + Supported values are: "Factor", "Quantity". + Defaults to "Factor". + enum: + - Factor + - Quantity + type: string + type: object + type: object targetRef: description: |- TargetRef points to the controller managing the set of pods for the diff --git a/vertical-pod-autoscaler/pkg/admission-controller/main.go b/vertical-pod-autoscaler/pkg/admission-controller/main.go index efb633bad6a6..fc08b6a0639e 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/main.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/main.go @@ -25,6 +25,7 @@ import ( "time" "github.com/spf13/pflag" + "k8s.io/apimachinery/pkg/api/resource" "k8s.io/client-go/informers" kube_client "k8s.io/client-go/kubernetes" typedadmregv1 "k8s.io/client-go/kubernetes/typed/admissionregistration/v1" @@ -78,6 +79,7 @@ var ( registerWebhook = flag.Bool("register-webhook", true, "If set to true, admission webhook object will be created on start up to register with the API server.") webhookLabels = flag.String("webhook-labels", "", "Comma separated list of labels to add to the webhook object. Format: key1:value1,key2:value2") registerByURL = flag.Bool("register-by-url", false, "If set to true, admission webhook will be registered by URL (webhookAddress:webhookPort) instead of by service name") + maxAllowedCpu = flag.String("container-recommendation-max-allowed-cpu", "", "Maximum amount of CPU that will be recommended for a container.") ) func main() { @@ -93,6 +95,13 @@ func main() { klog.FlushAndExit(klog.ExitFlushTimeout, 1) } + if *maxAllowedCpu != "" { + if _, err := resource.ParseQuantity(*maxAllowedCpu); err != nil { + klog.ErrorS(err, "Failed to parse maxAllowedCpu") + klog.FlushAndExit(klog.ExitFlushTimeout, 1) + } + } + healthCheck := metrics.NewHealthCheck(time.Minute) metrics_admission.Register() server.Initialize(&commonFlags.EnableProfiling, healthCheck, address) @@ -145,7 +154,7 @@ func main() { hostname, ) - calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(recommendationProvider), patch.NewObservedContainersCalculator()} + calculators := []patch.Calculator{patch.NewResourceUpdatesCalculator(recommendationProvider, *maxAllowedCpu), patch.NewObservedContainersCalculator()} as := logic.NewAdmissionServer(podPreprocessor, vpaPreprocessor, limitRangeCalculator, vpaMatcher, calculators) http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { as.Serve(w, r) diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go index 3bc230e9f029..c1b2d45a6749 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates.go @@ -21,10 +21,14 @@ import ( "strings" core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" resourcehelpers "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/resources" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -37,13 +41,19 @@ const ( type resourcesUpdatesPatchCalculator struct { recommendationProvider recommendation.Provider + maxAllowedCpu resource.Quantity } // NewResourceUpdatesCalculator returns a calculator for // resource update patches. -func NewResourceUpdatesCalculator(recommendationProvider recommendation.Provider) Calculator { +func NewResourceUpdatesCalculator(recommendationProvider recommendation.Provider, maxAllowedCpu string) Calculator { + var maxAllowedCpuQuantity resource.Quantity + if maxAllowedCpu != "" { + maxAllowedCpuQuantity = resource.MustParse(maxAllowedCpu) + } return &resourcesUpdatesPatchCalculator{ recommendationProvider: recommendationProvider, + maxAllowedCpu: maxAllowedCpuQuantity, } } @@ -52,11 +62,22 @@ func (*resourcesUpdatesPatchCalculator) PatchResourceTarget() PatchResourceTarge } func (c *resourcesUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + klog.Infof("Calculating patches for pod %s/%s with VPA %s", pod.Namespace, pod.Name, vpa.Name) result := []resource_admission.PatchRecord{} containersResources, annotationsPerContainer, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) if err != nil { - return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + return nil, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + } + + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeOff { + // If update mode is "Off", we don't want to apply any recommendations, + // but we still want to apply startup boost. + for i := range containersResources { + containersResources[i].Requests = nil + containersResources[i].Limits = nil + } + annotationsPerContainer = vpa_api_util.ContainerToAnnotationsMap{} } if annotationsPerContainer == nil { @@ -65,9 +86,44 @@ func (c *resourcesUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *v updatesAnnotation := []string{} for i, containerResources := range containersResources { + // Apply startup boost if configured + if features.Enabled(features.CPUStartupBoost) { + policy := vpa_api_util.GetContainerResourcePolicy(pod.Spec.Containers[i].Name, vpa.Spec.ResourcePolicy) + if policy != nil && policy.Mode != nil && *policy.Mode == vpa_types.ContainerScalingModeOff { + klog.V(4).Infof("Not applying startup boost for container %s since its scaling mode is Off", pod.Spec.Containers[i].Name) + continue + } else { + boost, err := getStartupBoost(&pod.Spec.Containers[i], vpa) + if err != nil { + return nil, err + } + if boost != nil { + if !c.maxAllowedCpu.IsZero() && boost.Cmp(c.maxAllowedCpu) > 0 { + cappedBoost := c.maxAllowedCpu + boost = &cappedBoost + } + if containerResources.Requests == nil { + containerResources.Requests = core.ResourceList{} + } + containerResources.Requests[core.ResourceCPU] = *boost + if containerResources.Limits == nil { + containerResources.Limits = core.ResourceList{} + } + containerResources.Limits[core.ResourceCPU] = *boost + originalResources, err := annotations.GetOriginalResourcesAnnotationValue(&pod.Spec.Containers[i]) + if err != nil { + return nil, err + } + result = append(result, GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, originalResources)) + } + } + } + newPatches, newUpdatesAnnotation := getContainerPatch(pod, i, annotationsPerContainer, containerResources) - result = append(result, newPatches...) - updatesAnnotation = append(updatesAnnotation, newUpdatesAnnotation) + if len(newPatches) > 0 { + result = append(result, newPatches...) + updatesAnnotation = append(updatesAnnotation, newUpdatesAnnotation) + } } if len(updatesAnnotation) > 0 { @@ -77,6 +133,48 @@ func (c *resourcesUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *v return result, nil } +func getStartupBoost(container *core.Container, vpa *vpa_types.VerticalPodAutoscaler) (*resource.Quantity, error) { + policy := vpa_api_util.GetContainerResourcePolicy(container.Name, vpa.Spec.ResourcePolicy) + startupBoost := vpa.Spec.StartupBoost + if policy != nil && policy.StartupBoost != nil { + startupBoost = policy.StartupBoost + } + if startupBoost == nil { + return nil, nil + } + + cpuRequest := container.Resources.Requests[core.ResourceCPU] + boostType := startupBoost.CPU.Type + if boostType == "" { + boostType = vpa_types.FactorStartupBoostType + } + + switch boostType { + case vpa_types.FactorStartupBoostType: + if startupBoost.CPU.Factor == nil { + return nil, fmt.Errorf("startupBoost.CPU.Factor is required when Type is Factor or not specified") + } + factor := *startupBoost.CPU.Factor + if factor < 1 { + return nil, fmt.Errorf("boost factor must be >= 1") + } + boostedCPU := cpuRequest.MilliValue() + boostedCPU = int64(float64(boostedCPU) * float64(factor)) + return resource.NewMilliQuantity(boostedCPU, resource.DecimalSI), nil + case vpa_types.QuantityStartupBoostType: + if startupBoost.CPU.Quantity == nil { + return nil, fmt.Errorf("startupBoost.CPU.Quantity is required when Type is Quantity") + } + quantity := *startupBoost.CPU.Quantity + if quantity.Cmp(cpuRequest) < 0 { + return nil, fmt.Errorf("boost quantity %s is less than container's request %s", quantity.String(), cpuRequest.String()) + } + return &quantity, nil + default: + return nil, fmt.Errorf("unsupported startup boost type: %s", startupBoost.CPU.Type) + } +} + func getContainerPatch(pod *core.Pod, i int, annotationsPerContainer vpa_api_util.ContainerToAnnotationsMap, containerResources vpa_api_util.ContainerResources) ([]resource_admission.PatchRecord, string) { var patches []resource_admission.PatchRecord // Add empty resources object if missing. diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go index 2a3cc5d9a0ec..f155ce08c92f 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/resource_updates_test.go @@ -24,9 +24,12 @@ import ( "github.com/stretchr/testify/assert" core "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + featuregatetesting "k8s.io/component-base/featuregate/testing" resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -289,11 +292,22 @@ func TestCalculatePatches_ResourceUpdates(t *testing.T) { addAnnotationRequest([][]string{{cpu}}, limit), }, }, + { + name: "no recommendation present", + pod: test.Pod(). + AddContainer(core.Container{}). + AddContainerStatus(test.ContainerStatus(). + WithCPULimit(resource.MustParse("0")).Get()).Get(), + namespace: "default", + recommendResources: make([]vpa_api_util.ContainerResources, 1), + recommendAnnotations: vpa_api_util.ContainerToAnnotationsMap{}, + expectPatches: []resource_admission.PatchRecord{}, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { frp := fakeRecommendationProvider{tc.recommendResources, tc.recommendAnnotations, tc.recommendError} - c := NewResourceUpdatesCalculator(&frp) + c := NewResourceUpdatesCalculator(&frp, "") patches, err := c.CalculatePatches(tc.pod, test.VerticalPodAutoscaler().WithContainer("test").WithName("name").Get()) if tc.expectError == nil { assert.NoError(t, err) @@ -335,7 +349,7 @@ func TestGetPatches_TwoReplacementResources(t *testing.T) { } recommendAnnotations := vpa_api_util.ContainerToAnnotationsMap{} frp := fakeRecommendationProvider{recommendResources, recommendAnnotations, nil} - c := NewResourceUpdatesCalculator(&frp) + c := NewResourceUpdatesCalculator(&frp, "") patches, err := c.CalculatePatches(pod, test.VerticalPodAutoscaler().WithName("name").WithContainer("test").Get()) assert.NoError(t, err) // Order of updates for cpu and unobtanium depends on order of iterating a map, both possible results are valid. @@ -350,3 +364,292 @@ func TestGetPatches_TwoReplacementResources(t *testing.T) { AssertPatchOneOf(t, patches[2], []resource_admission.PatchRecord{cpuFirstUnobtaniumSecond, unobtaniumFirstCpuSecond}) } } + +func TestCalculatePatches_StartupBoost(t *testing.T) { + factor := int32(2) + quantity := resource.MustParse("500m") + invalidFactor := int32(0) + invalidQuantity := resource.MustParse("200m") + factor3 := int32(3) + tests := []struct { + name string + pod *core.Pod + vpa *vpa_types.VerticalPodAutoscaler + recommendResources []vpa_api_util.ContainerResources + recommendAnnotations vpa_api_util.ContainerToAnnotationsMap + recommendError error + maxAllowedCpu string + expectPatches []resource_admission.PatchRecord + expectError error + featureGateEnabled bool + }{ + { + name: "startup boost factor", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&factor, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"100m\"},\"limits\":{}}"), + addResourceRequestPatch(0, cpu, "200m"), + addLimitsPatch(0), + addResourceLimitPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost quantity", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(nil, &quantity, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"100m\"},\"limits\":{}}"), + addResourceRequestPatch(0, cpu, "500m"), + addLimitsPatch(0), + addResourceLimitPatch(0, cpu, "500m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "feature gate disabled", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&factor, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: false, + expectPatches: []resource_admission.PatchRecord{ + addResourceRequestPatch(0, cpu, "100m"), + addAnnotationRequest([][]string{{cpu}}, "request"), + }, + }, + { + name: "invalid factor", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&invalidFactor, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: true, + expectError: fmt.Errorf("boost factor must be >= 1"), + }, + { + name: "quantity less than request", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("500m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(nil, &invalidQuantity, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("500m"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: true, + expectError: fmt.Errorf("boost quantity 200m is less than container's request 500m"), + }, + { + name: "startup boost capped", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&factor3, nil, "10s").Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("1"), + }, + }, + }, + maxAllowedCpu: "2", + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"1\"},\"limits\":{}}"), + addResourceRequestPatch(0, cpu, "2"), + addLimitsPatch(0), + addResourceLimitPatch(0, cpu, "2"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + { + name: "startup boost with scaling mode off", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&factor, nil, "10s").WithScalingMode("container1", vpa_types.ContainerScalingModeOff).Get(), + recommendResources: []vpa_api_util.ContainerResources{ + { + Requests: core.ResourceList{ + cpu: resource.MustParse("1"), + }, + }, + }, + maxAllowedCpu: "", + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{}, + }, + { + name: "startup boost no recommendation", + pod: &core.Pod{ + Spec: core.PodSpec{ + Containers: []core.Container{ + { + Name: "container1", + Resources: core.ResourceRequirements{ + Requests: core.ResourceList{ + cpu: resource.MustParse("100m"), + }, + }, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithName("name").WithContainer("container1").WithCPUStartupBoost(&factor, nil, "10s").Get(), + recommendResources: make([]vpa_api_util.ContainerResources, 1), + maxAllowedCpu: "", + featureGateEnabled: true, + expectPatches: []resource_admission.PatchRecord{ + GetAddAnnotationPatch(annotations.StartupCPUBoostAnnotation, "{\"requests\":{\"cpu\":\"100m\"},\"limits\":{}}"), + addResourceRequestPatch(0, cpu, "200m"), + addLimitsPatch(0), + addResourceLimitPatch(0, cpu, "200m"), + GetAddAnnotationPatch(ResourceUpdatesAnnotation, "Pod resources updated by name: container 0: cpu request, cpu limit"), + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, tc.featureGateEnabled) + + frp := fakeRecommendationProvider{tc.recommendResources, tc.recommendAnnotations, tc.recommendError} + c := NewResourceUpdatesCalculator(&frp, tc.maxAllowedCpu) + patches, err := c.CalculatePatches(tc.pod, tc.vpa) + if tc.expectError == nil { + assert.NoError(t, err) + } else { + if assert.Error(t, err) { + assert.Equal(t, tc.expectError.Error(), err.Error()) + } + } + if assert.Len(t, patches, len(tc.expectPatches), fmt.Sprintf("got %+v, want %+v", patches, tc.expectPatches)) { + for i, gotPatch := range patches { + if !EqPatch(gotPatch, tc.expectPatches[i]) { + t.Errorf("Expected patch at position %d to be %+v, got %+v", i, tc.expectPatches[i], gotPatch) + } + } + } + }) + } +} diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go index 0c68ab6cd557..b930be0f1988 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go @@ -43,6 +43,14 @@ func GetAddAnnotationPatch(annotationName, annotationValue string) resource_admi } } +// GetRemoveAnnotationPatch returns a patch to remove an annotation. +func GetRemoveAnnotationPatch(annotationName string) resource_admission.PatchRecord { + return resource_admission.PatchRecord{ + Op: "remove", + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), + } +} + // GetAddResourceRequirementValuePatch returns a patch record to add resource requirements to a container. func GetAddResourceRequirementValuePatch(i int, kind string, resource core.ResourceName, quantity resource.Quantity) resource_admission.PatchRecord { return resource_admission.PatchRecord{ diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go index f7bed6397d94..c69d578b5a77 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/handler.go @@ -163,9 +163,16 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error { return fmt.Errorf("controlledValues shouldn't be specified if container scaling mode is off") } } + if err := validateStartupBoost(policy.StartupBoost, isCreate); err != nil { + return fmt.Errorf("invalid startupBoost in container %s: %v", policy.ContainerName, err) + } } } + if err := validateStartupBoost(vpa.Spec.StartupBoost, isCreate); err != nil { + return fmt.Errorf("invalid startupBoost: %v", err) + } + if isCreate && vpa.Spec.TargetRef == nil { return fmt.Errorf("targetRef is required. If you're using v1beta1 version of the API, please migrate to v1") } @@ -177,6 +184,40 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error { return nil } +func validateStartupBoost(startupBoost *vpa_types.StartupBoost, isCreate bool) error { + if startupBoost == nil { + return nil + } + + if !features.Enabled(features.CPUStartupBoost) && isCreate { + return fmt.Errorf("in order to use startupBoost, you must enable feature gate %s in the admission-controller args", features.CPUStartupBoost) + } + + cpuBoost := startupBoost.CPU + boostType := cpuBoost.Type + if boostType == "" { + boostType = vpa_types.FactorStartupBoostType + } + + if boostType != vpa_types.FactorStartupBoostType && boostType != vpa_types.QuantityStartupBoostType { + return fmt.Errorf("unexpected StartupBoost.CPU.Type value %s", boostType) + } + + if boostType == vpa_types.FactorStartupBoostType { + if cpuBoost.Factor == nil { + return fmt.Errorf("StartupBoost.CPU.Factor is required when Type is Factor") + } + if *cpuBoost.Factor < 1 { + return fmt.Errorf("invalid StartupBoost.CPU.Factor: must be >= 1 for Type Factor") + } + } else if boostType == vpa_types.QuantityStartupBoostType { + if cpuBoost.Quantity == nil { + return fmt.Errorf("StartupBoost.CPU.Quantity is required when Type is Quantity") + } + } + return nil +} + func validateResourceResolution(name corev1.ResourceName, val apires.Quantity) error { switch name { case corev1.ResourceCPU: diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go index 704749649ad2..5d1e93468d11 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/vpa/matcher.go @@ -69,7 +69,7 @@ func (m *matcher) GetMatchingVPA(ctx context.Context, pod *core.Pod) *vpa_types. var controllingVpa *vpa_types.VerticalPodAutoscaler for _, vpaConfig := range configs { - if vpa_api_util.GetUpdateMode(vpaConfig) == vpa_types.UpdateModeOff { + if vpa_api_util.GetUpdateMode(vpaConfig) == vpa_types.UpdateModeOff && vpaConfig.Spec.StartupBoost == nil { continue } if vpaConfig.Spec.TargetRef == nil { diff --git a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go index 6ae164ce4cac..eaa3f24a7160 100644 --- a/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go +++ b/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1/types.go @@ -19,6 +19,7 @@ package v1 import ( autoscaling "k8s.io/api/autoscaling/v1" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -107,8 +108,52 @@ type VerticalPodAutoscalerSpec struct { // recommendation) or contain exactly one recommender. // +optional Recommenders []*VerticalPodAutoscalerRecommenderSelector `json:"recommenders,omitempty" protobuf:"bytes,4,opt,name=recommenders"` + + // StartupBoost specifies the startup boost policy for the pod. + // +optional + StartupBoost *StartupBoost `json:"startupBoost,omitempty" protobuf:"bytes,5,opt,name=startupBoost"` +} + +// StartupBoost defines the startup boost policy. +type StartupBoost struct { + // CPU specifies the CPU startup boost policy. + // +optional + CPU CPUStartupBoost `json:"cpu,omitempty" protobuf:"bytes,1,opt,name=cpu"` +} + +// CPUStartupBoost defines the CPU startup boost policy. +type CPUStartupBoost struct { + // Type specifies the kind of boost to apply. + // Supported values are: "Factor", "Quantity". + // Defaults to "Factor". + // +optional + Type StartupBoostType `json:"type,omitempty" protobuf:"bytes,1,opt,name=type"` + + // Factor specifies the factor to apply to the CPU request. + // +optional + Factor *int32 `json:"factor,omitempty" protobuf:"bytes,2,opt,name=factor"` + + // Quantity specifies the absolute CPU resource quantity. + // +optional + Quantity *resource.Quantity `json:"quantity,omitempty" protobuf:"bytes,3,opt,name=quantity"` + + // Duration indicates for how long to keep the pod boosted after it goes to Ready. + // Defaults to 0s. + // +optional + Duration *metav1.Duration `json:"duration,omitempty" protobuf:"bytes,4,opt,name=duration"` } +// StartupBoostType is the type of startup boost. +// +kubebuilder:validation:Enum=Factor;Quantity +type StartupBoostType string + +const ( + // FactorStartupBoostType applies a factor to the CPU. + FactorStartupBoostType StartupBoostType = "Factor" + // QuantityStartupBoostType applies a fixed quantity to the CPU. + QuantityStartupBoostType StartupBoostType = "Quantity" +) + // EvictionChangeRequirement refers to the relationship between the new target recommendation for a Pod and its current requests, what kind of change is necessary for the Pod to be evicted // +kubebuilder:validation:Enum:=TargetHigherThanRequests;TargetLowerThanRequests type EvictionChangeRequirement string @@ -221,6 +266,11 @@ type ContainerResourcePolicy struct { // The default is "RequestsAndLimits". // +optional ControlledValues *ContainerControlledValues `json:"controlledValues,omitempty" protobuf:"bytes,6,rep,name=controlledValues"` + + // StartupBoost specifies the startup boost policy for the container. + // This overrides any pod-level startup boost policy. + // +optional + StartupBoost *StartupBoost `json:"startupBoost,omitempty" protobuf:"bytes,7,opt,name=startupBoost"` } const ( diff --git a/vertical-pod-autoscaler/pkg/features/features.go b/vertical-pod-autoscaler/pkg/features/features.go index 513973f63958..4b610f0a3bbd 100644 --- a/vertical-pod-autoscaler/pkg/features/features.go +++ b/vertical-pod-autoscaler/pkg/features/features.go @@ -46,6 +46,11 @@ const ( // InPlaceOrRecreate enables the InPlaceOrRecreate update mode to be used. // Requires KEP-1287 InPlacePodVerticalScaling feature-gate to be enabled on the cluster. InPlaceOrRecreate featuregate.Feature = "InPlaceOrRecreate" + + // alpha: v1.5.0 + // components: admission-controller, updater + // CPUStartupBoost enables the CPU startup boost feature. + CPUStartupBoost featuregate.Feature = "CPUStartupBoost" ) // MutableFeatureGate is a mutable, versioned, global FeatureGate. diff --git a/vertical-pod-autoscaler/pkg/features/versioned_features.go b/vertical-pod-autoscaler/pkg/features/versioned_features.go index c3fd990f8013..4db982eecd46 100644 --- a/vertical-pod-autoscaler/pkg/features/versioned_features.go +++ b/vertical-pod-autoscaler/pkg/features/versioned_features.go @@ -27,6 +27,9 @@ import ( // Entries are alphabetized. var defaultVersionedFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{ + CPUStartupBoost: { + {Version: version.MustParse("1.5"), Default: false, PreRelease: featuregate.Alpha}, + }, InPlaceOrRecreate: { {Version: version.MustParse("1.4"), Default: false, PreRelease: featuregate.Alpha}, }, diff --git a/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go new file mode 100644 index 000000000000..2ab6a3dd5b14 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/utils/annotations/vpa_cpu_boost.go @@ -0,0 +1,71 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package annotations + +import ( + "encoding/json" + + core "k8s.io/api/core/v1" +) + +const ( + // StartupCPUBoostAnnotation is the annotation set on a pod when a CPU boost is applied. + // The value of the annotation is the original resource specification of the container. + StartupCPUBoostAnnotation = "startup-cpu-boost" +) + +// OriginalResources contains the original resources of a container. +type OriginalResources struct { + Requests core.ResourceList `json:"requests"` + Limits core.ResourceList `json:"limits"` +} + +// GetOriginalResourcesAnnotationValue returns the annotation value for the original resources. +func GetOriginalResourcesAnnotationValue(container *core.Container) (string, error) { + original := OriginalResources{ + Requests: core.ResourceList{}, + Limits: core.ResourceList{}, + } + if cpu, ok := container.Resources.Requests[core.ResourceCPU]; ok { + original.Requests[core.ResourceCPU] = cpu + } + if mem, ok := container.Resources.Requests[core.ResourceMemory]; ok { + original.Requests[core.ResourceMemory] = mem + } + if cpu, ok := container.Resources.Limits[core.ResourceCPU]; ok { + original.Limits[core.ResourceCPU] = cpu + } + if mem, ok := container.Resources.Limits[core.ResourceMemory]; ok { + original.Limits[core.ResourceMemory] = mem + } + b, err := json.Marshal(original) + return string(b), err +} + +// GetOriginalResourcesFromAnnotation returns the original resources from the annotation. +func GetOriginalResourcesFromAnnotation(pod *core.Pod) (*OriginalResources, error) { + val, ok := pod.Annotations[StartupCPUBoostAnnotation] + if !ok { + return nil, nil + } + var original OriginalResources + err := json.Unmarshal([]byte(val), &original) + if err != nil { + return nil, err + } + return &original, nil +} diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go index 6bb3279baff9..eb175e3d3083 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_vpa.go @@ -21,6 +21,7 @@ import ( autoscaling "k8s.io/api/autoscaling/v1" core "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" meta "k8s.io/apimachinery/pkg/apis/meta/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" @@ -47,6 +48,7 @@ type VerticalPodAutoscalerBuilder interface { WithGroupVersion(gv meta.GroupVersion) VerticalPodAutoscalerBuilder WithEvictionRequirements([]*vpa_types.EvictionRequirement) VerticalPodAutoscalerBuilder WithMinReplicas(minReplicas *int32) VerticalPodAutoscalerBuilder + WithCPUStartupBoost(factor *int32, quantity *resource.Quantity, duration string) VerticalPodAutoscalerBuilder AppendCondition(conditionType vpa_types.VerticalPodAutoscalerConditionType, status core.ConditionStatus, reason, message string, lastTransitionTime time.Time) VerticalPodAutoscalerBuilder AppendRecommendation(vpa_types.RecommendedContainerResources) VerticalPodAutoscalerBuilder @@ -81,6 +83,7 @@ type verticalPodAutoscalerBuilder struct { maxAllowed map[string]core.ResourceList controlledValues map[string]*vpa_types.ContainerControlledValues scalingMode map[string]*vpa_types.ContainerScalingMode + startupBoost *vpa_types.StartupBoost recommendation RecommendationBuilder conditions []vpa_types.VerticalPodAutoscalerCondition annotations map[string]string @@ -232,6 +235,25 @@ func (b *verticalPodAutoscalerBuilder) AppendRecommendation(recommendation vpa_t return &c } +func (b *verticalPodAutoscalerBuilder) WithCPUStartupBoost(factor *int32, quantity *resource.Quantity, duration string) VerticalPodAutoscalerBuilder { + c := *b + parsedDuration, _ := time.ParseDuration(duration) + cpuStartupBoost := vpa_types.CPUStartupBoost{ + Duration: &meta.Duration{Duration: parsedDuration}, + } + if factor != nil { + cpuStartupBoost.Type = vpa_types.FactorStartupBoostType + cpuStartupBoost.Factor = factor + } else { + cpuStartupBoost.Type = vpa_types.QuantityStartupBoostType + cpuStartupBoost.Quantity = quantity + } + c.startupBoost = &vpa_types.StartupBoost{ + CPU: cpuStartupBoost, + } + return &c +} + func (b *verticalPodAutoscalerBuilder) Get() *vpa_types.VerticalPodAutoscaler { if len(b.containerNames) == 0 { panic("Must call WithContainer() before Get()") @@ -280,6 +302,7 @@ func (b *verticalPodAutoscalerBuilder) Get() *vpa_types.VerticalPodAutoscaler { ResourcePolicy: &resourcePolicy, TargetRef: b.targetRef, Recommenders: recommenders, + StartupBoost: b.startupBoost, }, Status: vpa_types.VerticalPodAutoscalerStatus{ Recommendation: recommendation, From 3fcc568efa31b9cc74639cbd09a1b0d02e06049d Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Wed, 6 Aug 2025 19:25:44 +0000 Subject: [PATCH 2/5] Add e2e tests for admission controller --- .../e2e/v1/admission_controller.go | 120 +++++++++++++++++- 1 file changed, 119 insertions(+), 1 deletion(-) diff --git a/vertical-pod-autoscaler/e2e/v1/admission_controller.go b/vertical-pod-autoscaler/e2e/v1/admission_controller.go index 502a07b81868..043ed3c19d0d 100644 --- a/vertical-pod-autoscaler/e2e/v1/admission_controller.go +++ b/vertical-pod-autoscaler/e2e/v1/admission_controller.go @@ -28,6 +28,7 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment" @@ -47,7 +48,7 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", ginkgo.Label("FG: f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline ginkgo.BeforeEach(func() { - checkInPlaceOrRecreateTestsEnabled(f, true, false) + checkFeatureGateTestsEnabled(f, features.InPlaceOrRecreate, true, false) waitForVpaWebhookRegistration(f) }) @@ -961,6 +962,123 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() { }) }) +var _ = AdmissionControllerE2eDescribe("Admission-controller", ginkgo.Label("FG:CPUStartupBoost"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, true, false) + waitForVpaWebhookRegistration(f) + }) + + ginkgo.It("boosts CPU by factor on pod creation", func() { + initialCPU := ParseQuantityOrDie("100m") + boostedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (factor)") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(&factor, nil, "15s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(boostedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(boostedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("boosts CPU by quantity on pod creation", func() { + initialCPU := ParseQuantityOrDie("100m") + boostedCPU := ParseQuantityOrDie("500m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy (quantity)") + containerName := GetHamsterContainerNameByIndex(0) + quantity := ParseQuantityOrDie("500m") + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(nil, &quantity, "15s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget("100m", "100Mi"). + GetContainerResources(), + ). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(boostedCPU)).To(gomega.Equal(0)) + gomega.Expect(pod.Spec.Containers[0].Resources.Limits.Cpu().Cmp(boostedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("boosts CPU on pod creation when VPA update mode is Off", func() { + initialCPU := ParseQuantityOrDie("100m") + boostedCPU := ParseQuantityOrDie("200m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with updateMode Off and a startup boost policy") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeOff). // VPA is off, but boost should still work + WithCPUStartupBoost(&factor, nil, "15s"). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(boostedCPU)).To(gomega.Equal(0)) + }) + + ginkgo.It("doesn't boost CPU on pod creation when scaling mode is Off", func() { + initialCPU := ParseQuantityOrDie("100m") + d := NewHamsterDeploymentWithResources(f, initialCPU, ParseQuantityOrDie("100Mi")) + + ginkgo.By("Setting up a VPA with a startup boost policy and scaling mode Off") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(hamsterTargetRef). + WithContainer(containerName). + WithCPUStartupBoost(&factor, nil, "15s"). + WithScalingMode(containerName, vpa_types.ContainerScalingModeOff). + Get() + InstallVPA(f, vpaCRD) + + ginkgo.By("Starting the deployment and verifying the pod is NOT boosted") + podList := startDeploymentPods(f, d) + pod := podList.Items[0] + gomega.Expect(pod.Spec.Containers[0].Resources.Requests.Cpu().Cmp(initialCPU)).To(gomega.Equal(0)) + }) +}) + func startDeploymentPods(f *framework.Framework, deployment *appsv1.Deployment) *apiv1.PodList { // Apiserver watch can lag depending on cached object count and apiserver resource usage. // We assume that watch can lag up to 5 seconds. From c16e90f0487526a36040a52d83203c4d7cdadd4d Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Wed, 6 Aug 2025 19:26:58 +0000 Subject: [PATCH 3/5] Make changes to updater to unboost the cpu resources --- .../pkg/updater/inplace/resource_updates.go | 24 +- .../inplace/unboost_patch_calculator.go | 48 +++ .../pkg/updater/logic/updater.go | 74 ++++- .../pkg/updater/logic/updater_test.go | 307 +++++++++++++++++- vertical-pod-autoscaler/pkg/updater/main.go | 2 +- .../restriction/pods_inplace_restriction.go | 27 ++ .../pkg/utils/test/test_utils.go | 6 + vertical-pod-autoscaler/pkg/utils/vpa/api.go | 36 ++ .../pkg/utils/vpa/api_test.go | 198 +++++++++++ 9 files changed, 689 insertions(+), 33 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go index d15d2bb67d73..6f3c4c200eec 100644 --- a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go @@ -25,6 +25,7 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -49,9 +50,26 @@ func (*resourcesInplaceUpdatesPatchCalculator) PatchResourceTarget() patch.Patch func (c *resourcesInplaceUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { result := []resource_admission.PatchRecord{} - containersResources, _, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) - if err != nil { - return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + var containersResources []vpa_api_util.ContainerResources + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeOff { + // If update mode is "Off", we don't want to apply any recommendations, + // but we still want to unboost. + original, err := annotations.GetOriginalResourcesFromAnnotation(pod) + if err != nil { + return nil, err + } + containersResources = []vpa_api_util.ContainerResources{ + { + Requests: original.Requests, + Limits: original.Limits, + }, + } + } else { + var err error + containersResources, _, err = c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) + if err != nil { + return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + } } for i, containerResources := range containersResources { diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go new file mode 100644 index 000000000000..73c861c2c23d --- /dev/null +++ b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go @@ -0,0 +1,48 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inplace + +import ( + core "k8s.io/api/core/v1" + resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" +) + +type unboostAnnotationPatchCalculator struct{} + +// NewUnboostAnnotationCalculator returns a calculator for the unboost annotation patch. +func NewUnboostAnnotationCalculator() patch.Calculator { + return &unboostAnnotationPatchCalculator{} +} + +// PatchResourceTarget returns the Pod resource to apply calculator patches. +func (*unboostAnnotationPatchCalculator) PatchResourceTarget() patch.PatchResourceTarget { + return patch.Pod +} + +// CalculatePatches calculates the patch to remove the startup CPU boost annotation if the pod is ready to be unboosted. +func (c *unboostAnnotationPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + if vpa_api_util.PodHasCPUBoostInProgress(pod) && vpa_api_util.PodReady(pod) && vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) { + return []resource_admission.PatchRecord{ + patch.GetRemoveAnnotationPatch(annotations.StartupCPUBoostAnnotation), + }, nil + } + return []resource_admission.PatchRecord{}, nil +} diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index 1ce5a7007f42..31ee00463b50 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -159,9 +159,12 @@ func (u *updater) RunOnce(ctx context.Context) { klog.V(3).InfoS("Skipping VPA object in ignored namespace", "vpa", klog.KObj(vpa), "namespace", vpa.Namespace) continue } - if vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeRecreate && - vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeInPlaceOrRecreate { - klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) + updateMode := vpa_api_util.GetUpdateMode(vpa) + if updateMode != vpa_types.UpdateModeRecreate && + updateMode != vpa_types.UpdateModeAuto && + updateMode != vpa_types.UpdateModeInPlaceOrRecreate && + vpa.Spec.StartupBoost == nil { + klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\" and it doesn't have startupBoost configured", "vpa", klog.KObj(vpa)) continue } selector, err := u.selectorFetcher.Fetch(ctx, vpa) @@ -226,8 +229,6 @@ func (u *updater) RunOnce(ctx context.Context) { defer vpasWithInPlaceUpdatablePodsCounter.Observe() defer vpasWithInPlaceUpdatedPodsCounter.Observe() - // NOTE: this loop assumes that controlledPods are filtered - // to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode for vpa, livePods := range controlledPods { vpaSize := len(livePods) updateMode := vpa_api_util.GetUpdateMode(vpa) @@ -238,31 +239,77 @@ func (u *updater) RunOnce(ctx context.Context) { continue } - evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) inPlaceLimiter := u.restrictionFactory.NewPodsInPlaceRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) + podsAvailableForUpdate := make([]*apiv1.Pod, 0) + podsToUnboost := make([]*apiv1.Pod, 0) + withInPlaceUpdated := false - podsForInPlace := make([]*apiv1.Pod, 0) + // First, handle unboosting for pods that have finished their startup period. + if features.Enabled(features.CPUStartupBoost) && vpa.Spec.StartupBoost != nil { + for _, pod := range livePods { + if vpa_api_util.PodHasCPUBoostInProgress(pod) { + if vpa_api_util.PodReady(pod) && vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) { + podsToUnboost = append(podsToUnboost, pod) + } + } else { + podsAvailableForUpdate = append(podsAvailableForUpdate, pod) + } + } + } else { + podsAvailableForUpdate = livePods + } + + for _, pod := range podsToUnboost { + if inPlaceLimiter.CanUnboost(pod, vpa) { + klog.V(2).InfoS("Unboosting pod", "pod", klog.KObj(pod)) + err = u.inPlaceRateLimiter.Wait(ctx) + if err != nil { + klog.V(0).InfoS("In-place rate limiter wait failed for unboosting", "error", err) + return + } + err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder) + if err != nil { + klog.V(0).InfoS("Unboosting failed", "error", err, "pod", klog.KObj(pod)) + metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "UnboostError") + } else { + klog.V(2).InfoS("Successfully unboosted pod", "pod", klog.KObj(pod)) + withInPlaceUpdated = true + metrics_updater.AddInPlaceUpdatedPod(vpaSize) + } + } + } + + if updateMode == vpa_types.UpdateModeOff || updateMode == vpa_types.UpdateModeInitial { + continue + } + + evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) podsForEviction := make([]*apiv1.Pod, 0) + podsForInPlace := make([]*apiv1.Pod, 0) + withInPlaceUpdatable := false + withEvictable := false if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && features.Enabled(features.InPlaceOrRecreate) { - podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa) + podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(podsAvailableForUpdate, inPlaceLimiter), vpa) inPlaceUpdatablePodsCounter.Add(vpaSize, len(podsForInPlace)) + if len(podsForInPlace) > 0 { + withInPlaceUpdatable = true + } } else { // If the feature gate is not enabled but update mode is InPlaceOrRecreate, updater will always fallback to eviction. if updateMode == vpa_types.UpdateModeInPlaceOrRecreate { klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate) } - podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa) + podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(podsAvailableForUpdate, evictionLimiter), vpa) evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction)) + if len(podsForEviction) > 0 { + withEvictable = true + } } - withInPlaceUpdatable := false - withInPlaceUpdated := false - withEvictable := false withEvicted := false for _, pod := range podsForInPlace { - withInPlaceUpdatable = true decision := inPlaceLimiter.CanInPlaceUpdate(pod) if decision == utils.InPlaceDeferred { @@ -289,7 +336,6 @@ func (u *updater) RunOnce(ctx context.Context) { } for _, pod := range podsForEviction { - withEvictable = true if !evictionLimiter.CanEvict(pod) { continue } diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index 6641d85c2b4c..33e6f8770e37 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -62,6 +62,7 @@ func TestRunOnce_Mode(t *testing.T) { expectedInPlacedCount int canEvict bool canInPlaceUpdate utils.InPlaceDecision + isCPUBoostTest bool }{ { name: "with Auto mode", @@ -133,6 +134,50 @@ func TestRunOnce_Mode(t *testing.T) { canEvict: true, canInPlaceUpdate: utils.InPlaceApproved, }, + { + name: "with InPlaceOrRecreate mode and unboost", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Recreate mode and unboost", + updateMode: vpa_types.UpdateModeRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Auto mode and unboost", + updateMode: vpa_types.UpdateModeAuto, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with InPlaceOrRecreate mode and unboost and In-place fails", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: true, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -145,6 +190,7 @@ func TestRunOnce_Mode(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, tc.canInPlaceUpdate, + tc.isCPUBoostTest, ) }) } @@ -184,6 +230,7 @@ func TestRunOnce_Status(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, utils.InPlaceApproved, + false, ) }) } @@ -198,8 +245,10 @@ func testRunOnceBase( expectedEvictionCount int, expectedInPlacedCount int, canInPlaceUpdate utils.InPlaceDecision, + isCPUBoostTest bool, ) { featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -225,6 +274,18 @@ func testRunOnceBase( eviction := &test.PodsEvictionRestrictionMock{} inplace := &test.PodsInPlaceRestrictionMock{} + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{ + Kind: rc.Kind, + Name: rc.Name, + APIVersion: rc.APIVersion, + }). + Get() + for i := range pods { pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). @@ -232,15 +293,30 @@ func testRunOnceBase( Get() pods[i].Labels = labels + if isCPUBoostTest { + pods[i].Annotations = map[string]string{ + "startup-cpu-boost": "", + } + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + } - inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + if !isCPUBoostTest { + inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + eviction.On("CanEvict", pods[i]).Return(true) + } else { + inplace.On("CanUnboost", pods[i], vpaObj).Return(isCPUBoostTest) + } if shouldInPlaceFail { inplace.On("InPlaceUpdate", pods[i], nil).Return(fmt.Errorf("in-place update failed")) } else { inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) } - eviction.On("CanEvict", pods[i]).Return(true) eviction.On("Evict", pods[i], nil).Return(nil) } @@ -252,21 +328,16 @@ func testRunOnceBase( podLister := &test.PodListerMock{} podLister.On("List").Return(pods, nil) - targetRef := &v1.CrossVersionObjectReference{ - Kind: rc.Kind, - Name: rc.Name, - APIVersion: rc.APIVersion, - } - - vpaObj := test.VerticalPodAutoscaler(). - WithContainer(containerName). - WithTarget("2", "200M"). - WithMinAllowed(containerName, "1", "100M"). - WithMaxAllowed(containerName, "3", "1G"). - WithTargetRef(targetRef). - Get() vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} + + if isCPUBoostTest { + vpaObj.Spec.StartupBoost = &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &metav1.Duration{Duration: 1 * time.Minute}, + }, + } + } vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) @@ -504,3 +575,209 @@ func TestNewEventRecorder(t *testing.T) { }) } } + +func TestRunOnce_AutoUnboostThenEvict(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + Get() + updateMode := vpa_types.UpdateModeAuto + vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} + vpaObj.Spec.StartupBoost = &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &metav1.Duration{Duration: 1 * time.Minute}, + }, + } + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). + AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to eviction + for i := range pods { + pods[i].Annotations = nil + inplace.On("CanUnboost", pods[i], vpaObj).Return(false).Once() + eviction.On("CanEvict", pods[i]).Return(true) + eviction.On("Evict", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) // all 5 from previous run only + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 5) +} + +func TestRunOnce_AutoUnboostThenInPlace(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + Get() + vpaObj.Spec.StartupBoost = &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &metav1.Duration{Duration: 1 * time.Minute}, + }, + } + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). + AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to eviction + for i := range pods { + pods[i].Annotations = nil + inplace.On("CanInPlaceUpdate", pods[i]).Return(utils.InPlaceApproved) + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 10) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 0) +} diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go index 8394fd54b29c..d120841bac8e 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -206,7 +206,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { recommendationProvider := recommendation.NewProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator)) - calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator()} + calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator(), inplace.NewUnboostAnnotationCalculator()} // TODO: use SharedInformerFactory in updater updater, err := updater.NewUpdater( diff --git a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go index db5a27aace26..4a3810ce3545 100644 --- a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go @@ -35,6 +35,7 @@ import ( vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/utils" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) // TODO: Make these configurable by flags @@ -57,6 +58,8 @@ type PodsInPlaceRestriction interface { InPlaceUpdate(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error // CanInPlaceUpdate checks if pod can be safely updated in-place. If not, it will return a decision to potentially evict the pod. CanInPlaceUpdate(pod *apiv1.Pod) utils.InPlaceDecision + // CanUnboost checks if a pod can be safely unboosted. + CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool } // PodsInPlaceRestrictionImpl is the implementation of the PodsInPlaceRestriction interface. @@ -98,6 +101,30 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP return utils.InPlaceDeferred } +// CanUnboost checks if a pod can be safely unboosted. +func (ip *PodsInPlaceRestrictionImpl) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if !features.Enabled(features.CPUStartupBoost) { + return false + } + ready := vpa_api_util.PodReady(pod) + durationPassed := vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) + hasAnnotation := vpa_api_util.PodHasCPUBoostInProgress(pod) + + klog.V(2).InfoS("Checking if pod can be unboosted", "pod", klog.KObj(pod), "ready", ready, "durationPassed", durationPassed, "hasAnnotation", hasAnnotation) + + if !ready || !durationPassed || !hasAnnotation { + return false + } + cr, present := ip.podToReplicaCreatorMap[getPodID(pod)] + if present { + singleGroupStats, present := ip.creatorToSingleGroupStatsMap[cr] + if present { + return singleGroupStats.isPodDisruptable() + } + } + return false +} + // InPlaceUpdate sends calculates patches and sends resize request to api client. Returns error if pod cannot be in-place updated or if client returned error. // Does not check if pod was actually in-place updated after grace period. func (ip *PodsInPlaceRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error { diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index 8ae360177f31..202985b0bb62 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -139,6 +139,12 @@ func (m *PodsInPlaceRestrictionMock) CanInPlaceUpdate(pod *apiv1.Pod) utils.InPl return args.Get(0).(utils.InPlaceDecision) } +// CanUnboost is a mock implementation of PodsInPlaceRestriction.CanUnboost +func (m *PodsInPlaceRestrictionMock) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + args := m.Called(pod, vpa) + return args.Bool(0) +} + // PodListerMock is a mock of PodLister type PodListerMock struct { mock.Mock diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api.go b/vertical-pod-autoscaler/pkg/utils/vpa/api.go index b30f3fc6039d..a711d7d9b803 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api.go @@ -38,6 +38,7 @@ import ( vpa_api "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/typed/autoscaling.k8s.io/v1" vpa_lister "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/listers/autoscaling.k8s.io/v1" controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" ) // VpaWithSelector is a pair of VPA and its selector. @@ -291,3 +292,38 @@ func CreateOrUpdateVpaCheckpoint(vpaCheckpointClient vpa_api.VerticalPodAutoscal } return nil } + +// PodReady returns true if the pod is ready. +func PodReady(pod *core.Pod) bool { + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady && cond.Status == core.ConditionTrue { + return true + } + } + return false +} + +// PodStartupBoostDurationPassed returns true if the startup boost duration has passed. +func PodStartupBoostDurationPassed(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if vpa.Spec.StartupBoost == nil || vpa.Spec.StartupBoost.CPU.Duration == nil { + return true + } + if !PodReady(pod) { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady { + return time.Since(cond.LastTransitionTime.Time) > vpa.Spec.StartupBoost.CPU.Duration.Duration + } + } + return false +} + +// PodHasCPUBoostInProgress returns true if the pod has the CPU boost annotation. +func PodHasCPUBoostInProgress(pod *core.Pod) bool { + if pod.Annotations == nil { + return false + } + _, found := pod.Annotations[annotations.StartupCPUBoostAnnotation] + return found +} diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go index 1f1c712f9e95..4fafe3fbc69e 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go @@ -399,3 +399,201 @@ func TestFindParentControllerForPod(t *testing.T) { }) } } + +func TestPodReady(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected bool + }{ + { + name: "PodReady condition is True", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + }, + }, + }, + }, + expected: true, + }, + { + name: "PodReady condition is False", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionFalse, + }, + }, + }, + }, + expected: false, + }, + { + name: "No PodReady condition", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{}, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodReady(tc.pod)) + }) + } +} + +func TestPodStartupBoostDurationPassed(t *testing.T) { + now := meta.Now() + past := meta.Time{Time: now.Add(-2 * time.Minute)} + testCases := []struct { + name string + pod *core.Pod + vpa *vpa_types.VerticalPodAutoscaler + expected bool + }{ + { + name: "No StartupBoost config", + pod: &core.Pod{}, + vpa: &vpa_types.VerticalPodAutoscaler{}, + expected: true, + }, + { + name: "No duration in StartupBoost", + pod: &core.Pod{}, + vpa: &vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{}, + }, + }, + }, + expected: true, + }, + { + name: "Pod not ready", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionFalse, + }, + }, + }, + }, + vpa: &vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &meta.Duration{Duration: 1 * time.Minute}, + }, + }, + }, + }, + expected: false, + }, + { + name: "Duration passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: past, + }, + }, + }, + }, + vpa: &vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &meta.Duration{Duration: 1 * time.Minute}, + }, + }, + }, + }, + expected: true, + }, + { + name: "Duration not passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: now, + }, + }, + }, + }, + vpa: &vpa_types.VerticalPodAutoscaler{ + Spec: vpa_types.VerticalPodAutoscalerSpec{ + StartupBoost: &vpa_types.StartupBoost{ + CPU: vpa_types.CPUStartupBoost{ + Duration: &meta.Duration{Duration: 1 * time.Minute}, + }, + }, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodStartupBoostDurationPassed(tc.pod, tc.vpa)) + }) + } +} + +func TestPodHasCPUBoostInProgress(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected bool + }{ + { + name: "No annotations", + pod: &core.Pod{}, + expected: false, + }, + { + name: "Annotation present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "startup-cpu-boost": "", + }, + }, + }, + expected: true, + }, + { + name: "Annotation not present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "another-annotation": "true", + }, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodHasCPUBoostInProgress(tc.pod)) + }) + } +} From 449f57e3b06efb376cd171a1053502fdd2a46361 Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Wed, 6 Aug 2025 19:27:23 +0000 Subject: [PATCH 4/5] Add e2e tests for updater --- vertical-pod-autoscaler/e2e/v1/common.go | 50 ++++++++----- vertical-pod-autoscaler/e2e/v1/updater.go | 87 ++++++++++++++++++++++- 2 files changed, 119 insertions(+), 18 deletions(-) diff --git a/vertical-pod-autoscaler/e2e/v1/common.go b/vertical-pod-autoscaler/e2e/v1/common.go index ce5e8e76074e..26018f2912e8 100644 --- a/vertical-pod-autoscaler/e2e/v1/common.go +++ b/vertical-pod-autoscaler/e2e/v1/common.go @@ -37,8 +37,8 @@ import ( "k8s.io/apimachinery/pkg/util/wait" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned" - "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" clientset "k8s.io/client-go/kubernetes" + "k8s.io/component-base/featuregate" "k8s.io/kubernetes/test/e2e/framework" framework_deployment "k8s.io/kubernetes/test/e2e/framework/deployment" ) @@ -359,14 +359,30 @@ func PatchVpaRecommendation(f *framework.Framework, vpa *vpa_types.VerticalPodAu // AnnotatePod adds annotation for an existing pod. func AnnotatePod(f *framework.Framework, podName, annotationName, annotationValue string) { - bytes, err := json.Marshal([]patchRecord{{ + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), podName, metav1.GetOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to get pod.") + + patches := []patchRecord{} + if pod.Annotations == nil { + patches = append(patches, patchRecord{ + Op: "add", + Path: "/metadata/annotations", + Value: make(map[string]string), + }) + } + + patches = append(patches, patchRecord{ Op: "add", - Path: fmt.Sprintf("/metadata/annotations/%v", annotationName), + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), Value: annotationValue, - }}) - pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) + }) + + bytes, err := json.Marshal(patches) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + patchedPod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Patch(context.TODO(), podName, types.JSONPatchType, bytes, metav1.PatchOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to patch pod.") - gomega.Expect(pod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) + gomega.Expect(patchedPod.Annotations[annotationName]).To(gomega.Equal(annotationValue)) } // ParseQuantityOrDie parses quantity from string and dies with an error if @@ -613,35 +629,35 @@ func WaitForPodsUpdatedWithoutEviction(f *framework.Framework, initialPods *apiv return err } -// checkInPlaceOrRecreateTestsEnabled check for enabled feature gates in the cluster used for the -// InPlaceOrRecreate VPA feature. -// Use this in a "beforeEach" call before any suites that use InPlaceOrRecreate featuregate. -func checkInPlaceOrRecreateTestsEnabled(f *framework.Framework, checkAdmission, checkUpdater bool) { - ginkgo.By("Checking InPlacePodVerticalScaling cluster feature gate is on") +// checkFeatureGateTestsEnabled check for enabled feature gates in the cluster used for the +// given VPA feature. +// Use this in a "beforeEach" call before any suites that use a featuregate. +func checkFeatureGateTestsEnabled(f *framework.Framework, feature featuregate.Feature, checkAdmission, checkUpdater bool) { + ginkgo.By(fmt.Sprintf("Checking %s cluster feature gate is on", feature)) if checkUpdater { - ginkgo.By("Checking InPlaceOrRecreate VPA feature gate is enabled for updater") + ginkgo.By(fmt.Sprintf("Checking %s VPA feature gate is enabled for updater", feature)) deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-updater", metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) vpaUpdaterPod := deploy.Spec.Template.Spec.Containers[0] gomega.Expect(vpaUpdaterPod.Name).To(gomega.Equal("updater")) - if !anyContainsSubstring(vpaUpdaterPod.Args, fmt.Sprintf("%s=true", string(features.InPlaceOrRecreate))) { - ginkgo.Skip("Skipping suite: InPlaceOrRecreate feature gate is not enabled for the VPA updater") + if !anyContainsSubstring(vpaUpdaterPod.Args, fmt.Sprintf("%s=true", string(feature))) { + ginkgo.Skip(fmt.Sprintf("Skipping suite: %s feature gate is not enabled for the VPA updater", feature)) } } if checkAdmission { - ginkgo.By("Checking InPlaceOrRecreate VPA feature gate is enabled for admission controller") + ginkgo.By(fmt.Sprintf("Checking %s VPA feature gate is enabled for admission controller", feature)) deploy, err := f.ClientSet.AppsV1().Deployments(VpaNamespace).Get(context.TODO(), "vpa-admission-controller", metav1.GetOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) gomega.Expect(deploy.Spec.Template.Spec.Containers).To(gomega.HaveLen(1)) vpaAdmissionPod := deploy.Spec.Template.Spec.Containers[0] gomega.Expect(vpaAdmissionPod.Name).To(gomega.Equal("admission-controller")) - if !anyContainsSubstring(vpaAdmissionPod.Args, fmt.Sprintf("%s=true", string(features.InPlaceOrRecreate))) { - ginkgo.Skip("Skipping suite: InPlaceOrRecreate feature gate is not enabled for VPA admission controller") + if !anyContainsSubstring(vpaAdmissionPod.Args, fmt.Sprintf("%s=true", string(feature))) { + ginkgo.Skip(fmt.Sprintf("Skipping suite: %s feature gate is not enabled for VPA admission controller", feature)) } } } diff --git a/vertical-pod-autoscaler/e2e/v1/updater.go b/vertical-pod-autoscaler/e2e/v1/updater.go index a72cdf6b1eba..12963407081d 100644 --- a/vertical-pod-autoscaler/e2e/v1/updater.go +++ b/vertical-pod-autoscaler/e2e/v1/updater.go @@ -25,6 +25,8 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" @@ -145,7 +147,7 @@ var _ = UpdaterE2eDescribe("Updater", ginkgo.Label("FG:InPlaceOrRecreate"), func f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline ginkgo.BeforeEach(func() { - checkInPlaceOrRecreateTestsEnabled(f, false, true) + checkFeatureGateTestsEnabled(f, features.InPlaceOrRecreate, false, true) }) ginkgo.It("In-place update pods when Admission Controller status available", func() { @@ -211,6 +213,89 @@ var _ = UpdaterE2eDescribe("Updater", ginkgo.Label("FG:InPlaceOrRecreate"), func }) }) +var _ = UpdaterE2eDescribe("Updater", ginkgo.Label("FG:CPUStartupBoost"), func() { + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, false, true) + }) + + ginkgo.It("Unboost pods when they become Ready", func() { + const statusUpdateInterval = 10 * time.Second + + ginkgo.By("Setting up the Admission Controller status") + stopCh := make(chan struct{}) + statusUpdater := status.NewUpdater( + f.ClientSet, + status.AdmissionControllerStatusName, + status.AdmissionControllerStatusNamespace, + statusUpdateInterval, + "e2e test", + ) + defer func() { + // Schedule a cleanup of the Admission Controller status. + // Status is created outside the test namespace. + ginkgo.By("Deleting the Admission Controller status") + close(stopCh) + err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace). + Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }() + statusUpdater.Run(stopCh) + + podList := setupPodsForCPUBoost(f, "100m", "100Mi") + initialPods := podList.DeepCopy() + + ginkgo.By("Waiting for pods to be in-place updated") + err := WaitForPodsUpdatedWithoutEviction(f, initialPods) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + +}) + +func setupPodsForCPUBoost(f *framework.Framework, hamsterCPU, hamsterMemory string) *apiv1.PodList { + controller := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster-deployment", + } + ginkgo.By(fmt.Sprintf("Setting up a hamster %v", controller.Kind)) + // Create pods with boosted CPU, which is 2x the target recommendation + boostedCPU := "200m" + setupHamsterController(f, controller.Kind, boostedCPU, hamsterMemory, defaultHamsterReplicas) + podList, err := GetHamsterPods(f) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + ginkgo.By("Setting up a VPA CRD") + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(2) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(controller). + WithUpdateMode(vpa_types.UpdateModeAuto). + WithContainer(containerName). + WithCPUStartupBoost(&factor, nil, "1s"). + AppendRecommendation( + test.Recommendation(). + WithContainer(containerName). + WithTarget(hamsterCPU, hamsterMemory). + GetContainerResources(), + ). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Annotating pods with boost annotation") + for _, pod := range podList.Items { + original, err := annotations.GetOriginalResourcesAnnotationValue(&pod.Spec.Containers[0]) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + AnnotatePod(f, pod.Name, annotations.StartupCPUBoostAnnotation, original) + } + return podList +} + func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList { return setupPodsForEviction(f, "100m", "100Mi", nil) } From 6ad3a0a506e35be76508a27da7818311dbc39c1b Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Wed, 6 Aug 2025 19:28:56 +0000 Subject: [PATCH 5/5] Add e2e tests to test whole lifecyle of CPU startup boost --- vertical-pod-autoscaler/e2e/v1/actuation.go | 3 +- vertical-pod-autoscaler/e2e/v1/full_vpa.go | 120 +++++++++++++++++++- 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/vertical-pod-autoscaler/e2e/v1/actuation.go b/vertical-pod-autoscaler/e2e/v1/actuation.go index 509b84c23f64..559b7dd3ff4c 100644 --- a/vertical-pod-autoscaler/e2e/v1/actuation.go +++ b/vertical-pod-autoscaler/e2e/v1/actuation.go @@ -35,6 +35,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" restriction "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/restriction" updaterutils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/utils" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" @@ -57,7 +58,7 @@ var _ = ActuationSuiteE2eDescribe("Actuation", ginkgo.Label("FG:InPlaceOrRecreat f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline ginkgo.BeforeEach(func() { - checkInPlaceOrRecreateTestsEnabled(f, true, true) + checkFeatureGateTestsEnabled(f, features.InPlaceOrRecreate, true, true) }) ginkgo.It("still applies recommendations on restart when update mode is InPlaceOrRecreate", func() { diff --git a/vertical-pod-autoscaler/e2e/v1/full_vpa.go b/vertical-pod-autoscaler/e2e/v1/full_vpa.go index ec1467f58a53..ce67113c2d0b 100644 --- a/vertical-pod-autoscaler/e2e/v1/full_vpa.go +++ b/vertical-pod-autoscaler/e2e/v1/full_vpa.go @@ -27,6 +27,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test" "k8s.io/kubernetes/test/e2e/framework" podsecurity "k8s.io/pod-security-admission/api" @@ -63,7 +64,7 @@ var _ = FullVpaE2eDescribe("Pods under VPA", func() { ginkgo.Describe("with InPlaceOrRecreate update mode", ginkgo.Label("FG:InPlaceOrRecreate"), func() { ginkgo.BeforeEach(func() { - checkInPlaceOrRecreateTestsEnabled(f, true, false) + checkFeatureGateTestsEnabled(f, features.InPlaceOrRecreate, true, false) ns := f.Namespace.Name ginkgo.By("Setting up a hamster deployment") @@ -347,6 +348,123 @@ var _ = FullVpaE2eDescribe("Pods under VPA with non-recognized recommender expli }) }) +var _ = FullVpaE2eDescribe("Pods under VPA with CPUStartupBoost", ginkgo.Label("FG:CPUStartupBoost"), func() { + var ( + rc *ResourceConsumer + ) + replicas := 3 + + ginkgo.AfterEach(func() { + rc.CleanUp() + }) + + f := framework.NewDefaultFramework("vertical-pod-autoscaling") + f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline + + ginkgo.Describe("have CPU startup boost recommendation applied", func() { + ginkgo.BeforeEach(func() { + checkFeatureGateTestsEnabled(f, features.CPUStartupBoost, true, true) + waitForVpaWebhookRegistration(f) + }) + + ginkgo.It("to all containers of a pod", func() { + ns := f.Namespace.Name + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(&factor, nil, "10s"). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("800m"), ParseQuantityOrDie("1200m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + + ginkgo.It("to a subset of containers in a pod", func() { + ns := f.Namespace.Name + + ginkgo.By("Setting up a VPA CRD with CPUStartupBoost") + targetRef := &autoscaling.CrossVersionObjectReference{ + APIVersion: "apps/v1", + Kind: "Deployment", + Name: "hamster", + } + + containerName := GetHamsterContainerNameByIndex(0) + factor := int32(100) + vpaCRD := test.VerticalPodAutoscaler(). + WithName("hamster-vpa"). + WithNamespace(f.Namespace.Name). + WithTargetRef(targetRef). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithContainer(containerName). + WithCPUStartupBoost(&factor, nil, "10s"). + Get() + + InstallVPA(f, vpaCRD) + + ginkgo.By("Setting up a hamster deployment") + rc = NewDynamicResourceConsumer("hamster", ns, KindDeployment, + replicas, + 1, /*initCPUTotal*/ + 10, /*initMemoryTotal*/ + 1, /*initCustomMetric*/ + initialCPU, /*cpuRequest*/ + initialMemory, /*memRequest*/ + f.ClientSet, + f.ScalesGetter) + + // Pods should be created with boosted CPU (10m * 100 = 1000m) + err := waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie("999m"), ParseQuantityOrDie("1001m")) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Pods should be scaled back down in-place after they become Ready and + // StartupBoost.CPU.Duration has elapsed + err = waitForResourceRequestInRangeInPods( + f, pollTimeout, metav1.ListOptions{LabelSelector: "name=hamster"}, apiv1.ResourceCPU, + ParseQuantityOrDie(minimalCPULowerBound), ParseQuantityOrDie(minimalCPUUpperBound)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) + }) + +}) + var _ = FullVpaE2eDescribe("OOMing pods under VPA", func() { const replicas = 3