From 79460adc682eb0d3982cf87cd1d58564db29e75b Mon Sep 17 00:00:00 2001 From: Kam Saiyed Date: Mon, 6 Oct 2025 17:29:02 +0000 Subject: [PATCH] Make changes to updater to add the unboosting logic --- .../resource/pod/patch/util.go | 8 + .../pkg/updater/inplace/resource_updates.go | 24 +- .../inplace/unboost_patch_calculator.go | 49 +++ .../pkg/updater/logic/updater.go | 77 ++++- .../pkg/updater/logic/updater_test.go | 298 +++++++++++++++++- vertical-pod-autoscaler/pkg/updater/main.go | 2 +- .../restriction/pods_inplace_restriction.go | 27 ++ .../pkg/utils/test/test_utils.go | 6 + vertical-pod-autoscaler/pkg/utils/vpa/api.go | 36 +++ .../pkg/utils/vpa/api_test.go | 168 ++++++++++ 10 files changed, 662 insertions(+), 33 deletions(-) create mode 100644 vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go diff --git a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go index 0c68ab6cd557..b930be0f1988 100644 --- a/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go +++ b/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch/util.go @@ -43,6 +43,14 @@ func GetAddAnnotationPatch(annotationName, annotationValue string) resource_admi } } +// GetRemoveAnnotationPatch returns a patch to remove an annotation. +func GetRemoveAnnotationPatch(annotationName string) resource_admission.PatchRecord { + return resource_admission.PatchRecord{ + Op: "remove", + Path: fmt.Sprintf("/metadata/annotations/%s", annotationName), + } +} + // GetAddResourceRequirementValuePatch returns a patch record to add resource requirements to a container. func GetAddResourceRequirementValuePatch(i int, kind string, resource core.ResourceName, quantity resource.Quantity) resource_admission.PatchRecord { return resource_admission.PatchRecord{ diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go index d15d2bb67d73..6f3c4c200eec 100644 --- a/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go +++ b/vertical-pod-autoscaler/pkg/updater/inplace/resource_updates.go @@ -25,6 +25,7 @@ import ( "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/recommendation" vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) @@ -49,9 +50,26 @@ func (*resourcesInplaceUpdatesPatchCalculator) PatchResourceTarget() patch.Patch func (c *resourcesInplaceUpdatesPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { result := []resource_admission.PatchRecord{} - containersResources, _, err := c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) - if err != nil { - return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + var containersResources []vpa_api_util.ContainerResources + if vpa_api_util.GetUpdateMode(vpa) == vpa_types.UpdateModeOff { + // If update mode is "Off", we don't want to apply any recommendations, + // but we still want to unboost. + original, err := annotations.GetOriginalResourcesFromAnnotation(pod) + if err != nil { + return nil, err + } + containersResources = []vpa_api_util.ContainerResources{ + { + Requests: original.Requests, + Limits: original.Limits, + }, + } + } else { + var err error + containersResources, _, err = c.recommendationProvider.GetContainersResourcesForPod(pod, vpa) + if err != nil { + return []resource_admission.PatchRecord{}, fmt.Errorf("failed to calculate resource patch for pod %s/%s: %v", pod.Namespace, pod.Name, err) + } } for i, containerResources := range containersResources { diff --git a/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go new file mode 100644 index 000000000000..5d9c00df0d80 --- /dev/null +++ b/vertical-pod-autoscaler/pkg/updater/inplace/unboost_patch_calculator.go @@ -0,0 +1,49 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inplace + +import ( + core "k8s.io/api/core/v1" + + resource_admission "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/admission-controller/resource/pod/patch" + vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" +) + +type unboostAnnotationPatchCalculator struct{} + +// NewUnboostAnnotationCalculator returns a calculator for the unboost annotation patch. +func NewUnboostAnnotationCalculator() patch.Calculator { + return &unboostAnnotationPatchCalculator{} +} + +// PatchResourceTarget returns the Pod resource to apply calculator patches. +func (*unboostAnnotationPatchCalculator) PatchResourceTarget() patch.PatchResourceTarget { + return patch.Pod +} + +// CalculatePatches calculates the patch to remove the startup CPU boost annotation if the pod is ready to be unboosted. +func (c *unboostAnnotationPatchCalculator) CalculatePatches(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) ([]resource_admission.PatchRecord, error) { + if vpa_api_util.PodHasCPUBoostInProgress(pod) && vpa_api_util.PodReady(pod) && vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) { + return []resource_admission.PatchRecord{ + patch.GetRemoveAnnotationPatch(annotations.StartupCPUBoostAnnotation), + }, nil + } + return []resource_admission.PatchRecord{}, nil +} diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater.go b/vertical-pod-autoscaler/pkg/updater/logic/updater.go index 1ce5a7007f42..35f3c5ac91d9 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater.go @@ -159,9 +159,12 @@ func (u *updater) RunOnce(ctx context.Context) { klog.V(3).InfoS("Skipping VPA object in ignored namespace", "vpa", klog.KObj(vpa), "namespace", vpa.Namespace) continue } - if vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeRecreate && - vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeAuto && vpa_api_util.GetUpdateMode(vpa) != vpa_types.UpdateModeInPlaceOrRecreate { - klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\"", "vpa", klog.KObj(vpa)) + updateMode := vpa_api_util.GetUpdateMode(vpa) + if updateMode != vpa_types.UpdateModeRecreate && + updateMode != vpa_types.UpdateModeAuto && + updateMode != vpa_types.UpdateModeInPlaceOrRecreate && + vpa.Spec.StartupBoost == nil { + klog.V(3).InfoS("Skipping VPA object because its mode is not \"InPlaceOrRecreate\", \"Recreate\" or \"Auto\" and it doesn't have startupBoost configured", "vpa", klog.KObj(vpa)) continue } selector, err := u.selectorFetcher.Fetch(ctx, vpa) @@ -226,8 +229,6 @@ func (u *updater) RunOnce(ctx context.Context) { defer vpasWithInPlaceUpdatablePodsCounter.Observe() defer vpasWithInPlaceUpdatedPodsCounter.Observe() - // NOTE: this loop assumes that controlledPods are filtered - // to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode for vpa, livePods := range controlledPods { vpaSize := len(livePods) updateMode := vpa_api_util.GetUpdateMode(vpa) @@ -238,31 +239,80 @@ func (u *updater) RunOnce(ctx context.Context) { continue } - evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) inPlaceLimiter := u.restrictionFactory.NewPodsInPlaceRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) + podsAvailableForUpdate := make([]*apiv1.Pod, 0) + podsToUnboost := make([]*apiv1.Pod, 0) + withInPlaceUpdated := false - podsForInPlace := make([]*apiv1.Pod, 0) + if features.Enabled(features.CPUStartupBoost) && vpa.Spec.StartupBoost != nil { + // First, handle unboosting for pods that have finished their startup period. + for _, pod := range livePods { + if vpa_api_util.PodHasCPUBoostInProgress(pod) { + if vpa_api_util.PodReady(pod) && vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) { + podsToUnboost = append(podsToUnboost, pod) + } + } else { + podsAvailableForUpdate = append(podsAvailableForUpdate, pod) + } + } + + // Perform unboosting + for _, pod := range podsToUnboost { + if inPlaceLimiter.CanUnboost(pod, vpa) { + klog.V(2).InfoS("Unboosting pod", "pod", klog.KObj(pod)) + err = u.inPlaceRateLimiter.Wait(ctx) + if err != nil { + klog.V(0).InfoS("In-place rate limiter wait failed for unboosting", "error", err) + return + } + err := inPlaceLimiter.InPlaceUpdate(pod, vpa, u.eventRecorder) + if err != nil { + klog.V(0).InfoS("Unboosting failed", "error", err, "pod", klog.KObj(pod)) + metrics_updater.RecordFailedInPlaceUpdate(vpaSize, "UnboostError") + } else { + klog.V(2).InfoS("Successfully unboosted pod", "pod", klog.KObj(pod)) + withInPlaceUpdated = true + metrics_updater.AddInPlaceUpdatedPod(vpaSize) + } + } + } + } else { + // CPU Startup Boost is not enabled or configured for this VPA, + // so all live pods are available for potential standard VPA updates. + podsAvailableForUpdate = livePods + } + + if updateMode == vpa_types.UpdateModeOff || updateMode == vpa_types.UpdateModeInitial { + continue + } + + evictionLimiter := u.restrictionFactory.NewPodsEvictionRestriction(creatorToSingleGroupStatsMap, podToReplicaCreatorMap) podsForEviction := make([]*apiv1.Pod, 0) + podsForInPlace := make([]*apiv1.Pod, 0) + withInPlaceUpdatable := false + withEvictable := false if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && features.Enabled(features.InPlaceOrRecreate) { - podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa) + podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(podsAvailableForUpdate, inPlaceLimiter), vpa) inPlaceUpdatablePodsCounter.Add(vpaSize, len(podsForInPlace)) + if len(podsForInPlace) > 0 { + withInPlaceUpdatable = true + } } else { // If the feature gate is not enabled but update mode is InPlaceOrRecreate, updater will always fallback to eviction. if updateMode == vpa_types.UpdateModeInPlaceOrRecreate { klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate) } - podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa) + podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(podsAvailableForUpdate, evictionLimiter), vpa) evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction)) + if len(podsForEviction) > 0 { + withEvictable = true + } } - withInPlaceUpdatable := false - withInPlaceUpdated := false - withEvictable := false withEvicted := false for _, pod := range podsForInPlace { - withInPlaceUpdatable = true decision := inPlaceLimiter.CanInPlaceUpdate(pod) if decision == utils.InPlaceDeferred { @@ -289,7 +339,6 @@ func (u *updater) RunOnce(ctx context.Context) { } for _, pod := range podsForEviction { - withEvictable = true if !evictionLimiter.CanEvict(pod) { continue } diff --git a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go index 6641d85c2b4c..7cbc6374a392 100644 --- a/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go +++ b/vertical-pod-autoscaler/pkg/updater/logic/updater_test.go @@ -62,6 +62,7 @@ func TestRunOnce_Mode(t *testing.T) { expectedInPlacedCount int canEvict bool canInPlaceUpdate utils.InPlaceDecision + isCPUBoostTest bool }{ { name: "with Auto mode", @@ -133,6 +134,50 @@ func TestRunOnce_Mode(t *testing.T) { canEvict: true, canInPlaceUpdate: utils.InPlaceApproved, }, + { + name: "with InPlaceOrRecreate mode and unboost", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Recreate mode and unboost", + updateMode: vpa_types.UpdateModeRecreate, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with Auto mode and unboost", + updateMode: vpa_types.UpdateModeAuto, + shouldInPlaceFail: false, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, + { + name: "with InPlaceOrRecreate mode and unboost and In-place fails", + updateMode: vpa_types.UpdateModeInPlaceOrRecreate, + shouldInPlaceFail: true, + expectFetchCalls: true, + expectedEvictionCount: 0, + expectedInPlacedCount: 5, + canEvict: true, + canInPlaceUpdate: utils.InPlaceApproved, + isCPUBoostTest: true, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { @@ -145,6 +190,7 @@ func TestRunOnce_Mode(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, tc.canInPlaceUpdate, + tc.isCPUBoostTest, ) }) } @@ -184,6 +230,7 @@ func TestRunOnce_Status(t *testing.T) { tc.expectedEvictionCount, tc.expectedInPlacedCount, utils.InPlaceApproved, + false, ) }) } @@ -198,8 +245,10 @@ func testRunOnceBase( expectedEvictionCount int, expectedInPlacedCount int, canInPlaceUpdate utils.InPlaceDecision, + isCPUBoostTest bool, ) { featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -225,6 +274,18 @@ func testRunOnceBase( eviction := &test.PodsEvictionRestrictionMock{} inplace := &test.PodsInPlaceRestrictionMock{} + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{ + Kind: rc.Kind, + Name: rc.Name, + APIVersion: rc.APIVersion, + }). + Get() + for i := range pods { pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). @@ -232,15 +293,30 @@ func testRunOnceBase( Get() pods[i].Labels = labels + if isCPUBoostTest { + pods[i].Annotations = map[string]string{ + "startup-cpu-boost": "", + } + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + } - inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + if !isCPUBoostTest { + inplace.On("CanInPlaceUpdate", pods[i]).Return(canInPlaceUpdate) + eviction.On("CanEvict", pods[i]).Return(true) + } else { + inplace.On("CanUnboost", pods[i], vpaObj).Return(isCPUBoostTest) + } if shouldInPlaceFail { inplace.On("InPlaceUpdate", pods[i], nil).Return(fmt.Errorf("in-place update failed")) } else { inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) } - eviction.On("CanEvict", pods[i]).Return(true) eviction.On("Evict", pods[i], nil).Return(nil) } @@ -252,21 +328,17 @@ func testRunOnceBase( podLister := &test.PodListerMock{} podLister.On("List").Return(pods, nil) - targetRef := &v1.CrossVersionObjectReference{ - Kind: rc.Kind, - Name: rc.Name, - APIVersion: rc.APIVersion, - } - - vpaObj := test.VerticalPodAutoscaler(). - WithContainer(containerName). - WithTarget("2", "200M"). - WithMinAllowed(containerName, "1", "100M"). - WithMaxAllowed(containerName, "3", "1G"). - WithTargetRef(targetRef). - Get() vpaObj.Spec.UpdatePolicy = &vpa_types.PodUpdatePolicy{UpdateMode: &updateMode} + if isCPUBoostTest { + cpuStartupBoost := &vpa_types.GenericStartupBoost{ + Type: vpa_types.FactorStartupBoostType, + Duration: &metav1.Duration{Duration: 1 * time.Minute}, + } + vpaObj.Spec.StartupBoost = &vpa_types.StartupBoost{ + CPU: cpuStartupBoost, + } + } vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) @@ -504,3 +576,199 @@ func TestNewEventRecorder(t *testing.T) { }) } } + +func TestRunOnce_AutoUnboostThenEvict(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m"). + Get() + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). + AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to eviction + for i := range pods { + pods[i].Annotations = nil + inplace.On("CanUnboost", pods[i], vpaObj).Return(false).Once() + eviction.On("CanEvict", pods[i]).Return(true) + eviction.On("Evict", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) // all 5 from previous run only + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 5) +} + +func TestRunOnce_AutoUnboostThenInPlace(t *testing.T) { + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.InPlaceOrRecreate, true) + + featuregatetesting.SetFeatureGateDuringTest(t, features.MutableFeatureGate, features.CPUStartupBoost, true) + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + replicas := int32(5) + livePods := 5 + labels := map[string]string{"app": "testingApp"} + selector := parseLabelSelector("app = testingApp") + containerName := "container1" + rc := apiv1.ReplicationController{ + TypeMeta: metav1.TypeMeta{ + Kind: "ReplicationController", + APIVersion: "apps/v1", + }, + ObjectMeta: metav1.ObjectMeta{Name: "rc", Namespace: "default"}, + Spec: apiv1.ReplicationControllerSpec{Replicas: &replicas}, + } + pods := make([]*apiv1.Pod, livePods) + vpaObj := test.VerticalPodAutoscaler(). + WithContainer(containerName). + WithUpdateMode(vpa_types.UpdateModeInPlaceOrRecreate). + WithTarget("2", "200M"). + WithMinAllowed(containerName, "1", "100M"). + WithMaxAllowed(containerName, "3", "1G"). + WithTargetRef(&v1.CrossVersionObjectReference{Kind: rc.Kind, Name: rc.Name, APIVersion: rc.APIVersion}). + WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m"). + Get() + + for i := range pods { + pods[i] = test.Pod().WithName("test_"+strconv.Itoa(i)). + AddContainer(test.Container().WithName(containerName).WithCPURequest(resource.MustParse("1")).WithMemRequest(resource.MustParse("100M")).Get()). + WithCreator(&rc.ObjectMeta, &rc.TypeMeta). + Get() + pods[i].Labels = labels + } + + eviction := &test.PodsEvictionRestrictionMock{} + inplace := &test.PodsInPlaceRestrictionMock{} + factory := &restriction.FakePodsRestrictionFactory{Eviction: eviction, InPlace: inplace} + vpaLister := &test.VerticalPodAutoscalerListerMock{} + podLister := &test.PodListerMock{} + mockSelectorFetcher := target_mock.NewMockVpaTargetSelectorFetcher(ctrl) + + updater := &updater{ + vpaLister: vpaLister, + podLister: podLister, + restrictionFactory: factory, + evictionRateLimiter: rate.NewLimiter(rate.Inf, 0), + inPlaceRateLimiter: rate.NewLimiter(rate.Inf, 0), + evictionAdmission: priority.NewDefaultPodEvictionAdmission(), + recommendationProcessor: &test.FakeRecommendationProcessor{}, + selectorFetcher: mockSelectorFetcher, + controllerFetcher: controllerfetcher.FakeControllerFetcher{}, + useAdmissionControllerStatus: true, + statusValidator: newFakeValidator(true), + priorityProcessor: priority.NewProcessor(), + } + + // Cycle 1: Unboost the cpu + for i := range pods { + pods[i].Annotations = map[string]string{"startup-cpu-boost": ""} + pods[i].Status.Conditions = []apiv1.PodCondition{ + { + Type: apiv1.PodReady, + Status: apiv1.ConditionTrue, + }, + } + inplace.On("CanUnboost", pods[i], vpaObj).Return(true).Once() + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 5) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) + eviction.AssertNumberOfCalls(t, "Evict", 0) + + // Cycle 2: Regular patch which will lead to eviction + for i := range pods { + pods[i].Annotations = nil + inplace.On("CanInPlaceUpdate", pods[i]).Return(utils.InPlaceApproved) + inplace.On("InPlaceUpdate", pods[i], nil).Return(nil) + } + vpaLister.On("List").Return([]*vpa_types.VerticalPodAutoscaler{vpaObj}, nil).Once() + podLister.On("List").Return(pods, nil).Once() + mockSelectorFetcher.EXPECT().Fetch(gomock.Eq(vpaObj)).Return(selector, nil) + + updater.RunOnce(context.Background()) + inplace.AssertNumberOfCalls(t, "InPlaceUpdate", 10) + inplace.AssertNumberOfCalls(t, "CanUnboost", 5) // all 5 from previous run only + eviction.AssertNumberOfCalls(t, "Evict", 0) +} diff --git a/vertical-pod-autoscaler/pkg/updater/main.go b/vertical-pod-autoscaler/pkg/updater/main.go index 8394fd54b29c..d120841bac8e 100644 --- a/vertical-pod-autoscaler/pkg/updater/main.go +++ b/vertical-pod-autoscaler/pkg/updater/main.go @@ -206,7 +206,7 @@ func run(healthCheck *metrics.HealthCheck, commonFlag *common.CommonFlags) { recommendationProvider := recommendation.NewProvider(limitRangeCalculator, vpa_api_util.NewCappingRecommendationProcessor(limitRangeCalculator)) - calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator()} + calculators := []patch.Calculator{inplace.NewResourceInPlaceUpdatesCalculator(recommendationProvider), inplace.NewInPlaceUpdatedCalculator(), inplace.NewUnboostAnnotationCalculator()} // TODO: use SharedInformerFactory in updater updater, err := updater.NewUpdater( diff --git a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go index db5a27aace26..4a3810ce3545 100644 --- a/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go +++ b/vertical-pod-autoscaler/pkg/updater/restriction/pods_inplace_restriction.go @@ -35,6 +35,7 @@ import ( vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1" "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features" utils "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/updater/utils" + vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa" ) // TODO: Make these configurable by flags @@ -57,6 +58,8 @@ type PodsInPlaceRestriction interface { InPlaceUpdate(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error // CanInPlaceUpdate checks if pod can be safely updated in-place. If not, it will return a decision to potentially evict the pod. CanInPlaceUpdate(pod *apiv1.Pod) utils.InPlaceDecision + // CanUnboost checks if a pod can be safely unboosted. + CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool } // PodsInPlaceRestrictionImpl is the implementation of the PodsInPlaceRestriction interface. @@ -98,6 +101,30 @@ func (ip *PodsInPlaceRestrictionImpl) CanInPlaceUpdate(pod *apiv1.Pod) utils.InP return utils.InPlaceDeferred } +// CanUnboost checks if a pod can be safely unboosted. +func (ip *PodsInPlaceRestrictionImpl) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if !features.Enabled(features.CPUStartupBoost) { + return false + } + ready := vpa_api_util.PodReady(pod) + durationPassed := vpa_api_util.PodStartupBoostDurationPassed(pod, vpa) + hasAnnotation := vpa_api_util.PodHasCPUBoostInProgress(pod) + + klog.V(2).InfoS("Checking if pod can be unboosted", "pod", klog.KObj(pod), "ready", ready, "durationPassed", durationPassed, "hasAnnotation", hasAnnotation) + + if !ready || !durationPassed || !hasAnnotation { + return false + } + cr, present := ip.podToReplicaCreatorMap[getPodID(pod)] + if present { + singleGroupStats, present := ip.creatorToSingleGroupStatsMap[cr] + if present { + return singleGroupStats.isPodDisruptable() + } + } + return false +} + // InPlaceUpdate sends calculates patches and sends resize request to api client. Returns error if pod cannot be in-place updated or if client returned error. // Does not check if pod was actually in-place updated after grace period. func (ip *PodsInPlaceRestrictionImpl) InPlaceUpdate(podToUpdate *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler, eventRecorder record.EventRecorder) error { diff --git a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go index 8ae360177f31..202985b0bb62 100644 --- a/vertical-pod-autoscaler/pkg/utils/test/test_utils.go +++ b/vertical-pod-autoscaler/pkg/utils/test/test_utils.go @@ -139,6 +139,12 @@ func (m *PodsInPlaceRestrictionMock) CanInPlaceUpdate(pod *apiv1.Pod) utils.InPl return args.Get(0).(utils.InPlaceDecision) } +// CanUnboost is a mock implementation of PodsInPlaceRestriction.CanUnboost +func (m *PodsInPlaceRestrictionMock) CanUnboost(pod *apiv1.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + args := m.Called(pod, vpa) + return args.Bool(0) +} + // PodListerMock is a mock of PodLister type PodListerMock struct { mock.Mock diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api.go b/vertical-pod-autoscaler/pkg/utils/vpa/api.go index b30f3fc6039d..a215ae348782 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api.go @@ -38,6 +38,7 @@ import ( vpa_api "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned/typed/autoscaling.k8s.io/v1" vpa_lister "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/listers/autoscaling.k8s.io/v1" controllerfetcher "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/target/controller_fetcher" + "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/annotations" ) // VpaWithSelector is a pair of VPA and its selector. @@ -291,3 +292,38 @@ func CreateOrUpdateVpaCheckpoint(vpaCheckpointClient vpa_api.VerticalPodAutoscal } return nil } + +// PodReady returns true if the pod is ready. +func PodReady(pod *core.Pod) bool { + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady && cond.Status == core.ConditionTrue { + return true + } + } + return false +} + +// PodStartupBoostDurationPassed returns true if the startup boost duration has passed. +func PodStartupBoostDurationPassed(pod *core.Pod, vpa *vpa_types.VerticalPodAutoscaler) bool { + if vpa.Spec.StartupBoost == nil || vpa.Spec.StartupBoost.CPU.Duration == nil || vpa.Spec.StartupBoost.CPU.Duration.Duration == 0 { + return true + } + if !PodReady(pod) { + return false + } + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady { + return time.Since(cond.LastTransitionTime.Time) > vpa.Spec.StartupBoost.CPU.Duration.Duration + } + } + return false +} + +// PodHasCPUBoostInProgress returns true if the pod has the CPU boost annotation. +func PodHasCPUBoostInProgress(pod *core.Pod) bool { + if pod.Annotations == nil { + return false + } + _, found := pod.Annotations[annotations.StartupCPUBoostAnnotation] + return found +} diff --git a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go index 1f1c712f9e95..fbc360e34b6e 100644 --- a/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go +++ b/vertical-pod-autoscaler/pkg/utils/vpa/api_test.go @@ -399,3 +399,171 @@ func TestFindParentControllerForPod(t *testing.T) { }) } } + +func TestPodReady(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected bool + }{ + { + name: "PodReady condition is True", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + }, + }, + }, + }, + expected: true, + }, + { + name: "PodReady condition is False", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionFalse, + }, + }, + }, + }, + expected: false, + }, + { + name: "No PodReady condition", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{}, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodReady(tc.pod)) + }) + } +} + +func TestPodStartupBoostDurationPassed(t *testing.T) { + now := meta.Now() + past := meta.Time{Time: now.Add(-2 * time.Minute)} + testCases := []struct { + name string + pod *core.Pod + vpa *vpa_types.VerticalPodAutoscaler + expected bool + }{ + { + name: "No StartupBoost config", + pod: &core.Pod{}, + vpa: &vpa_types.VerticalPodAutoscaler{}, + expected: true, + }, + { + name: "No duration in StartupBoost", + pod: &core.Pod{}, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "").Get(), + expected: true, + }, + { + name: "Pod not ready", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionFalse, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: false, + }, + { + name: "Duration passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: past, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: true, + }, + { + name: "Duration not passed", + pod: &core.Pod{ + Status: core.PodStatus{ + Conditions: []core.PodCondition{ + { + Type: core.PodReady, + Status: core.ConditionTrue, + LastTransitionTime: now, + }, + }, + }, + }, + vpa: test.VerticalPodAutoscaler().WithContainer(containerName).WithCPUStartupBoost(vpa_types.FactorStartupBoostType, nil, nil, "1m").Get(), + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodStartupBoostDurationPassed(tc.pod, tc.vpa)) + }) + } +} + +func TestPodHasCPUBoostInProgress(t *testing.T) { + testCases := []struct { + name string + pod *core.Pod + expected bool + }{ + { + name: "No annotations", + pod: &core.Pod{}, + expected: false, + }, + { + name: "Annotation present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "startup-cpu-boost": "", + }, + }, + }, + expected: true, + }, + { + name: "Annotation not present", + pod: &core.Pod{ + ObjectMeta: meta.ObjectMeta{ + Annotations: map[string]string{ + "another-annotation": "true", + }, + }, + }, + expected: false, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, PodHasCPUBoostInProgress(tc.pod)) + }) + } +}