Commit caedccf

Allow resize when pod limits aren't set
1 parent 9df464f commit caedccf

File tree: 2 files changed (+235, -21 lines)

pkg/kubelet/kuberuntime/kuberuntime_manager.go

Lines changed: 37 additions & 21 deletions
@@ -666,7 +666,7 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
         return true
     }
 
-func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
+func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podContainerChanges podActions, result *kubecontainer.PodSyncResult) {
     pcm := m.containerManager.NewPodContainerManager()
     //TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
     podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
@@ -688,7 +688,14 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
             }
             err = pcm.SetPodCgroupConfig(pod, podCPUResources)
         case v1.ResourceMemory:
-            err = pcm.SetPodCgroupConfig(pod, podResources)
+            if !setLimitValue {
+                // Memory requests aren't written to cgroups.
+                return nil
+            }
+            podMemoryResources := &cm.ResourceConfig{
+                Memory: podResources.Memory,
+            }
+            err = pcm.SetPodCgroupConfig(pod, podMemoryResources)
         }
         if err != nil {
             klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
@@ -732,37 +739,39 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
         return err
     }
     if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
-        if podResources.Memory == nil {
-            klog.ErrorS(nil, "podResources.Memory is nil", "pod", pod.Name)
-            result.Fail(fmt.Errorf("podResources.Memory is nil for pod %s", pod.Name))
-            return
-        }
         currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
         if err != nil {
             klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
             result.Fail(err)
             return
         }
-        currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
-        if err != nil {
-            klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
-            result.Fail(err)
-            return
-        }
-        if currentPodMemoryUsage >= uint64(*podResources.Memory) {
-            klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
-            result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
-            return
+        if podResources.Memory != nil {
+            currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
+            if err != nil {
+                klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
+                result.Fail(err)
+                return
+            }
+            if currentPodMemoryUsage >= uint64(*podResources.Memory) {
+                klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
+                result.Fail(fmt.Errorf("aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
+                return
+            }
+        } else {
+            // Default pod memory limit to the current memory limit if unset to prevent it from updating.
+            // TODO(#128675): This does not support removing limits.
+            podResources.Memory = currentPodMemoryConfig.Memory
         }
         if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
             result.Fail(errResize)
             return
         }
     }
     if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
-        if podResources.CPUQuota == nil || podResources.CPUShares == nil {
-            klog.ErrorS(nil, "podResources.CPUQuota or podResources.CPUShares is nil", "pod", pod.Name)
-            result.Fail(fmt.Errorf("podResources.CPUQuota or podResources.CPUShares is nil for pod %s", pod.Name))
+        if podResources.CPUShares == nil {
+            // This shouldn't happen: ResourceConfigForPod always returns a non-nil value for CPUShares.
+            klog.ErrorS(nil, "podResources.CPUShares is nil", "pod", pod.Name)
+            result.Fail(fmt.Errorf("podResources.CPUShares is nil for pod %s", pod.Name))
             return
         }
         currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
@@ -771,6 +780,13 @@ func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *ku
             result.Fail(err)
             return
         }
+
+        // Default pod CPUQuota to the current CPUQuota if no limit is set to prevent the pod limit
+        // from updating.
+        // TODO(#128675): This does not support removing limits.
+        if podResources.CPUQuota == nil {
+            podResources.CPUQuota = currentPodCpuConfig.CPUQuota
+        }
         if errResize := resizeContainers(v1.ResourceCPU, *currentPodCpuConfig.CPUQuota, *podResources.CPUQuota,
             int64(*currentPodCpuConfig.CPUShares), int64(*podResources.CPUShares)); errResize != nil {
             result.Fail(errResize)
@@ -1351,7 +1367,7 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
     // Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
     if IsInPlacePodVerticalScalingAllowed(pod) {
         if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
-            m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
+            m.doPodResizeAction(pod, podContainerChanges, &result)
         }
     }

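Note on the change above: cm.ResourceConfigForPod leaves the pod-level Memory and CPUQuota fields nil when a pod-level limit cannot be computed (for example, when at least one container declares no limit). Instead of failing the resize in that case, doPodResizeAction now keeps whatever limit is currently configured on the pod cgroup. A minimal sketch of that fallback pattern, using illustrative names rather than the kubelet's real types:

package main

import "fmt"

// keepCurrentIfUnset mirrors the fallback in doPodResizeAction: if the desired
// pod-level limit is nil (some container declares no limit), reuse the limit
// currently set on the pod cgroup so the resize leaves it untouched.
// As the diff's TODO(#128675) notes, this approach cannot remove an existing limit.
func keepCurrentIfUnset(desired, current *int64) *int64 {
	if desired == nil {
		return current
	}
	return desired
}

func main() {
	currentLimit := int64(200 << 20) // 200Mi, as read back from the pod cgroup
	fmt.Println(*keepCurrentIfUnset(nil, &currentLimit)) // falls back to the current limit
}

In the patch itself the fallback is written inline against currentPodMemoryConfig.Memory and currentPodCpuConfig.CPUQuota rather than through a helper like this.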
pkg/kubelet/kuberuntime/kuberuntime_manager_test.go

Lines changed: 198 additions & 0 deletions
@@ -22,6 +22,7 @@ import (
     "fmt"
     "path/filepath"
     "reflect"
+    goruntime "runtime"
     "sort"
     "testing"
     "time"
@@ -31,6 +32,7 @@ import (
 
     cadvisorapi "github.com/google/cadvisor/info/v1"
     "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/mock"
     "github.com/stretchr/testify/require"
     noopoteltrace "go.opentelemetry.io/otel/trace/noop"
 
@@ -47,6 +49,8 @@ import (
     podutil "k8s.io/kubernetes/pkg/api/v1/pod"
     "k8s.io/kubernetes/pkg/credentialprovider"
     "k8s.io/kubernetes/pkg/features"
+    "k8s.io/kubernetes/pkg/kubelet/cm"
+    cmtesting "k8s.io/kubernetes/pkg/kubelet/cm/testing"
     kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
     containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
     imagetypes "k8s.io/kubernetes/pkg/kubelet/images"
@@ -2826,3 +2830,197 @@ func TestGetImageVolumes(t *testing.T) {
         assert.Equal(t, tc.expectedImageVolumePulls, imageVolumePulls)
     }
 }
+
+func TestDoPodResizeAction(t *testing.T) {
+    if goruntime.GOOS != "linux" {
+        t.Skip("unsupported OS")
+    }
+
+    featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)
+    _, _, m, err := createTestRuntimeManager()
+    require.NoError(t, err)
+    m.cpuCFSQuota = true // Enforce CPU Limits
+
+    for _, tc := range []struct {
+        testName                  string
+        currentResources          containerResources
+        desiredResources          containerResources
+        updatedResources          []v1.ResourceName
+        otherContainersHaveLimits bool
+        expectedError             string
+        expectPodCgroupUpdates    int
+    }{
+        {
+            testName: "Increase cpu and memory requests and limits, with computed pod limits",
+            currentResources: containerResources{
+                cpuRequest: 100, cpuLimit: 100,
+                memoryRequest: 100, memoryLimit: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest: 200, cpuLimit: 200,
+                memoryRequest: 200, memoryLimit: 200,
+            },
+            otherContainersHaveLimits: true,
+            updatedResources:          []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
+            expectPodCgroupUpdates:    3, // cpu req, cpu lim, mem lim
+        },
+        {
+            testName: "Increase cpu and memory requests and limits, without computed pod limits",
+            currentResources: containerResources{
+                cpuRequest: 100, cpuLimit: 100,
+                memoryRequest: 100, memoryLimit: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest: 200, cpuLimit: 200,
+                memoryRequest: 200, memoryLimit: 200,
+            },
+            // If some containers don't have limits, pod level limits are not applied
+            otherContainersHaveLimits: false,
+            updatedResources:          []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
+            expectPodCgroupUpdates:    1, // cpu req
+        },
+        {
+            testName: "Increase cpu and memory requests only",
+            currentResources: containerResources{
+                cpuRequest: 100, cpuLimit: 200,
+                memoryRequest: 100, memoryLimit: 200,
+            },
+            desiredResources: containerResources{
+                cpuRequest: 150, cpuLimit: 200,
+                memoryRequest: 150, memoryLimit: 200,
+            },
+            updatedResources:       []v1.ResourceName{v1.ResourceCPU},
+            expectPodCgroupUpdates: 1, // cpu req
+        },
+        {
+            testName: "Resize memory request no limits",
+            currentResources: containerResources{
+                cpuRequest:    100,
+                memoryRequest: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest:    100,
+                memoryRequest: 200,
+            },
+            // Memory request resize doesn't generate an update action.
+            updatedResources: []v1.ResourceName{},
+        },
+        {
+            testName: "Resize cpu request no limits",
+            currentResources: containerResources{
+                cpuRequest:    100,
+                memoryRequest: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest:    200,
+                memoryRequest: 100,
+            },
+            updatedResources:       []v1.ResourceName{v1.ResourceCPU},
+            expectPodCgroupUpdates: 1, // cpu req
+        },
+        {
+            testName: "Add limits",
+            currentResources: containerResources{
+                cpuRequest:    100,
+                memoryRequest: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest: 100, cpuLimit: 100,
+                memoryRequest: 100, memoryLimit: 100,
+            },
+            updatedResources:       []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
+            expectPodCgroupUpdates: 0,
+        },
+        {
+            testName: "Add limits and pod limits",
+            currentResources: containerResources{
+                cpuRequest:    100,
+                memoryRequest: 100,
+            },
+            desiredResources: containerResources{
+                cpuRequest: 100, cpuLimit: 100,
+                memoryRequest: 100, memoryLimit: 100,
+            },
+            otherContainersHaveLimits: true,
+            updatedResources:          []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory},
+            expectPodCgroupUpdates:    2, // cpu lim, memory lim
+        },
+    } {
+        t.Run(tc.testName, func(t *testing.T) {
+            mockCM := cmtesting.NewMockContainerManager(t)
+            m.containerManager = mockCM
+            mockPCM := cmtesting.NewMockPodContainerManager(t)
+            mockCM.EXPECT().NewPodContainerManager().Return(mockPCM)
+
+            mockPCM.EXPECT().GetPodCgroupConfig(mock.Anything, v1.ResourceMemory).Return(&cm.ResourceConfig{
+                Memory: ptr.To(tc.currentResources.memoryLimit),
+            }, nil).Maybe()
+            mockPCM.EXPECT().GetPodCgroupMemoryUsage(mock.Anything).Return(0, nil).Maybe()
+            // Set up mock pod cgroup config
+            podCPURequest := tc.currentResources.cpuRequest
+            podCPULimit := tc.currentResources.cpuLimit
+            if tc.otherContainersHaveLimits {
+                podCPURequest += 200
+                podCPULimit += 200
+            }
+            mockPCM.EXPECT().GetPodCgroupConfig(mock.Anything, v1.ResourceCPU).Return(&cm.ResourceConfig{
+                CPUShares: ptr.To(cm.MilliCPUToShares(podCPURequest)),
+                CPUQuota:  ptr.To(cm.MilliCPUToQuota(podCPULimit, cm.QuotaPeriod)),
+            }, nil).Maybe()
+            if tc.expectPodCgroupUpdates > 0 {
+                mockPCM.EXPECT().SetPodCgroupConfig(mock.Anything, mock.Anything).Return(nil).Times(tc.expectPodCgroupUpdates)
+            }
+
+            pod, kps := makeBasePodAndStatus()
+            // pod spec and allocated resources are already updated as desired when doPodResizeAction() is called.
+            pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
+                Requests: v1.ResourceList{
+                    v1.ResourceCPU:    *resource.NewMilliQuantity(tc.desiredResources.cpuRequest, resource.DecimalSI),
+                    v1.ResourceMemory: *resource.NewQuantity(tc.desiredResources.memoryRequest, resource.DecimalSI),
+                },
+                Limits: v1.ResourceList{
+                    v1.ResourceCPU:    *resource.NewMilliQuantity(tc.desiredResources.cpuLimit, resource.DecimalSI),
+                    v1.ResourceMemory: *resource.NewQuantity(tc.desiredResources.memoryLimit, resource.DecimalSI),
+                },
+            }
+            if tc.otherContainersHaveLimits {
+                resourceList := v1.ResourceList{
+                    v1.ResourceCPU:    resource.MustParse("100m"),
+                    v1.ResourceMemory: resource.MustParse("100M"),
+                }
+                resources := v1.ResourceRequirements{
+                    Requests: resourceList,
+                    Limits:   resourceList,
+                }
+                pod.Spec.Containers[1].Resources = resources
+                pod.Spec.Containers[2].Resources = resources
+            }
+
+            updateInfo := containerToUpdateInfo{
+                apiContainerIdx:           0,
+                kubeContainerID:           kps.ContainerStatuses[0].ID,
+                desiredContainerResources: tc.desiredResources,
+                currentContainerResources: &tc.currentResources,
+            }
+            containersToUpdate := make(map[v1.ResourceName][]containerToUpdateInfo)
+            for _, r := range tc.updatedResources {
+                containersToUpdate[r] = []containerToUpdateInfo{updateInfo}
+            }
+
+            syncResult := &kubecontainer.PodSyncResult{}
+            actions := podActions{
+                ContainersToUpdate: containersToUpdate,
+            }
+            m.doPodResizeAction(pod, actions, syncResult)
+
+            if tc.expectedError != "" {
+                require.Error(t, syncResult.Error())
+                require.EqualError(t, syncResult.Error(), tc.expectedError)
+            } else {
+                require.NoError(t, syncResult.Error())
+            }
+
+            mock.AssertExpectationsForObjects(t, mockPCM)
+        })
+    }
+}

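The new test drives doPodResizeAction entirely through the generated mocks, with two expectation styles doing the real work: Maybe() on the cgroup reads, which only happen in some cases, and Times(n) on SetPodCgroupConfig, which is how the expectPodCgroupUpdates column pins the exact number of pod-level cgroup writes. A small self-contained illustration of those testify/mock semantics, using a hand-rolled stand-in rather than the generated cmtesting.MockPodContainerManager (the method signature here is simplified and illustrative):

package example

import (
	"testing"

	"github.com/stretchr/testify/mock"
)

// fakePCM is an illustrative stand-in for the generated mock; it is not the
// real PodContainerManager interface.
type fakePCM struct {
	mock.Mock
}

func (f *fakePCM) SetPodCgroupConfig(resource string) error {
	args := f.Called(resource)
	return args.Error(0)
}

func TestExpectationStyles(t *testing.T) {
	f := new(fakePCM)

	// Times(2) requires exactly two matching calls, mirroring
	// expectPodCgroupUpdates == 2 in the "Add limits and pod limits" case
	// (one cpu limit write, one memory limit write).
	f.On("SetPodCgroupConfig", mock.Anything).Return(nil).Times(2)

	_ = f.SetPodCgroupConfig("cpu")
	_ = f.SetPodCgroupConfig("memory")

	// AssertExpectations fails the test if fewer than two calls were made.
	f.AssertExpectations(t)
}

In the real test the expected counts come from the table above, and expectations are only registered when expectPodCgroupUpdates > 0.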