
Commit 0fff5bb

Merge pull request kubernetes#128680 from tallclair/min-cpu
[FG:InPlacePodVerticalScaling] Handle edge cases around CPU MinShares
2 parents 81dc453 + bab6df8 commit 0fff5bb
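
For context, MinShares is the cgroup v1 minimum for cpu.shares (2), exposed in the kubelet's container manager as cm.MinShares. Because the kubelet converts a container's CPU request into shares, every request at or below 2m lands on the same cgroup value, which is the edge case this merge handles. The sketch below mirrors that conversion in simplified form; the constants and helper name follow the kubelet's cm package, but this is an illustrative re-implementation rather than the production helper.

package main

import "fmt"

// Constants as used by the kubelet's container manager (cm package).
const (
	minShares     = 2    // cm.MinShares: kernel minimum for cpu.shares
	sharesPerCPU  = 1024 // cm.SharesPerCPU
	milliCPUToCPU = 1000 // cm.MilliCPUToCPU
)

// milliCPUToShares mirrors cm.MilliCPUToShares: convert a CPU request in
// millicores to cgroup v1 cpu.shares, clamping to the kernel minimum.
func milliCPUToShares(milliCPU int64) uint64 {
	if milliCPU == 0 {
		return minShares
	}
	shares := milliCPU * sharesPerCPU / milliCPUToCPU
	if shares < minShares {
		return minShares
	}
	return uint64(shares)
}

func main() {
	for _, m := range []int64{0, 1, 2, 3, 500, 1000} {
		fmt.Printf("%4dm -> %d shares\n", m, milliCPUToShares(m))
	}
	// 0m, 1m, and 2m all map to 2 shares, so resizing between such requests is
	// a no-op at the cgroup level; that is the edge case handled here.
}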

File tree

7 files changed (+268 / -28 lines)


pkg/kubelet/kubelet.go

Lines changed: 10 additions & 0 deletions
@@ -2912,6 +2912,16 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
 			}
 		}
 		allocatedPod = pod
+
+		// Special case when the updated allocation matches the actual resources. This can occur
+		// when reverting a resize that hasn't been actuated, or when making an equivalent change
+		// (such as CPU requests below MinShares). This is an optimization to clear the resize
+		// status immediately, rather than waiting for the next SyncPod iteration.
+		if allocatedResourcesMatchStatus(allocatedPod, podStatus) {
+			// In this case, consider the resize complete.
+			kl.statusManager.SetPodResizeStatus(pod.UID, "")
+			return allocatedPod, nil
+		}
 	}
 	if resizeStatus != "" {
 		kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus)
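
The comment in this hunk names two situations where the fresh allocation already matches what the runtime reports: reverting a resize that was allocated but never actuated, and an "equivalent" change such as moving between CPU requests that are both at or below MinShares. The sketch below is a reduced, stand-alone illustration of that check; it uses invented types and plain millicore integers, whereas the real allocatedResourcesMatchStatus compares resource.Quantity values across the pod's containers.

package main

import "fmt"

const minShares = 2 // mirrors cm.MinShares, the cgroup v1 minimum for cpu.shares

// resources is a stand-in for a container's CPU request, in millicores.
type resources struct{ cpuRequestMilli int64 }

// matchStatus sketches the idea behind allocatedResourcesMatchStatus: the
// allocation is treated as already actuated if it equals the actual value, or
// if both values sit at or below MinShares (they yield the same cpu.shares).
func matchStatus(allocated, actual resources) bool {
	if allocated.cpuRequestMilli == actual.cpuRequestMilli {
		return true
	}
	return allocated.cpuRequestMilli <= minShares && actual.cpuRequestMilli <= minShares
}

func main() {
	actual := resources{cpuRequestMilli: 2}

	// Equivalent change: 1m and 2m both clamp to MinShares, so the resize can
	// be marked complete immediately instead of waiting for the next SyncPod.
	fmt.Println(matchStatus(resources{cpuRequestMilli: 1}, actual)) // true

	// Real change: actuation is still needed, so the resize stays InProgress.
	fmt.Println(matchStatus(resources{cpuRequestMilli: 500}, actual)) // false
}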

pkg/kubelet/kubelet_pods.go

Lines changed: 9 additions & 2 deletions
@@ -1791,7 +1791,9 @@ func allocatedResourcesMatchStatus(allocatedPod *v1.Pod, podStatus *kubecontaine
 
 	// Only compare resizeable resources, and only compare resources that are explicitly configured.
 	if hasCPUReq {
-		if !cpuReq.Equal(*cs.Resources.CPURequest) {
+		// If both allocated & status CPU requests are at or below MinShares then they are considered equal.
+		if !cpuReq.Equal(*cs.Resources.CPURequest) &&
+			(cpuReq.MilliValue() > cm.MinShares || cs.Resources.CPURequest.MilliValue() > cm.MinShares) {
 			return false
 		}
 	}
@@ -2150,7 +2152,12 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
 			}
 			if resources.Requests != nil {
 				if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
-					resources.Requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
+					// If both the allocated & actual resources are at or below MinShares, preserve the
+					// allocated value in the API to avoid confusion and simplify comparisons.
+					if cStatus.Resources.CPURequest.MilliValue() > cm.MinShares ||
+						resources.Requests.Cpu().MilliValue() > cm.MinShares {
+						resources.Requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
+					}
 				} else {
 					preserveOldResourcesValue(v1.ResourceCPU, oldStatus.Resources.Requests, resources.Requests)
 				}
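
The second hunk changes what gets reported back in status.resources: normally the runtime's actual CPU request wins, but when both the actual and the allocated request are at or below MinShares, the allocated value is preserved so the status matches what the user asked for (a 1m request is not echoed back as 2m). Below is a simplified sketch of that rule, with a hypothetical helper name and millicore integers standing in for resource.Quantity.

package main

import "fmt"

const minShares = 2 // mirrors cm.MinShares

// reportedCPURequest sketches the rule added to convertToAPIContainerStatuses.
func reportedCPURequest(allocatedMilli, actualMilli int64) int64 {
	if actualMilli > minShares || allocatedMilli > minShares {
		return actualMilli // a real difference: report what is actually configured
	}
	return allocatedMilli // equivalent at the cgroup level: keep the allocated value
}

func main() {
	fmt.Println(reportedCPURequest(1, 2))     // 1: status mirrors the 1m allocation
	fmt.Println(reportedCPURequest(500, 250)) // 250: status reflects the runtime value
}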

pkg/kubelet/kubelet_pods_test.go

Lines changed: 97 additions & 5 deletions
@@ -4723,7 +4723,8 @@ func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
 	for tdesc, tc := range map[string]struct {
 		State              kubecontainer.State // Defaults to Running
 		Resources          v1.ResourceRequirements
-		AllocatedResources *v1.ResourceRequirements // Defaults to Resources
+		AllocatedResources *v1.ResourceRequirements          // Defaults to Resources
+		ActualResources    *kubecontainer.ContainerResources // Defaults to Resources equivalent
 		OldStatus          v1.ContainerStatus
 		Expected           v1.ContainerStatus
 	}{
@@ -4765,6 +4766,70 @@
 				Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
 			},
 		},
+		"BurstableQoSPod without CPU": {
+			Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("100M"),
+			}},
+			ActualResources: &kubecontainer.ContainerResources{
+				CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
+			},
+			OldStatus: v1.ContainerStatus{
+				Name: testContainerName,
+				Image: "img",
+				ImageID: "img1234",
+				State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
+				Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+				}},
+			},
+			Expected: v1.ContainerStatus{
+				Name: testContainerName,
+				ContainerID: testContainerID.String(),
+				Image: "img",
+				ImageID: "img1234",
+				State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
+				AllocatedResources: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+				},
+				Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+				}},
+			},
+		},
+		"BurstableQoSPod with below min CPU": {
+			Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("100M"),
+				v1.ResourceCPU: resource.MustParse("1m"),
+			}},
+			ActualResources: &kubecontainer.ContainerResources{
+				CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
+			},
+			OldStatus: v1.ContainerStatus{
+				Name: testContainerName,
+				Image: "img",
+				ImageID: "img1234",
+				State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
+				Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+					v1.ResourceCPU: resource.MustParse("1m"),
+				}},
+			},
+			Expected: v1.ContainerStatus{
+				Name: testContainerName,
+				ContainerID: testContainerID.String(),
+				Image: "img",
+				ImageID: "img1234",
+				State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
+				AllocatedResources: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+					v1.ResourceCPU: resource.MustParse("1m"),
+				},
+				Resources: &v1.ResourceRequirements{Requests: v1.ResourceList{
+					v1.ResourceMemory: resource.MustParse("100M"),
+					v1.ResourceCPU: resource.MustParse("1m"),
+				}},
+			},
+		},
 		"GuaranteedQoSPod with CPU and memory CRI status, with ephemeral storage": {
 			Resources: v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
 			OldStatus: v1.ContainerStatus{
@@ -5003,10 +5068,13 @@ func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
 			tPod.Spec.Containers[0].Resources = tc.Resources
 		}
 		kubelet.statusManager.SetPodAllocation(tPod)
-		resources := &kubecontainer.ContainerResources{
-			MemoryLimit: tc.Resources.Limits.Memory(),
-			CPULimit: tc.Resources.Limits.Cpu(),
-			CPURequest: tc.Resources.Requests.Cpu(),
+		resources := tc.ActualResources
+		if resources == nil {
+			resources = &kubecontainer.ContainerResources{
+				MemoryLimit: tc.Resources.Limits.Memory(),
+				CPULimit: tc.Resources.Limits.Cpu(),
+				CPURequest: tc.Resources.Requests.Cpu(),
+			}
 		}
 		state := kubecontainer.ContainerStateRunning
 		if tc.State != "" {
@@ -6698,6 +6766,30 @@ func TestAllocatedResourcesMatchStatus(t *testing.T) {
 			CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
 		},
 		expectMatch: true,
+	}, {
+		name: "burstable: min cpu request",
+		allocatedResources: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("100M"),
+				v1.ResourceCPU: resource.MustParse("2m"),
+			},
+		},
+		statusResources: &kubecontainer.ContainerResources{
+			CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
+		},
+		expectMatch: true,
+	}, {
+		name: "burstable: below min cpu request",
+		allocatedResources: v1.ResourceRequirements{
+			Requests: v1.ResourceList{
+				v1.ResourceMemory: resource.MustParse("100M"),
+				v1.ResourceCPU: resource.MustParse("1m"),
+			},
+		},
+		statusResources: &kubecontainer.ContainerResources{
+			CPURequest: resource.NewMilliQuantity(2, resource.DecimalSI),
+		},
+		expectMatch: true,
 	}, {
 		name: "best effort",
 		allocatedResources: v1.ResourceRequirements{},
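
A side note on the fixtures above: the tests mix resource.MustParse("1m") with resource.NewMilliQuantity(2, resource.DecimalSI). The two quantities are not Equal, which is exactly why the MinShares carve-out compares MilliValue rather than relying on Quantity equality. A small stand-alone illustration using the k8s.io/apimachinery resource package:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Two ways the test fixtures construct CPU quantities.
	oneMilli := resource.MustParse("1m")                         // 1 millicore
	twoMilli := resource.NewMilliQuantity(2, resource.DecimalSI) // 2 millicores

	// Quantity.Equal sees them as different values...
	fmt.Println(oneMilli.Equal(*twoMilli)) // false

	// ...but both MilliValues are at or below cm.MinShares (2), so the kubelet
	// now treats the two requests as equivalent.
	fmt.Println(oneMilli.MilliValue(), twoMilli.MilliValue()) // 1 2
}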

pkg/kubelet/kubelet_test.go

Lines changed: 59 additions & 19 deletions
@@ -2588,6 +2588,8 @@ func TestHandlePodResourcesResize(t *testing.T) {
 	defer testKubelet.Cleanup()
 	kubelet := testKubelet.kubelet
 
+	cpu1m := resource.MustParse("1m")
+	cpu2m := resource.MustParse("2m")
 	cpu500m := resource.MustParse("500m")
 	cpu1000m := resource.MustParse("1")
 	cpu1500m := resource.MustParse("1500m")
@@ -2671,7 +2673,7 @@
 
 	tests := []struct {
 		name                 string
-		pod                  *v1.Pod
+		originalRequests     v1.ResourceList
 		newRequests          v1.ResourceList
 		newRequestsAllocated bool // Whether the new requests have already been allocated (but not actuated)
 		expectedAllocations  v1.ResourceList
@@ -2681,79 +2683,113 @@
 	}{
 		{
 			name: "Request CPU and memory decrease - expect InProgress",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
 			expectedResize: v1.PodResizeStatusInProgress,
 			expectBackoffReset: true,
 		},
 		{
 			name: "Request CPU increase, memory decrease - expect InProgress",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
 			expectedResize: v1.PodResizeStatusInProgress,
 			expectBackoffReset: true,
 		},
 		{
 			name: "Request CPU decrease, memory increase - expect InProgress",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
 			expectedResize: v1.PodResizeStatusInProgress,
 			expectBackoffReset: true,
 		},
 		{
 			name: "Request CPU and memory increase beyond current capacity - expect Deferred",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusDeferred,
 		},
 		{
 			name: "Request CPU decrease and memory increase beyond current capacity - expect Deferred",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusDeferred,
 		},
 		{
 			name: "Request memory increase beyond node capacity - expect Infeasible",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusInfeasible,
 		},
 		{
 			name: "Request CPU increase beyond node capacity - expect Infeasible",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusInfeasible,
 		},
 		{
 			name: "CPU increase in progress - expect InProgress",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
 			newRequestsAllocated: true,
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusInProgress,
 		},
 		{
 			name: "No resize",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: "",
 		},
 		{
 			name: "windows node, expect Infeasible",
-			pod: testPod2,
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
 			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
 			expectedResize: v1.PodResizeStatusInfeasible,
 			goos: "windows",
 		},
+		{
+			name: "Increase CPU from min shares",
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m},
+			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m},
+			expectedResize: v1.PodResizeStatusInProgress,
+			expectBackoffReset: true,
+		},
+		{
+			name: "Decrease CPU to min shares",
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m},
+			newRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			expectedResize: v1.PodResizeStatusInProgress,
+			expectBackoffReset: true,
+		},
+		{
+			name: "Equivalent min CPU shares",
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu1m},
+			newRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			expectedResize: "",
+			// Even though the resize isn't being actuated, we still clear the container backoff
+			// since the allocation is changing.
+			expectBackoffReset: true,
+		},
+		{
+			name: "Equivalent min CPU shares - already allocated",
+			originalRequests: v1.ResourceList{v1.ResourceCPU: cpu2m},
+			newRequests: v1.ResourceList{v1.ResourceCPU: cpu1m},
+			newRequestsAllocated: true,
+			expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1m},
+			expectedResize: "",
+		},
 	}
 
 	for _, tt := range tests {
@@ -2765,22 +2801,26 @@
 		}
 		kubelet.statusManager = status.NewFakeManager()
 
-		newPod := tt.pod.DeepCopy()
+		originalPod := testPod1.DeepCopy()
+		originalPod.Spec.Containers[0].Resources.Requests = tt.originalRequests
+		kubelet.podManager.UpdatePod(originalPod)
+
+		newPod := originalPod.DeepCopy()
 		newPod.Spec.Containers[0].Resources.Requests = tt.newRequests
 
 		if !tt.newRequestsAllocated {
-			require.NoError(t, kubelet.statusManager.SetPodAllocation(tt.pod))
+			require.NoError(t, kubelet.statusManager.SetPodAllocation(originalPod))
 		} else {
 			require.NoError(t, kubelet.statusManager.SetPodAllocation(newPod))
 		}
 
 		podStatus := &kubecontainer.PodStatus{
-			ID: tt.pod.UID,
-			Name: tt.pod.Name,
-			Namespace: tt.pod.Namespace,
-			ContainerStatuses: make([]*kubecontainer.Status, len(tt.pod.Spec.Containers)),
+			ID: originalPod.UID,
+			Name: originalPod.Name,
+			Namespace: originalPod.Namespace,
+			ContainerStatuses: make([]*kubecontainer.Status, len(originalPod.Spec.Containers)),
 		}
-		for i, c := range tt.pod.Spec.Containers {
+		for i, c := range originalPod.Spec.Containers {
 			podStatus.ContainerStatuses[i] = &kubecontainer.Status{
 				Name: c.Name,
 				State: kubecontainer.ContainerStateRunning,
@@ -2794,7 +2834,7 @@
 
 		now := kubelet.clock.Now()
 		// Put the container in backoff so we can confirm backoff is reset.
-		backoffKey := kuberuntime.GetStableKey(tt.pod, &tt.pod.Spec.Containers[0])
+		backoffKey := kuberuntime.GetStableKey(originalPod, &originalPod.Spec.Containers[0])
 		kubelet.backOff.Next(backoffKey, now)
 
 		updatedPod, err := kubelet.handlePodResourcesResize(newPod, podStatus)
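
The two new "Equivalent min CPU shares" cases encode a subtle pair of expectations: no resize status is set because nothing needs actuating, yet the container backoff is still reset whenever the recorded allocation changes. A rough, hypothetical reduction of that rule (not kubelet code):

package main

import "fmt"

// outcome captures the two expectations checked by the new test cases.
type outcome struct {
	resizeStatus string // "" means no resize is pending
	resetBackoff bool
}

// decide is a hypothetical reduction of the behavior under test.
func decide(allocationChanged, needsActuation bool) outcome {
	out := outcome{resetBackoff: allocationChanged}
	if needsActuation {
		out.resizeStatus = "InProgress"
	}
	return out
}

func main() {
	// "Equivalent min CPU shares": 1m -> 2m changes the allocation, but both
	// values map to MinShares, so nothing is actuated and the status stays empty.
	fmt.Printf("%+v\n", decide(true, false)) // {resizeStatus: resetBackoff:true}

	// "Equivalent min CPU shares - already allocated": 2m -> 1m where 1m was
	// already allocated; nothing changes, so the backoff is left alone.
	fmt.Printf("%+v\n", decide(false, false)) // {resizeStatus: resetBackoff:false}
}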

pkg/kubelet/kuberuntime/kuberuntime_manager.go

Lines changed: 15 additions & 2 deletions
@@ -598,6 +598,13 @@ func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containe
 	// Note: cgroup doesn't support memory request today, so we don't compare that. If canAdmitPod called during
 	// handlePodResourcesResize finds 'fit', then desiredMemoryRequest == currentMemoryRequest.
 
+	// Special case for minimum CPU request
+	if desiredResources.cpuRequest <= cm.MinShares && currentResources.cpuRequest <= cm.MinShares {
+		// If both desired & current CPU requests are at or below MinShares,
+		// then consider these equal.
+		desiredResources.cpuRequest = currentResources.cpuRequest
+	}
+
 	if currentResources == desiredResources {
 		// No resize required.
 		return true
@@ -824,8 +831,14 @@ func (m *kubeGenericRuntimeManager) updatePodContainerResources(ctx context.Cont
 		case v1.ResourceMemory:
 			return status.Resources.MemoryLimit.Equal(*container.Resources.Limits.Memory())
 		case v1.ResourceCPU:
-			return status.Resources.CPURequest.Equal(*container.Resources.Requests.Cpu()) &&
-				status.Resources.CPULimit.Equal(*container.Resources.Limits.Cpu())
+			if !status.Resources.CPULimit.Equal(*container.Resources.Limits.Cpu()) {
+				return false // limits don't match
+			} else if status.Resources.CPURequest.Equal(*container.Resources.Requests.Cpu()) {
+				return true // requests & limits both match
+			}
+			// Consider requests equal if both are at or below MinShares.
+			return status.Resources.CPURequest.MilliValue() <= cm.MinShares &&
+				container.Resources.Requests.Cpu().MilliValue() <= cm.MinShares
 		default:
 			return true // Shouldn't happen.
 		}
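
The first hunk applies the MinShares carve-out when deciding whether any resize action is needed at all; the second rewrites the verification closure used after a CPU update, which now accepts the resize when the limits match and the requests either match exactly or both sit at or below MinShares. Below is a reduced sketch of that predicate, using millicore integers and a hypothetical function name in place of the real resource.Quantity comparisons.

package main

import "fmt"

const minShares = 2 // mirrors cm.MinShares

// cpuActuated sketches the verification closure in updatePodContainerResources.
// statusReq/statusLim are what the runtime reports; specReq/specLim are the
// container's desired values (all in millicores).
func cpuActuated(statusReq, statusLim, specReq, specLim int64) bool {
	if statusLim != specLim {
		return false // limits don't match
	}
	if statusReq == specReq {
		return true // requests & limits both match
	}
	// Requests are considered equal if both are at or below MinShares.
	return statusReq <= minShares && specReq <= minShares
}

func main() {
	fmt.Println(cpuActuated(2, 100, 1, 100))     // true: both requests at/below MinShares
	fmt.Println(cpuActuated(250, 500, 500, 500)) // false: the request still needs updating
}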
