Skip to content

Commit 53aa727

Browse files
committed
Checkpoint allocated requests and limits
1 parent ee9e229 commit 53aa727

File tree

9 files changed

+72
-77
lines changed

9 files changed

+72
-77
lines changed

pkg/kubelet/kubelet.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,15 +2612,10 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
26122612
// updateContainerResourceAllocation updates AllocatedResources values
26132613
// (for cpu & memory) from checkpoint store
26142614
func (kl *Kubelet) updateContainerResourceAllocation(pod *v1.Pod) {
2615-
for _, c := range pod.Spec.Containers {
2615+
for i, c := range pod.Spec.Containers {
26162616
allocatedResources, found := kl.statusManager.GetContainerResourceAllocation(string(pod.UID), c.Name)
2617-
if c.Resources.Requests != nil && found {
2618-
if _, ok := allocatedResources[v1.ResourceCPU]; ok {
2619-
c.Resources.Requests[v1.ResourceCPU] = allocatedResources[v1.ResourceCPU]
2620-
}
2621-
if _, ok := allocatedResources[v1.ResourceMemory]; ok {
2622-
c.Resources.Requests[v1.ResourceMemory] = allocatedResources[v1.ResourceMemory]
2623-
}
2617+
if found {
2618+
pod.Spec.Containers[i].Resources = allocatedResources
26242619
}
26252620
}
26262621
}

pkg/kubelet/kubelet_pods.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,9 +2082,10 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
20822082
}
20832083
container := kubecontainer.GetContainerSpec(pod, cName)
20842084
// AllocatedResources values come from checkpoint. It is the source-of-truth.
2085-
found := false
2086-
status.AllocatedResources, found = kl.statusManager.GetContainerResourceAllocation(string(pod.UID), cName)
2087-
if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found {
2085+
alloc, found := kl.statusManager.GetContainerResourceAllocation(string(pod.UID), cName)
2086+
if found {
2087+
status.AllocatedResources = alloc.Requests
2088+
} else if !(container.Resources.Requests == nil && container.Resources.Limits == nil) {
20882089
// Log error and fallback to AllocatedResources in oldStatus if it exists
20892090
klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName)
20902091
if oldStatusFound {

pkg/kubelet/kubelet_test.go

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,8 +2447,8 @@ func TestPodResourceAllocationReset(t *testing.T) {
24472447
name: "Having both memory and cpu, resource allocation not exists",
24482448
pod: podWithUIDNameNsSpec("1", "pod1", "foo", *cpu500mMem500MPodSpec),
24492449
expectedPodResourceAllocation: state.PodResourceAllocation{
2450-
"1": map[string]v1.ResourceList{
2451-
cpu500mMem500MPodSpec.Containers[0].Name: cpu500mMem500MPodSpec.Containers[0].Resources.Requests,
2450+
"1": map[string]v1.ResourceRequirements{
2451+
cpu500mMem500MPodSpec.Containers[0].Name: cpu500mMem500MPodSpec.Containers[0].Resources,
24522452
},
24532453
},
24542454
},
@@ -2457,8 +2457,8 @@ func TestPodResourceAllocationReset(t *testing.T) {
24572457
pod: podWithUIDNameNsSpec("2", "pod2", "foo", *cpu500mMem500MPodSpec),
24582458
existingPodAllocation: podWithUIDNameNsSpec("2", "pod2", "foo", *cpu500mMem500MPodSpec),
24592459
expectedPodResourceAllocation: state.PodResourceAllocation{
2460-
"2": map[string]v1.ResourceList{
2461-
cpu500mMem500MPodSpec.Containers[0].Name: cpu500mMem500MPodSpec.Containers[0].Resources.Requests,
2460+
"2": map[string]v1.ResourceRequirements{
2461+
cpu500mMem500MPodSpec.Containers[0].Name: cpu500mMem500MPodSpec.Containers[0].Resources,
24622462
},
24632463
},
24642464
},
@@ -2467,17 +2467,17 @@ func TestPodResourceAllocationReset(t *testing.T) {
24672467
pod: podWithUIDNameNsSpec("3", "pod3", "foo", *cpu500mMem500MPodSpec),
24682468
existingPodAllocation: podWithUIDNameNsSpec("3", "pod3", "foo", *cpu800mMem800MPodSpec),
24692469
expectedPodResourceAllocation: state.PodResourceAllocation{
2470-
"3": map[string]v1.ResourceList{
2471-
cpu800mMem800MPodSpec.Containers[0].Name: cpu800mMem800MPodSpec.Containers[0].Resources.Requests,
2470+
"3": map[string]v1.ResourceRequirements{
2471+
cpu800mMem800MPodSpec.Containers[0].Name: cpu800mMem800MPodSpec.Containers[0].Resources,
24722472
},
24732473
},
24742474
},
24752475
{
24762476
name: "Only has cpu, resource allocation not exists",
24772477
pod: podWithUIDNameNsSpec("4", "pod5", "foo", *cpu500mPodSpec),
24782478
expectedPodResourceAllocation: state.PodResourceAllocation{
2479-
"4": map[string]v1.ResourceList{
2480-
cpu500mPodSpec.Containers[0].Name: cpu500mPodSpec.Containers[0].Resources.Requests,
2479+
"4": map[string]v1.ResourceRequirements{
2480+
cpu500mPodSpec.Containers[0].Name: cpu500mPodSpec.Containers[0].Resources,
24812481
},
24822482
},
24832483
},
@@ -2486,8 +2486,8 @@ func TestPodResourceAllocationReset(t *testing.T) {
24862486
pod: podWithUIDNameNsSpec("5", "pod5", "foo", *cpu500mPodSpec),
24872487
existingPodAllocation: podWithUIDNameNsSpec("5", "pod5", "foo", *cpu500mPodSpec),
24882488
expectedPodResourceAllocation: state.PodResourceAllocation{
2489-
"5": map[string]v1.ResourceList{
2490-
cpu500mPodSpec.Containers[0].Name: cpu500mPodSpec.Containers[0].Resources.Requests,
2489+
"5": map[string]v1.ResourceRequirements{
2490+
cpu500mPodSpec.Containers[0].Name: cpu500mPodSpec.Containers[0].Resources,
24912491
},
24922492
},
24932493
},
@@ -2496,17 +2496,17 @@ func TestPodResourceAllocationReset(t *testing.T) {
24962496
pod: podWithUIDNameNsSpec("6", "pod6", "foo", *cpu500mPodSpec),
24972497
existingPodAllocation: podWithUIDNameNsSpec("6", "pod6", "foo", *cpu800mPodSpec),
24982498
expectedPodResourceAllocation: state.PodResourceAllocation{
2499-
"6": map[string]v1.ResourceList{
2500-
cpu800mPodSpec.Containers[0].Name: cpu800mPodSpec.Containers[0].Resources.Requests,
2499+
"6": map[string]v1.ResourceRequirements{
2500+
cpu800mPodSpec.Containers[0].Name: cpu800mPodSpec.Containers[0].Resources,
25012501
},
25022502
},
25032503
},
25042504
{
25052505
name: "Only has memory, resource allocation not exists",
25062506
pod: podWithUIDNameNsSpec("7", "pod7", "foo", *mem500MPodSpec),
25072507
expectedPodResourceAllocation: state.PodResourceAllocation{
2508-
"7": map[string]v1.ResourceList{
2509-
mem500MPodSpec.Containers[0].Name: mem500MPodSpec.Containers[0].Resources.Requests,
2508+
"7": map[string]v1.ResourceRequirements{
2509+
mem500MPodSpec.Containers[0].Name: mem500MPodSpec.Containers[0].Resources,
25102510
},
25112511
},
25122512
},
@@ -2515,8 +2515,8 @@ func TestPodResourceAllocationReset(t *testing.T) {
25152515
pod: podWithUIDNameNsSpec("8", "pod8", "foo", *mem500MPodSpec),
25162516
existingPodAllocation: podWithUIDNameNsSpec("8", "pod8", "foo", *mem500MPodSpec),
25172517
expectedPodResourceAllocation: state.PodResourceAllocation{
2518-
"8": map[string]v1.ResourceList{
2519-
mem500MPodSpec.Containers[0].Name: mem500MPodSpec.Containers[0].Resources.Requests,
2518+
"8": map[string]v1.ResourceRequirements{
2519+
mem500MPodSpec.Containers[0].Name: mem500MPodSpec.Containers[0].Resources,
25202520
},
25212521
},
25222522
},
@@ -2525,17 +2525,17 @@ func TestPodResourceAllocationReset(t *testing.T) {
25252525
pod: podWithUIDNameNsSpec("9", "pod9", "foo", *mem500MPodSpec),
25262526
existingPodAllocation: podWithUIDNameNsSpec("9", "pod9", "foo", *mem800MPodSpec),
25272527
expectedPodResourceAllocation: state.PodResourceAllocation{
2528-
"9": map[string]v1.ResourceList{
2529-
mem800MPodSpec.Containers[0].Name: mem800MPodSpec.Containers[0].Resources.Requests,
2528+
"9": map[string]v1.ResourceRequirements{
2529+
mem800MPodSpec.Containers[0].Name: mem800MPodSpec.Containers[0].Resources,
25302530
},
25312531
},
25322532
},
25332533
{
25342534
name: "No CPU and memory, resource allocation not exists",
25352535
pod: podWithUIDNameNsSpec("10", "pod10", "foo", *emptyPodSpec),
25362536
expectedPodResourceAllocation: state.PodResourceAllocation{
2537-
"10": map[string]v1.ResourceList{
2538-
emptyPodSpec.Containers[0].Name: emptyPodSpec.Containers[0].Resources.Requests,
2537+
"10": map[string]v1.ResourceRequirements{
2538+
emptyPodSpec.Containers[0].Name: emptyPodSpec.Containers[0].Resources,
25392539
},
25402540
},
25412541
},
@@ -2544,27 +2544,29 @@ func TestPodResourceAllocationReset(t *testing.T) {
25442544
pod: podWithUIDNameNsSpec("11", "pod11", "foo", *emptyPodSpec),
25452545
existingPodAllocation: podWithUIDNameNsSpec("11", "pod11", "foo", *emptyPodSpec),
25462546
expectedPodResourceAllocation: state.PodResourceAllocation{
2547-
"11": map[string]v1.ResourceList{
2548-
emptyPodSpec.Containers[0].Name: emptyPodSpec.Containers[0].Resources.Requests,
2547+
"11": map[string]v1.ResourceRequirements{
2548+
emptyPodSpec.Containers[0].Name: emptyPodSpec.Containers[0].Resources,
25492549
},
25502550
},
25512551
},
25522552
}
25532553
for _, tc := range tests {
2554-
if tc.existingPodAllocation != nil {
2555-
// when kubelet restarts, AllocatedResources has already existed before adding pod
2556-
err := kubelet.statusManager.SetPodAllocation(tc.existingPodAllocation)
2557-
if err != nil {
2558-
t.Fatalf("failed to set pod allocation: %v", err)
2554+
t.Run(tc.name, func(t *testing.T) {
2555+
if tc.existingPodAllocation != nil {
2556+
// when kubelet restarts, AllocatedResources has already existed before adding pod
2557+
err := kubelet.statusManager.SetPodAllocation(tc.existingPodAllocation)
2558+
if err != nil {
2559+
t.Fatalf("failed to set pod allocation: %v", err)
2560+
}
25592561
}
2560-
}
2561-
kubelet.HandlePodAdditions([]*v1.Pod{tc.pod})
2562+
kubelet.HandlePodAdditions([]*v1.Pod{tc.pod})
25622563

2563-
allocatedResources, found := kubelet.statusManager.GetContainerResourceAllocation(string(tc.pod.UID), tc.pod.Spec.Containers[0].Name)
2564-
if !found {
2565-
t.Fatalf("resource allocation should exist: (pod: %#v, container: %s)", tc.pod, tc.pod.Spec.Containers[0].Name)
2566-
}
2567-
assert.Equal(t, tc.expectedPodResourceAllocation[string(tc.pod.UID)][tc.pod.Spec.Containers[0].Name], allocatedResources, tc.name)
2564+
allocatedResources, found := kubelet.statusManager.GetContainerResourceAllocation(string(tc.pod.UID), tc.pod.Spec.Containers[0].Name)
2565+
if !found {
2566+
t.Fatalf("resource allocation should exist: (pod: %#v, container: %s)", tc.pod, tc.pod.Spec.Containers[0].Name)
2567+
}
2568+
assert.Equal(t, tc.expectedPodResourceAllocation[string(tc.pod.UID)][tc.pod.Spec.Containers[0].Name], allocatedResources, tc.name)
2569+
})
25682570
}
25692571
}
25702572

pkg/kubelet/status/fake_status_manager.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func (m *fakeManager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
6363
return
6464
}
6565

66-
func (m *fakeManager) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
66+
func (m *fakeManager) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceRequirements, bool) {
6767
klog.InfoS("GetContainerResourceAllocation()")
6868
return m.state.GetContainerResourceAllocation(podUID, containerName)
6969
}
@@ -76,9 +76,9 @@ func (m *fakeManager) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, boo
7676
func (m *fakeManager) SetPodAllocation(pod *v1.Pod) error {
7777
klog.InfoS("SetPodAllocation()")
7878
for _, container := range pod.Spec.Containers {
79-
var alloc v1.ResourceList
79+
var alloc v1.ResourceRequirements
8080
if container.Resources.Requests != nil {
81-
alloc = container.Resources.Requests.DeepCopy()
81+
alloc = *container.Resources.DeepCopy()
8282
}
8383
m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc)
8484
}

pkg/kubelet/status/state/checkpoint.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package state
1919
import (
2020
"encoding/json"
2121

22-
"k8s.io/api/core/v1"
22+
v1 "k8s.io/api/core/v1"
2323
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2424
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
2525
)
@@ -28,16 +28,16 @@ var _ checkpointmanager.Checkpoint = &PodResourceAllocationCheckpoint{}
2828

2929
// PodResourceAllocationCheckpoint is used to store resources allocated to a pod in checkpoint
3030
type PodResourceAllocationCheckpoint struct {
31-
AllocationEntries map[string]map[string]v1.ResourceList `json:"allocationEntries,omitempty"`
32-
ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"`
33-
Checksum checksum.Checksum `json:"checksum"`
31+
AllocationEntries map[string]map[string]v1.ResourceRequirements `json:"allocationEntries,omitempty"`
32+
ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"`
33+
Checksum checksum.Checksum `json:"checksum"`
3434
}
3535

3636
// NewPodResourceAllocationCheckpoint returns an instance of Checkpoint
3737
func NewPodResourceAllocationCheckpoint() *PodResourceAllocationCheckpoint {
3838
//lint:ignore unexported-type-in-api user-facing error message
3939
return &PodResourceAllocationCheckpoint{
40-
AllocationEntries: make(map[string]map[string]v1.ResourceList),
40+
AllocationEntries: make(map[string]map[string]v1.ResourceRequirements),
4141
ResizeStatusEntries: make(map[string]v1.PodResizeStatus),
4242
}
4343
}

pkg/kubelet/status/state/state.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ limitations under the License.
1717
package state
1818

1919
import (
20-
"k8s.io/api/core/v1"
20+
v1 "k8s.io/api/core/v1"
2121
)
2222

2323
// PodResourceAllocation type is used in tracking resources allocated to pod's containers
24-
type PodResourceAllocation map[string]map[string]v1.ResourceList
24+
type PodResourceAllocation map[string]map[string]v1.ResourceRequirements
2525

2626
// PodResizeStatus type is used in tracking the last resize decision for pod
2727
type PodResizeStatus map[string]v1.PodResizeStatus
@@ -30,24 +30,24 @@ type PodResizeStatus map[string]v1.PodResizeStatus
3030
func (pr PodResourceAllocation) Clone() PodResourceAllocation {
3131
prCopy := make(PodResourceAllocation)
3232
for pod := range pr {
33-
prCopy[pod] = make(map[string]v1.ResourceList)
33+
prCopy[pod] = make(map[string]v1.ResourceRequirements)
3434
for container, alloc := range pr[pod] {
35-
prCopy[pod][container] = alloc.DeepCopy()
35+
prCopy[pod][container] = *alloc.DeepCopy()
3636
}
3737
}
3838
return prCopy
3939
}
4040

4141
// Reader interface used to read current pod resource allocation state
4242
type Reader interface {
43-
GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool)
43+
GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceRequirements, bool)
4444
GetPodResourceAllocation() PodResourceAllocation
4545
GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)
4646
GetResizeStatus() PodResizeStatus
4747
}
4848

4949
type writer interface {
50-
SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error
50+
SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error
5151
SetPodResourceAllocation(PodResourceAllocation) error
5252
SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error
5353
SetResizeStatus(PodResizeStatus) error

pkg/kubelet/status/state/state_checkpoint.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import (
2121
"path"
2222
"sync"
2323

24-
"k8s.io/api/core/v1"
24+
v1 "k8s.io/api/core/v1"
2525
"k8s.io/klog/v2"
2626
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2727
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
@@ -82,7 +82,7 @@ func (sc *stateCheckpoint) storeState() error {
8282

8383
podAllocation := sc.cache.GetPodResourceAllocation()
8484
for pod := range podAllocation {
85-
checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceList)
85+
checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceRequirements)
8686
for container, alloc := range podAllocation[pod] {
8787
checkpoint.AllocationEntries[pod][container] = alloc
8888
}
@@ -103,7 +103,7 @@ func (sc *stateCheckpoint) storeState() error {
103103
}
104104

105105
// GetContainerResourceAllocation returns current resources allocated to a pod's container
106-
func (sc *stateCheckpoint) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
106+
func (sc *stateCheckpoint) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceRequirements, bool) {
107107
sc.mux.RLock()
108108
defer sc.mux.RUnlock()
109109
return sc.cache.GetContainerResourceAllocation(podUID, containerName)
@@ -131,7 +131,7 @@ func (sc *stateCheckpoint) GetResizeStatus() PodResizeStatus {
131131
}
132132

133133
// SetContainerResourceAllocation sets resources allocated to a pod's container
134-
func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
134+
func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error {
135135
sc.mux.Lock()
136136
defer sc.mux.Unlock()
137137
sc.cache.SetContainerResourceAllocation(podUID, containerName, alloc)
@@ -185,8 +185,8 @@ func NewNoopStateCheckpoint() State {
185185
return &noopStateCheckpoint{}
186186
}
187187

188-
func (sc *noopStateCheckpoint) GetContainerResourceAllocation(_ string, _ string) (v1.ResourceList, bool) {
189-
return nil, false
188+
func (sc *noopStateCheckpoint) GetContainerResourceAllocation(_ string, _ string) (v1.ResourceRequirements, bool) {
189+
return v1.ResourceRequirements{}, false
190190
}
191191

192192
func (sc *noopStateCheckpoint) GetPodResourceAllocation() PodResourceAllocation {
@@ -201,7 +201,7 @@ func (sc *noopStateCheckpoint) GetResizeStatus() PodResizeStatus {
201201
return nil
202202
}
203203

204-
func (sc *noopStateCheckpoint) SetContainerResourceAllocation(_ string, _ string, _ v1.ResourceList) error {
204+
func (sc *noopStateCheckpoint) SetContainerResourceAllocation(_ string, _ string, _ v1.ResourceRequirements) error {
205205
return nil
206206
}
207207

pkg/kubelet/status/state/state_mem.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package state
1919
import (
2020
"sync"
2121

22-
"k8s.io/api/core/v1"
22+
v1 "k8s.io/api/core/v1"
2323
"k8s.io/klog/v2"
2424
)
2525

@@ -40,12 +40,12 @@ func NewStateMemory() State {
4040
}
4141
}
4242

43-
func (s *stateMemory) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
43+
func (s *stateMemory) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceRequirements, bool) {
4444
s.RLock()
4545
defer s.RUnlock()
4646

4747
alloc, ok := s.podAllocation[podUID][containerName]
48-
return alloc.DeepCopy(), ok
48+
return *alloc.DeepCopy(), ok
4949
}
5050

5151
func (s *stateMemory) GetPodResourceAllocation() PodResourceAllocation {
@@ -72,12 +72,12 @@ func (s *stateMemory) GetResizeStatus() PodResizeStatus {
7272
return prs
7373
}
7474

75-
func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
75+
func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error {
7676
s.Lock()
7777
defer s.Unlock()
7878

7979
if _, ok := s.podAllocation[podUID]; !ok {
80-
s.podAllocation[podUID] = make(map[string]v1.ResourceList)
80+
s.podAllocation[podUID] = make(map[string]v1.ResourceRequirements)
8181
}
8282

8383
s.podAllocation[podUID][containerName] = alloc

0 commit comments

Comments
 (0)