Skip to content

Commit ea53083

Browse files
committed
Don't checkpoint ResizeStatus
1 parent 4a4748d commit ea53083

File tree

10 files changed

+35
-105
lines changed

10 files changed

+35
-105
lines changed

pkg/kubelet/kubelet.go

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,6 @@ import (
114114
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
115115
"k8s.io/kubernetes/pkg/kubelet/userns"
116116
"k8s.io/kubernetes/pkg/kubelet/util"
117-
"k8s.io/kubernetes/pkg/kubelet/util/format"
118117
"k8s.io/kubernetes/pkg/kubelet/util/manager"
119118
"k8s.io/kubernetes/pkg/kubelet/util/queue"
120119
"k8s.io/kubernetes/pkg/kubelet/util/sliceutils"
@@ -2799,25 +2798,22 @@ func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, v1.PodResizeStatus) {
27992798
func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontainer.PodStatus) (*v1.Pod, error) {
28002799
allocatedPod, updated := kl.statusManager.UpdatePodFromAllocation(pod)
28012800
if !updated {
2801+
// Desired resources == allocated resources. Check whether a resize is in progress.
28022802
resizeInProgress := !allocatedResourcesMatchStatus(allocatedPod, podStatus)
28032803
if resizeInProgress {
2804-
// If a resize in progress, make sure the cache has the correct state in case the Kubelet restarted.
2805-
if err := kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress); err != nil {
2806-
klog.ErrorS(err, "Failed to set resize status to InProgress", "pod", format.Pod(pod))
2807-
}
2804+
// If a resize is in progress, make sure the cache has the correct state in case the Kubelet restarted.
2805+
kl.statusManager.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress)
28082806
} else {
28092807
// (Desired == Allocated == Actual) => clear the resize status.
2810-
if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil {
2811-
klog.ErrorS(err, "Failed to clear resize status", "pod", format.Pod(pod))
2812-
}
2808+
kl.statusManager.SetPodResizeStatus(pod.UID, "")
28132809
}
2814-
2815-
// Pod is not resizing, nothing more to do here.
2810+
// Pod allocation does not need to be updated.
28162811
return allocatedPod, nil
28172812
}
28182813

28192814
kl.podResizeMutex.Lock()
28202815
defer kl.podResizeMutex.Unlock()
2816+
// Desired resources != allocated resources. Can we update the allocation to the desired resources?
28212817
fit, resizeStatus := kl.canResizePod(pod)
28222818
if fit {
28232819
// Update pod resource allocation checkpoint
@@ -2827,9 +2823,7 @@ func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod, podStatus *kubecontaine
28272823
allocatedPod = pod
28282824
}
28292825
if resizeStatus != "" {
2830-
if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
2831-
klog.ErrorS(err, "Failed to set resize status", "pod", format.Pod(pod), "resizeStatus", resizeStatus)
2832-
}
2826+
kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus)
28332827
}
28342828
return allocatedPod, nil
28352829
}

pkg/kubelet/kubelet_pods.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1750,13 +1750,11 @@ func (kl *Kubelet) determinePodResizeStatus(allocatedPod *v1.Pod, podStatus *kub
17501750

17511751
// If pod is terminal, clear the resize status.
17521752
if podIsTerminal {
1753-
if err := kl.statusManager.SetPodResizeStatus(allocatedPod.UID, ""); err != nil {
1754-
klog.ErrorS(err, "SetPodResizeStatus failed for terminal pod", "pod", format.Pod(allocatedPod))
1755-
}
1753+
kl.statusManager.SetPodResizeStatus(allocatedPod.UID, "")
17561754
return ""
17571755
}
17581756

1759-
resizeStatus, _ := kl.statusManager.GetPodResizeStatus(string(allocatedPod.UID))
1757+
resizeStatus := kl.statusManager.GetPodResizeStatus(allocatedPod.UID)
17601758
return resizeStatus
17611759
}
17621760

pkg/kubelet/kubelet_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2777,7 +2777,7 @@ func TestHandlePodResourcesResize(t *testing.T) {
27772777
require.True(t, found, "container allocation")
27782778
assert.Equal(t, tt.expectedAllocations, alloc.Requests, "stored container allocation")
27792779

2780-
resizeStatus, _ := kubelet.statusManager.GetPodResizeStatus(string(newPod.UID))
2780+
resizeStatus := kubelet.statusManager.GetPodResizeStatus(newPod.UID)
27812781
assert.Equal(t, tt.expectedResize, resizeStatus)
27822782
})
27832783
}

pkg/kubelet/status/fake_status_manager.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ func (m *fakeManager) GetContainerResourceAllocation(podUID string, containerNam
6868
return m.state.GetContainerResourceAllocation(podUID, containerName)
6969
}
7070

71-
func (m *fakeManager) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
72-
return m.state.GetPodResizeStatus(podUID)
71+
func (m *fakeManager) GetPodResizeStatus(podUID types.UID) v1.PodResizeStatus {
72+
return m.state.GetPodResizeStatus(string(podUID))
7373
}
7474

7575
func (m *fakeManager) UpdatePodFromAllocation(pod *v1.Pod) (*v1.Pod, bool) {
@@ -86,8 +86,8 @@ func (m *fakeManager) SetPodAllocation(pod *v1.Pod) error {
8686
return nil
8787
}
8888

89-
func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
90-
return m.state.SetPodResizeStatus(string(podUID), resizeStatus)
89+
func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) {
90+
m.state.SetPodResizeStatus(string(podUID), resizeStatus)
9191
}
9292

9393
// NewFakeManager creates an empty/fake status manager

pkg/kubelet/status/state/checkpoint.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ import (
2828
var _ checkpointmanager.Checkpoint = &Checkpoint{}
2929

3030
type PodResourceAllocationInfo struct {
31-
AllocationEntries map[string]map[string]v1.ResourceRequirements `json:"allocationEntries,omitempty"`
32-
ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"`
31+
AllocationEntries map[string]map[string]v1.ResourceRequirements `json:"allocationEntries,omitempty"`
3332
}
3433

3534
// Checkpoint represents a structure to store pod resource allocation checkpoint data

pkg/kubelet/status/state/state.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,13 @@ func (pr PodResourceAllocation) Clone() PodResourceAllocation {
4242
type Reader interface {
4343
GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceRequirements, bool)
4444
GetPodResourceAllocation() PodResourceAllocation
45-
GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)
46-
GetResizeStatus() PodResizeStatus
45+
GetPodResizeStatus(podUID string) v1.PodResizeStatus
4746
}
4847

4948
type writer interface {
5049
SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error
5150
SetPodResourceAllocation(PodResourceAllocation) error
52-
SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error
53-
SetResizeStatus(PodResizeStatus) error
51+
SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus)
5452
Delete(podUID string, containerName string) error
5553
ClearState() error
5654
}

pkg/kubelet/status/state/state_checkpoint.go

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,6 @@ func (sc *stateCheckpoint) restoreState() error {
8080
if err != nil {
8181
return fmt.Errorf("failed to set pod resource allocation: %w", err)
8282
}
83-
err = sc.cache.SetResizeStatus(praInfo.ResizeStatusEntries)
84-
if err != nil {
85-
return fmt.Errorf("failed to set resize status: %w", err)
86-
}
8783
klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint")
8884
return nil
8985
}
@@ -92,10 +88,8 @@ func (sc *stateCheckpoint) restoreState() error {
9288
func (sc *stateCheckpoint) storeState() error {
9389
podAllocation := sc.cache.GetPodResourceAllocation()
9490

95-
podResizeStatus := sc.cache.GetResizeStatus()
9691
checkpoint, err := NewCheckpoint(&PodResourceAllocationInfo{
97-
AllocationEntries: podAllocation,
98-
ResizeStatusEntries: podResizeStatus,
92+
AllocationEntries: podAllocation,
9993
})
10094
if err != nil {
10195
return fmt.Errorf("failed to create checkpoint: %w", err)
@@ -123,19 +117,12 @@ func (sc *stateCheckpoint) GetPodResourceAllocation() PodResourceAllocation {
123117
}
124118

125119
// GetPodResizeStatus returns the last resize decision for a pod
126-
func (sc *stateCheckpoint) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
120+
func (sc *stateCheckpoint) GetPodResizeStatus(podUID string) v1.PodResizeStatus {
127121
sc.mux.RLock()
128122
defer sc.mux.RUnlock()
129123
return sc.cache.GetPodResizeStatus(podUID)
130124
}
131125

132-
// GetResizeStatus returns the set of resize decisions made
133-
func (sc *stateCheckpoint) GetResizeStatus() PodResizeStatus {
134-
sc.mux.RLock()
135-
defer sc.mux.RUnlock()
136-
return sc.cache.GetResizeStatus()
137-
}
138-
139126
// SetContainerResourceAllocation sets resources allocated to a pod's container
140127
func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error {
141128
sc.mux.Lock()
@@ -153,19 +140,10 @@ func (sc *stateCheckpoint) SetPodResourceAllocation(a PodResourceAllocation) err
153140
}
154141

155142
// SetPodResizeStatus sets the last resize decision for a pod
156-
func (sc *stateCheckpoint) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
143+
func (sc *stateCheckpoint) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) {
157144
sc.mux.Lock()
158145
defer sc.mux.Unlock()
159146
sc.cache.SetPodResizeStatus(podUID, resizeStatus)
160-
return sc.storeState()
161-
}
162-
163-
// SetResizeStatus sets the resize decisions
164-
func (sc *stateCheckpoint) SetResizeStatus(rs PodResizeStatus) error {
165-
sc.mux.Lock()
166-
defer sc.mux.Unlock()
167-
sc.cache.SetResizeStatus(rs)
168-
return sc.storeState()
169147
}
170148

171149
// Delete deletes allocations for specified pod
@@ -199,12 +177,8 @@ func (sc *noopStateCheckpoint) GetPodResourceAllocation() PodResourceAllocation
199177
return nil
200178
}
201179

202-
func (sc *noopStateCheckpoint) GetPodResizeStatus(_ string) (v1.PodResizeStatus, bool) {
203-
return "", false
204-
}
205-
206-
func (sc *noopStateCheckpoint) GetResizeStatus() PodResizeStatus {
207-
return nil
180+
func (sc *noopStateCheckpoint) GetPodResizeStatus(_ string) v1.PodResizeStatus {
181+
return ""
208182
}
209183

210184
func (sc *noopStateCheckpoint) SetContainerResourceAllocation(_ string, _ string, _ v1.ResourceRequirements) error {
@@ -215,13 +189,7 @@ func (sc *noopStateCheckpoint) SetPodResourceAllocation(_ PodResourceAllocation)
215189
return nil
216190
}
217191

218-
func (sc *noopStateCheckpoint) SetPodResizeStatus(_ string, _ v1.PodResizeStatus) error {
219-
return nil
220-
}
221-
222-
func (sc *noopStateCheckpoint) SetResizeStatus(_ PodResizeStatus) error {
223-
return nil
224-
}
192+
func (sc *noopStateCheckpoint) SetPodResizeStatus(_ string, _ v1.PodResizeStatus) {}
225193

226194
func (sc *noopStateCheckpoint) Delete(_ string, _ string) error {
227195
return nil

pkg/kubelet/status/state/state_checkpoint_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ func Test_stateCheckpoint_formatUpgraded(t *testing.T) {
146146
},
147147
},
148148
},
149-
ResizeStatusEntries: map[string]v1.PodResizeStatus{},
150149
}
151150
checkpoint := &Checkpoint{}
152151
err := checkpoint.UnmarshalCheckpoint([]byte(checkpointContent))
@@ -160,7 +159,6 @@ func Test_stateCheckpoint_formatUpgraded(t *testing.T) {
160159

161160
actualPodResourceAllocationInfo := &PodResourceAllocationInfo{}
162161
actualPodResourceAllocationInfo.AllocationEntries = sc.cache.GetPodResourceAllocation()
163-
actualPodResourceAllocationInfo.ResizeStatusEntries = sc.cache.GetResizeStatus()
164162
require.NoError(t, err, "failed to get pod resource allocation info")
165163
require.Equal(t, expectedPodResourceAllocationInfo, actualPodResourceAllocationInfo, "pod resource allocation info is not equal")
166164
}

pkg/kubelet/status/state/state_mem.go

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -54,22 +54,11 @@ func (s *stateMemory) GetPodResourceAllocation() PodResourceAllocation {
5454
return s.podAllocation.Clone()
5555
}
5656

57-
func (s *stateMemory) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
57+
func (s *stateMemory) GetPodResizeStatus(podUID string) v1.PodResizeStatus {
5858
s.RLock()
5959
defer s.RUnlock()
6060

61-
resizeStatus, ok := s.podResizeStatus[podUID]
62-
return resizeStatus, ok
63-
}
64-
65-
func (s *stateMemory) GetResizeStatus() PodResizeStatus {
66-
s.RLock()
67-
defer s.RUnlock()
68-
prs := make(map[string]v1.PodResizeStatus)
69-
for k, v := range s.podResizeStatus {
70-
prs[k] = v
71-
}
72-
return prs
61+
return s.podResizeStatus[podUID]
7362
}
7463

7564
func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceRequirements) error {
@@ -94,7 +83,7 @@ func (s *stateMemory) SetPodResourceAllocation(a PodResourceAllocation) error {
9483
return nil
9584
}
9685

97-
func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
86+
func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) {
9887
s.Lock()
9988
defer s.Unlock()
10089

@@ -104,19 +93,6 @@ func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResiz
10493
delete(s.podResizeStatus, podUID)
10594
}
10695
klog.V(3).InfoS("Updated pod resize state", "podUID", podUID, "resizeStatus", resizeStatus)
107-
return nil
108-
}
109-
110-
func (s *stateMemory) SetResizeStatus(rs PodResizeStatus) error {
111-
s.Lock()
112-
defer s.Unlock()
113-
prs := make(map[string]v1.PodResizeStatus)
114-
for k, v := range rs {
115-
prs[k] = v
116-
}
117-
s.podResizeStatus = prs
118-
klog.V(3).InfoS("Updated pod resize state", "resizes", rs)
119-
return nil
12096
}
12197

12298
func (s *stateMemory) deleteContainer(podUID string, containerName string) {

pkg/kubelet/status/status_manager.go

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,11 @@ type Manager interface {
143143
// the provided podUIDs.
144144
RemoveOrphanedStatuses(podUIDs map[types.UID]bool)
145145

146-
// GetPodResizeStatus returns checkpointed PodStatus.Resize value
147-
GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)
146+
// GetPodResizeStatus returns cached PodStatus.Resize value
147+
GetPodResizeStatus(podUID types.UID) v1.PodResizeStatus
148148

149-
// SetPodResizeStatus checkpoints the last resizing decision for the pod.
150-
SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error
149+
// SetPodResizeStatus caches the last resizing decision for the pod.
150+
SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus)
151151

152152
allocationManager
153153
}
@@ -285,12 +285,11 @@ func updatePodFromAllocation(pod *v1.Pod, allocs state.PodResourceAllocation) (*
285285
return pod, updated
286286
}
287287

288-
// GetPodResizeStatus returns the last checkpointed ResizeStaus value
289-
// If checkpoint manager has not been initialized, it returns nil, false
290-
func (m *manager) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
288+
// GetPodResizeStatus returns the last cached ResizeStatus value.
289+
func (m *manager) GetPodResizeStatus(podUID types.UID) v1.PodResizeStatus {
291290
m.podStatusesLock.RLock()
292291
defer m.podStatusesLock.RUnlock()
293-
return m.state.GetPodResizeStatus(podUID)
292+
return m.state.GetPodResizeStatus(string(podUID))
294293
}
295294

296295
// SetPodAllocation checkpoints the resources allocated to a pod's containers
@@ -307,10 +306,10 @@ func (m *manager) SetPodAllocation(pod *v1.Pod) error {
307306
}
308307

309308
// SetPodResizeStatus caches the last resizing decision for the pod.
310-
func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
309+
func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) {
311310
m.podStatusesLock.RLock()
312311
defer m.podStatusesLock.RUnlock()
313-
return m.state.SetPodResizeStatus(string(podUID), resizeStatus)
312+
m.state.SetPodResizeStatus(string(podUID), resizeStatus)
314313
}
315314

316315
func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {

0 commit comments

Comments
 (0)