Skip to content

Commit b00f221

Browse files
committed
fix InPlacePodVerticalScaling restore bug: the content wrote to and read from file pod_status are different
1 parent 114d4df commit b00f221

File tree

2 files changed

+46
-43
lines changed

2 files changed

+46
-43
lines changed

pkg/kubelet/status/state/checkpoint.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package state
1818

1919
import (
2020
"encoding/json"
21+
"fmt"
2122
"k8s.io/api/core/v1"
2223
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2324
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
@@ -32,7 +33,7 @@ type PodResourceAllocationInfo struct {
3233

3334
// Checkpoint represents a structure to store pod resource allocation checkpoint data
3435
type Checkpoint struct {
35-
// Data is a JSON serialized checkpoint data
36+
// Data is a serialized PodResourceAllocationInfo
3637
Data string `json:"data"`
3738
// Checksum is a checksum of Data
3839
Checksum checksum.Checksum `json:"checksum"`
@@ -47,7 +48,7 @@ func NewCheckpoint(allocations *PodResourceAllocationInfo) (*Checkpoint, error)
4748
}
4849

4950
cp := &Checkpoint{
50-
Data: string(praData),
51+
Data: string(serializedAllocations),
5152
}
5253
cp.Checksum = checksum.New(cp.Data)
5354
return cp, nil

pkg/kubelet/status/state/state_checkpoint_test.go

Lines changed: 43 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,19 @@ import (
2828
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
2929
)
3030

31+
const testCheckpoint = "pod_status_manager_state"
32+
3133
func newTestStateCheckpoint(t *testing.T) *stateCheckpoint {
3234
// create temp dir
3335
testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test")
3436
if err != nil {
3537
t.Fatal(err)
3638
}
37-
defer func() {
39+
t.Cleanup(func() {
3840
if err := os.RemoveAll(testingDir); err != nil {
3941
t.Fatal(err)
4042
}
41-
}()
43+
})
4244
cache := NewStateMemory()
4345
checkpointManager, err := checkpointmanager.NewCheckpointManager(testingDir)
4446
require.NoError(t, err, "failed to create checkpoint manager")
@@ -51,8 +53,31 @@ func newTestStateCheckpoint(t *testing.T) *stateCheckpoint {
5153
return sc
5254
}
5355

56+
func getTestDir(t *testing.T) string {
57+
testingDir, err := os.MkdirTemp("", "pod_resource_allocation_state_test")
58+
if err != nil {
59+
t.Fatal(err)
60+
}
61+
t.Cleanup(func() {
62+
if err := os.RemoveAll(testingDir); err != nil {
63+
t.Fatal(err)
64+
}
65+
})
66+
return testingDir
67+
}
68+
69+
func verifyPodResourceAllocation(t *testing.T, expected, actual *PodResourceAllocation, msgAndArgs string) {
70+
for podUID, containerResourceList := range *expected {
71+
require.Equal(t, len(containerResourceList), len((*actual)[podUID]), msgAndArgs)
72+
for containerName, resourceList := range containerResourceList {
73+
for name, quantity := range resourceList.Requests {
74+
require.True(t, quantity.Equal((*actual)[podUID][containerName].Requests[name]), msgAndArgs)
75+
}
76+
}
77+
}
78+
}
79+
5480
func Test_stateCheckpoint_storeState(t *testing.T) {
55-
sc := newTestStateCheckpoint(t)
5681
type args struct {
5782
podResourceAllocation PodResourceAllocation
5883
}
@@ -92,44 +117,21 @@ func Test_stateCheckpoint_storeState(t *testing.T) {
92117
}
93118
for _, tt := range tests {
94119
t.Run(tt.name, func(t *testing.T) {
95-
err := sc.cache.ClearState()
96-
require.NoError(t, err, "failed to clear state")
97-
98-
defer func() {
99-
err = sc.checkpointManager.RemoveCheckpoint(sc.checkpointName)
100-
require.NoError(t, err, "failed to remove checkpoint")
101-
}()
102-
103-
err = sc.cache.SetPodResourceAllocation(tt.args.podResourceAllocation)
104-
require.NoError(t, err, "failed to set pod resource allocation")
105-
106-
err = sc.storeState()
107-
require.NoError(t, err, "failed to store state")
108-
109-
// deep copy cache
110-
originCache := NewStateMemory()
111-
podAllocation := sc.cache.GetPodResourceAllocation()
112-
err = originCache.SetPodResourceAllocation(podAllocation)
113-
require.NoError(t, err, "failed to set pod resource allocation")
114-
115-
err = sc.cache.ClearState()
116-
require.NoError(t, err, "failed to clear state")
117-
118-
err = sc.restoreState()
119-
require.NoError(t, err, "failed to restore state")
120-
121-
restoredCache := sc.cache
122-
require.Equal(t, len(originCache.GetPodResourceAllocation()), len(restoredCache.GetPodResourceAllocation()), "restored pod resource allocation is not equal to original pod resource allocation")
123-
for podUID, containerResourceList := range originCache.GetPodResourceAllocation() {
124-
require.Equal(t, len(containerResourceList), len(restoredCache.GetPodResourceAllocation()[podUID]), "restored pod resource allocation is not equal to original pod resource allocation")
125-
for containerName, resourceList := range containerResourceList {
126-
for name, quantity := range resourceList.Requests {
127-
if !quantity.Equal(restoredCache.GetPodResourceAllocation()[podUID][containerName].Requests[name]) {
128-
t.Errorf("restored pod resource allocation is not equal to original pod resource allocation")
129-
}
130-
}
131-
}
132-
}
120+
testDir := getTestDir(t)
121+
originalSC, err := NewStateCheckpoint(testDir, testCheckpoint)
122+
require.NoError(t, err)
123+
124+
err = originalSC.SetPodResourceAllocation(tt.args.podResourceAllocation)
125+
require.NoError(t, err)
126+
127+
actual := originalSC.GetPodResourceAllocation()
128+
verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation")
129+
130+
newSC, err := NewStateCheckpoint(testDir, testCheckpoint)
131+
require.NoError(t, err)
132+
133+
actual = newSC.GetPodResourceAllocation()
134+
verifyPodResourceAllocation(t, &tt.args.podResourceAllocation, &actual, "stored pod resource allocation is not equal to original pod resource allocation")
133135
})
134136
}
135137
}

0 commit comments

Comments
 (0)