@@ -42,50 +42,43 @@ func NewStateCheckpoint(stateDir, checkpointName string) (State, error) {
42
42
if err != nil {
43
43
return nil , fmt .Errorf ("failed to initialize checkpoint manager for pod allocation tracking: %v" , err )
44
44
}
45
+
46
+ praInfo , err := restoreState (checkpointManager , checkpointName )
47
+ if err != nil {
48
+ //lint:ignore ST1005 user-facing error message
49
+ return nil , fmt .Errorf ("could not restore state from checkpoint: %w, please drain this node and delete pod allocation checkpoint file %q before restarting Kubelet" ,
50
+ err , path .Join (stateDir , checkpointName ))
51
+ }
52
+
45
53
stateCheckpoint := & stateCheckpoint {
46
- cache : NewStateMemory (PodResourceAllocation {}, PodResizeStatus {} ),
54
+ cache : NewStateMemory (praInfo . AllocationEntries ),
47
55
checkpointManager : checkpointManager ,
48
56
checkpointName : checkpointName ,
49
57
}
50
58
51
- if err := stateCheckpoint .restoreState (); err != nil {
52
- //lint:ignore ST1005 user-facing error message
53
- return nil , fmt .Errorf ("could not restore state from checkpoint: %v, please drain this node and delete pod allocation checkpoint file %q before restarting Kubelet" , err , path .Join (stateDir , checkpointName ))
54
- }
59
+ klog .V (2 ).InfoS ("State checkpoint: restored pod resource allocation state from checkpoint" )
55
60
return stateCheckpoint , nil
56
61
}
57
62
58
63
// restoreState loads the pod resource allocation info from the checkpoint; if the checkpoint doesn't exist, it returns empty allocation info without creating one
59
- func (sc * stateCheckpoint ) restoreState () error {
60
- sc .mux .Lock ()
61
- defer sc .mux .Unlock ()
64
+ func restoreState (checkpointManager checkpointmanager.CheckpointManager , checkpointName string ) (* PodResourceAllocationInfo , error ) {
62
65
var err error
66
+ checkpoint := & Checkpoint {}
63
67
64
- checkpoint , err := NewCheckpoint (nil )
65
- if err != nil {
66
- return fmt .Errorf ("failed to create new checkpoint: %w" , err )
67
- }
68
-
69
- if err = sc .checkpointManager .GetCheckpoint (sc .checkpointName , checkpoint ); err != nil {
68
+ if err = checkpointManager .GetCheckpoint (checkpointName , checkpoint ); err != nil {
70
69
if err == errors .ErrCheckpointNotFound {
71
- return sc .storeState ()
70
+ return & PodResourceAllocationInfo {
71
+ AllocationEntries : make (map [string ]map [string ]v1.ResourceRequirements ),
72
+ }, nil
72
73
}
73
- return err
74
+ return nil , err
74
75
}
75
76
praInfo , err := checkpoint .GetPodResourceAllocationInfo ()
76
77
if err != nil {
77
- return fmt .Errorf ("failed to get pod resource allocation info: %w" , err )
78
- }
79
-
80
- for podUID , alloc := range praInfo .AllocationEntries {
81
- err = sc .cache .SetPodResourceAllocation (podUID , alloc )
82
- if err != nil {
83
- klog .ErrorS (err , "failed to set pod resource allocation" )
84
- }
78
+ return nil , fmt .Errorf ("failed to get pod resource allocation info: %w" , err )
85
79
}
86
80
87
- klog .V (2 ).InfoS ("State checkpoint: restored pod resource allocation state from checkpoint" )
88
- return nil
81
+ return praInfo , nil
89
82
}
90
83
91
84
// saves state to a checkpoint, caller is responsible for locking
@@ -135,19 +128,6 @@ func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, contain
135
128
return sc .storeState ()
136
129
}
137
130
138
- // SetPodResourceAllocation sets pod resource allocation
139
- func (sc * stateCheckpoint ) SetPodResourceAllocation (podUID string , alloc map [string ]v1.ResourceRequirements ) error {
140
- sc .mux .Lock ()
141
- defer sc .mux .Unlock ()
142
-
143
- err := sc .cache .SetPodResourceAllocation (podUID , alloc )
144
- if err != nil {
145
- return err
146
- }
147
-
148
- return sc .storeState ()
149
- }
150
-
151
131
// SetPodResizeStatus sets the last resize decision for a pod
152
132
func (sc * stateCheckpoint ) SetPodResizeStatus (podUID string , resizeStatus v1.PodResizeStatus ) {
153
133
sc .mux .Lock ()
@@ -194,10 +174,6 @@ func (sc *noopStateCheckpoint) SetContainerResourceAllocation(_ string, _ string
194
174
return nil
195
175
}
196
176
197
- func (sc * noopStateCheckpoint ) SetPodResourceAllocation (_ string , _ map [string ]v1.ResourceRequirements ) error {
198
- return nil
199
- }
200
-
201
177
func (sc * noopStateCheckpoint ) SetPodResizeStatus (_ string , _ v1.PodResizeStatus ) {}
202
178
203
179
func (sc * noopStateCheckpoint ) Delete (_ string , _ string ) error {
0 commit comments