@@ -17,6 +17,8 @@ limitations under the License.
17
17
package allocation
18
18
19
19
import (
20
+ "path/filepath"
21
+
20
22
v1 "k8s.io/api/core/v1"
21
23
apiequality "k8s.io/apimachinery/pkg/api/equality"
22
24
"k8s.io/apimachinery/pkg/types"
@@ -29,7 +31,10 @@ import (
29
31
)
30
32
31
33
// Checkpoint file names used by NewManager. Each name is passed to
// newStateImpl together with the checkpoint directory.
const (
	// allocatedPodsStateFile is the file name where the manager stores the
	// allocated pod resources.
	allocatedPodsStateFile = "allocated_pods_state"
	// actuatedPodsStateFile is the file name where the manager stores the
	// actuated pod resources.
	actuatedPodsStateFile = "actuated_pods_state"
)
33
38
34
39
// Manager tracks pod resource allocations.
35
40
type Manager interface {
@@ -41,57 +46,71 @@ type Manager interface {
41
46
// Returns the updated (or original) pod, and whether there was an allocation stored.
42
47
UpdatePodFromAllocation (pod * v1.Pod ) (* v1.Pod , bool )
43
48
44
- // SetPodAllocation checkpoints the resources allocated to a pod's containers.
45
- SetPodAllocation (pod * v1.Pod ) error
49
+ // SetAllocatedResources checkpoints the resources allocated to a pod's containers.
50
+ SetAllocatedResources (allocatedPod * v1.Pod ) error
51
+
52
+ // SetActuatedResources records the actuated resources of the given container (or the entire
53
+ // pod, if actuatedContainer is nil).
54
+ SetActuatedResources (allocatedPod * v1.Pod , actuatedContainer * v1.Container ) error
46
55
47
- // DeletePodAllocation removes any stored state for the given pod UID.
48
- DeletePodAllocation (uid types.UID )
56
+ // GetActuatedResources returns the stored actuated resources for the container, and whether they exist.
57
+ GetActuatedResources (podUID types.UID , containerName string ) (v1.ResourceRequirements , bool )
58
+
59
+ // RemovePod removes any stored state for the given pod UID.
60
+ RemovePod (uid types.UID )
49
61
50
62
// RemoveOrphanedPods removes the stored state for any pods not included in the set of remaining pods.
51
63
RemoveOrphanedPods (remainingPods sets.Set [types.UID ])
52
64
}
53
65
54
66
type manager struct {
55
- state state.State
67
+ allocated state.State
68
+ actuated state.State
56
69
}
57
70
58
71
func NewManager (checkpointDirectory string ) Manager {
59
- m := & manager {}
60
-
61
- if utilfeature .DefaultFeatureGate .Enabled (features .InPlacePodVerticalScaling ) {
62
- stateImpl , err := state .NewStateCheckpoint (checkpointDirectory , podStatusManagerStateFile )
63
- if err != nil {
64
- // This is a crictical, non-recoverable failure.
65
- klog .ErrorS (err , "Failed to initialize allocation checkpoint manager" )
66
- panic (err )
67
- }
68
- m .state = stateImpl
69
- } else {
70
- m .state = state .NewNoopStateCheckpoint ()
72
+ return & manager {
73
+ allocated : newStateImpl (checkpointDirectory , allocatedPodsStateFile ),
74
+ actuated : newStateImpl (checkpointDirectory , actuatedPodsStateFile ),
75
+ }
76
+ }
77
+
78
+ func newStateImpl (checkpointDirectory , checkpointName string ) state.State {
79
+ if ! utilfeature .DefaultFeatureGate .Enabled (features .InPlacePodVerticalScaling ) {
80
+ return state .NewNoopStateCheckpoint ()
71
81
}
72
82
73
- return m
83
+ stateImpl , err := state .NewStateCheckpoint (checkpointDirectory , checkpointName )
84
+ if err != nil {
85
+ // This is a critical, non-recoverable failure.
86
+ klog .ErrorS (err , "Failed to initialize allocation checkpoint manager" ,
87
+ "checkpointPath" , filepath .Join (checkpointDirectory , checkpointName ))
88
+ panic (err )
89
+ }
90
+
91
+ return stateImpl
74
92
}
75
93
76
94
// NewInMemoryManager returns an allocation manager that doesn't persist state.
77
95
// For testing purposes only!
78
96
func NewInMemoryManager () Manager {
79
97
return & manager {
80
- state : state .NewStateMemory (nil ),
98
+ allocated : state .NewStateMemory (nil ),
99
+ actuated : state .NewStateMemory (nil ),
81
100
}
82
101
}
83
102
84
103
// GetContainerResourceAllocation returns the last checkpointed AllocatedResources values
85
104
// If checkpoint manager has not been initialized, it returns nil, false
86
105
func (m * manager ) GetContainerResourceAllocation (podUID types.UID , containerName string ) (v1.ResourceRequirements , bool ) {
87
- return m .state .GetContainerResourceAllocation (podUID , containerName )
106
+ return m .allocated .GetContainerResourceAllocation (podUID , containerName )
88
107
}
89
108
90
109
// UpdatePodFromAllocation overwrites the pod spec with the allocation.
91
110
// This function does a deep copy only if updates are needed.
92
111
func (m * manager ) UpdatePodFromAllocation (pod * v1.Pod ) (* v1.Pod , bool ) {
93
112
// TODO(tallclair): This clones the whole cache, but we only need 1 pod.
94
- allocs := m .state .GetPodResourceAllocation ()
113
+ allocs := m .allocated .GetPodResourceAllocation ()
95
114
return updatePodFromAllocation (pod , allocs )
96
115
}
97
116
@@ -132,8 +151,12 @@ func updatePodFromAllocation(pod *v1.Pod, allocs state.PodResourceAllocation) (*
132
151
return pod , updated
133
152
}
134
153
135
- // SetPodAllocation checkpoints the resources allocated to a pod's containers
136
- func (m * manager ) SetPodAllocation (pod * v1.Pod ) error {
154
+ // SetAllocatedResources checkpoints the resources allocated to a pod's containers
155
+ func (m * manager ) SetAllocatedResources (pod * v1.Pod ) error {
156
+ return m .allocated .SetPodResourceAllocation (pod .UID , allocationFromPod (pod ))
157
+ }
158
+
159
+ func allocationFromPod (pod * v1.Pod ) map [string ]v1.ResourceRequirements {
137
160
podAlloc := make (map [string ]v1.ResourceRequirements )
138
161
for _ , container := range pod .Spec .Containers {
139
162
alloc := * container .Resources .DeepCopy ()
@@ -149,16 +172,35 @@ func (m *manager) SetPodAllocation(pod *v1.Pod) error {
149
172
}
150
173
}
151
174
152
- return m . state . SetPodResourceAllocation ( pod . UID , podAlloc )
175
+ return podAlloc
153
176
}
154
177
155
- func (m * manager ) DeletePodAllocation (uid types.UID ) {
156
- if err := m .state .Delete (uid , "" ); err != nil {
178
+ func (m * manager ) RemovePod (uid types.UID ) {
179
+ if err := m .allocated .Delete (uid , "" ); err != nil {
180
+ // If the deletion fails, it will be retried by RemoveOrphanedPods, so we can safely ignore the error.
181
+ klog .V (3 ).ErrorS (err , "Failed to delete pod allocation" , "podUID" , uid )
182
+ }
183
+
184
+ if err := m .actuated .Delete (uid , "" ); err != nil {
157
185
// If the deletion fails, it will be retried by RemoveOrphanedPods, so we can safely ignore the error.
158
186
klog .V (3 ).ErrorS (err , "Failed to delete pod allocation" , "podUID" , uid )
159
187
}
160
188
}
161
189
162
190
func (m * manager ) RemoveOrphanedPods (remainingPods sets.Set [types.UID ]) {
163
- m .state .RemoveOrphanedPods (remainingPods )
191
+ m .allocated .RemoveOrphanedPods (remainingPods )
192
+ m .actuated .RemoveOrphanedPods (remainingPods )
193
+ }
194
+
195
+ func (m * manager ) SetActuatedResources (allocatedPod * v1.Pod , actuatedContainer * v1.Container ) error {
196
+ if actuatedContainer == nil {
197
+ alloc := allocationFromPod (allocatedPod )
198
+ return m .actuated .SetPodResourceAllocation (allocatedPod .UID , alloc )
199
+ }
200
+
201
+ return m .actuated .SetContainerResourceAllocation (allocatedPod .UID , actuatedContainer .Name , actuatedContainer .Resources )
202
+ }
203
+
204
+ func (m * manager ) GetActuatedResources (podUID types.UID , containerName string ) (v1.ResourceRequirements , bool ) {
205
+ return m .actuated .GetContainerResourceAllocation (podUID , containerName )
164
206
}
0 commit comments