Skip to content

Commit e094e5e

Browse files
authored
Merge pull request kubernetes#129684 from swatisehgal/mm-mgr-logs-improvements
Memory Manager logging improvements
2 parents a02fe24 + ecd67e2 commit e094e5e

File tree

4 files changed

+31
-26
lines changed

4 files changed

+31
-26
lines changed

pkg/kubelet/cm/memorymanager/memory_manager.go

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,7 @@ func (m *manager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesRe
205205

206206
m.allocatableMemory = m.policy.GetAllocatableMemory(m.state)
207207

208+
klog.V(4).InfoS("memorymanager started", "policy", m.policy.Name())
208209
return nil
209210
}
210211

@@ -248,7 +249,7 @@ func (m *manager) GetMemoryNUMANodes(pod *v1.Pod, container *v1.Container) sets.
248249
}
249250

250251
if numaNodes.Len() == 0 {
251-
klog.V(5).InfoS("No allocation is available", "pod", klog.KObj(pod), "containerName", container.Name)
252+
klog.V(5).InfoS("NUMA nodes not available for allocation", "pod", klog.KObj(pod), "containerName", container.Name)
252253
return nil
253254
}
254255

@@ -266,7 +267,7 @@ func (m *manager) Allocate(pod *v1.Pod, container *v1.Container) error {
266267

267268
// Call down into the policy to assign this container memory if required.
268269
if err := m.policy.Allocate(m.state, pod, container); err != nil {
269-
klog.ErrorS(err, "Allocate error")
270+
klog.ErrorS(err, "Allocate error", "pod", klog.KObj(pod), "containerName", container.Name)
270271
return err
271272
}
272273
return nil
@@ -280,7 +281,7 @@ func (m *manager) RemoveContainer(containerID string) error {
280281
// if error appears it means container entry already does not exist under the container map
281282
podUID, containerName, err := m.containerMap.GetContainerRef(containerID)
282283
if err != nil {
283-
klog.InfoS("Failed to get container from container map", "containerID", containerID, "err", err)
284+
klog.ErrorS(err, "Failed to get container from container map", "containerID", containerID)
284285
return nil
285286
}
286287

@@ -344,15 +345,15 @@ func (m *manager) removeStaleState() {
344345
for podUID := range assignments {
345346
for containerName := range assignments[podUID] {
346347
if _, ok := activeContainers[podUID][containerName]; !ok {
347-
klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
348+
klog.V(2).InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
348349
m.policyRemoveContainerByRef(podUID, containerName)
349350
}
350351
}
351352
}
352353

353354
m.containerMap.Visit(func(podUID, containerName, containerID string) {
354355
if _, ok := activeContainers[podUID][containerName]; !ok {
355-
klog.InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
356+
klog.V(2).InfoS("RemoveStaleState removing state", "podUID", podUID, "containerName", containerName)
356357
m.policyRemoveContainerByRef(podUID, containerName)
357358
}
358359
})

pkg/kubelet/cm/memorymanager/policy_static.go

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ func (p *staticPolicy) Start(s state.State) error {
9696
// Allocate call is idempotent
9797
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
9898
// allocate the memory only for guaranteed pods
99-
if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
99+
qos := v1qos.GetPodQOS(pod)
100+
if qos != v1.PodQOSGuaranteed {
101+
klog.V(5).InfoS("Exclusive memory allocation skipped, pod QoS is not guaranteed", "pod", klog.KObj(pod), "containerName", container.Name, "qos", qos)
100102
return nil
101103
}
102104

@@ -196,6 +198,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
196198
// TODO: we should refactor our state structs to reflect the amount of the re-used memory
197199
p.updateInitContainersMemoryBlocks(s, pod, container, containerBlocks)
198200

201+
klog.V(4).InfoS("Allocated exclusive memory", "pod", klog.KObj(pod), "containerName", container.Name)
199202
return nil
200203
}
201204

@@ -304,24 +307,24 @@ func regenerateHints(pod *v1.Pod, ctn *v1.Container, ctnBlocks []state.Block, re
304307
}
305308

306309
if len(ctnBlocks) != len(reqRsrc) {
307-
klog.ErrorS(nil, "The number of requested resources by the container differs from the number of memory blocks", "containerName", ctn.Name)
310+
klog.InfoS("The number of requested resources by the container differs from the number of memory blocks", "pod", klog.KObj(pod), "containerName", ctn.Name)
308311
return nil
309312
}
310313

311314
for _, b := range ctnBlocks {
312315
if _, ok := reqRsrc[b.Type]; !ok {
313-
klog.ErrorS(nil, "Container requested resources do not have resource of this type", "containerName", ctn.Name, "type", b.Type)
316+
klog.InfoS("Container requested resources but none available of this type", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type)
314317
return nil
315318
}
316319

317320
if b.Size != reqRsrc[b.Type] {
318-
klog.ErrorS(nil, "Memory already allocated with different numbers than requested", "podUID", pod.UID, "type", b.Type, "containerName", ctn.Name, "requestedResource", reqRsrc[b.Type], "allocatedSize", b.Size)
321+
klog.InfoS("Memory already allocated with different numbers than requested", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type, "requestedResource", reqRsrc[b.Type], "allocatedSize", b.Size)
319322
return nil
320323
}
321324

322325
containerNUMAAffinity, err := bitmask.NewBitMask(b.NUMAAffinity...)
323326
if err != nil {
324-
klog.ErrorS(err, "Failed to generate NUMA bitmask")
327+
klog.ErrorS(err, "Failed to generate NUMA bitmask", "pod", klog.KObj(pod), "containerName", ctn.Name, "type", b.Type)
325328
return nil
326329
}
327330

@@ -660,36 +663,36 @@ func (p *staticPolicy) validateState(s state.State) error {
660663

661664
func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
662665
if len(ms1) != len(ms2) {
663-
klog.ErrorS(nil, "Node states are different", "lengthNode1", len(ms1), "lengthNode2", len(ms2))
666+
klog.InfoS("Node states were different", "lengthNode1", len(ms1), "lengthNode2", len(ms2))
664667
return false
665668
}
666669

667670
for nodeID, nodeState1 := range ms1 {
668671
nodeState2, ok := ms2[nodeID]
669672
if !ok {
670-
klog.ErrorS(nil, "Node state does not have node ID", "nodeID", nodeID)
673+
klog.InfoS("Node state didn't have node ID", "nodeID", nodeID)
671674
return false
672675
}
673676

674677
if nodeState1.NumberOfAssignments != nodeState2.NumberOfAssignments {
675-
klog.ErrorS(nil, "Node states number of assignments are different", "assignment1", nodeState1.NumberOfAssignments, "assignment2", nodeState2.NumberOfAssignments)
678+
klog.InfoS("Node state had a different number of memory assignments.", "assignment1", nodeState1.NumberOfAssignments, "assignment2", nodeState2.NumberOfAssignments)
676679
return false
677680
}
678681

679682
if !areGroupsEqual(nodeState1.Cells, nodeState2.Cells) {
680-
klog.ErrorS(nil, "Node states groups are different", "stateNode1", nodeState1.Cells, "stateNode2", nodeState2.Cells)
683+
klog.InfoS("Node states had different groups", "stateNode1", nodeState1.Cells, "stateNode2", nodeState2.Cells)
681684
return false
682685
}
683686

684687
if len(nodeState1.MemoryMap) != len(nodeState2.MemoryMap) {
685-
klog.ErrorS(nil, "Node states memory map have different lengths", "lengthNode1", len(nodeState1.MemoryMap), "lengthNode2", len(nodeState2.MemoryMap))
688+
klog.InfoS("Node state had memory maps of different lengths", "lengthNode1", len(nodeState1.MemoryMap), "lengthNode2", len(nodeState2.MemoryMap))
686689
return false
687690
}
688691

689692
for resourceName, memoryState1 := range nodeState1.MemoryMap {
690693
memoryState2, ok := nodeState2.MemoryMap[resourceName]
691694
if !ok {
692-
klog.ErrorS(nil, "Memory state does not have resource", "resource", resourceName)
695+
klog.InfoS("Memory state didn't have resource", "resource", resourceName)
693696
return false
694697
}
695698

@@ -707,11 +710,11 @@ func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
707710
}
708711

709712
if tmpState1.Free != tmpState2.Free {
710-
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "free", "free1", tmpState1.Free, "free2", tmpState2.Free, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
713+
klog.InfoS("NUMA node and resource had different memory states", "node", nodeID, "resource", resourceName, "field", "free", "free1", tmpState1.Free, "free2", tmpState2.Free, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
711714
return false
712715
}
713716
if tmpState1.Reserved != tmpState2.Reserved {
714-
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "reserved", "reserved1", tmpState1.Reserved, "reserved2", tmpState2.Reserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
717+
klog.InfoS("NUMA node and resource had different memory states", "node", nodeID, "resource", resourceName, "field", "reserved", "reserved1", tmpState1.Reserved, "reserved2", tmpState2.Reserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
715718
return false
716719
}
717720
}
@@ -721,17 +724,17 @@ func areMachineStatesEqual(ms1, ms2 state.NUMANodeMap) bool {
721724

722725
func areMemoryStatesEqual(memoryState1, memoryState2 *state.MemoryTable, nodeID int, resourceName v1.ResourceName) bool {
723726
if memoryState1.TotalMemSize != memoryState2.TotalMemSize {
724-
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "TotalMemSize", "TotalMemSize1", memoryState1.TotalMemSize, "TotalMemSize2", memoryState2.TotalMemSize, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
727+
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "TotalMemSize", "TotalMemSize1", memoryState1.TotalMemSize, "TotalMemSize2", memoryState2.TotalMemSize, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
725728
return false
726729
}
727730

728731
if memoryState1.SystemReserved != memoryState2.SystemReserved {
729-
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "SystemReserved", "SystemReserved1", memoryState1.SystemReserved, "SystemReserved2", memoryState2.SystemReserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
732+
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "SystemReserved", "SystemReserved1", memoryState1.SystemReserved, "SystemReserved2", memoryState2.SystemReserved, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
730733
return false
731734
}
732735

733736
if memoryState1.Allocatable != memoryState2.Allocatable {
734-
klog.ErrorS(nil, "Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "Allocatable", "Allocatable1", memoryState1.Allocatable, "Allocatable2", memoryState2.Allocatable, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
737+
klog.InfoS("Memory states for the NUMA node and resource are different", "node", nodeID, "resource", resourceName, "field", "Allocatable", "Allocatable1", memoryState1.Allocatable, "Allocatable2", memoryState2.Allocatable, "memoryState1", *memoryState1, "memoryState2", *memoryState2)
735738
return false
736739
}
737740
return true

pkg/kubelet/cm/memorymanager/state/state_checkpoint.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func (sc *stateCheckpoint) SetMachineState(memoryMap NUMANodeMap) {
131131
sc.cache.SetMachineState(memoryMap)
132132
err := sc.storeState()
133133
if err != nil {
134-
klog.InfoS("Store state to checkpoint error", "err", err)
134+
klog.ErrorS(err, "Failed to store state to checkpoint")
135135
}
136136
}
137137

@@ -143,7 +143,7 @@ func (sc *stateCheckpoint) SetMemoryBlocks(podUID string, containerName string,
143143
sc.cache.SetMemoryBlocks(podUID, containerName, blocks)
144144
err := sc.storeState()
145145
if err != nil {
146-
klog.InfoS("Store state to checkpoint error", "err", err)
146+
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
147147
}
148148
}
149149

@@ -155,7 +155,7 @@ func (sc *stateCheckpoint) SetMemoryAssignments(assignments ContainerMemoryAssig
155155
sc.cache.SetMemoryAssignments(assignments)
156156
err := sc.storeState()
157157
if err != nil {
158-
klog.InfoS("Store state to checkpoint error", "err", err)
158+
klog.ErrorS(err, "Failed to store state to checkpoint")
159159
}
160160
}
161161

@@ -167,7 +167,7 @@ func (sc *stateCheckpoint) Delete(podUID string, containerName string) {
167167
sc.cache.Delete(podUID, containerName)
168168
err := sc.storeState()
169169
if err != nil {
170-
klog.InfoS("Store state to checkpoint error", "err", err)
170+
klog.ErrorS(err, "Failed to store state to checkpoint", "podUID", podUID, "containerName", containerName)
171171
}
172172
}
173173

@@ -179,6 +179,6 @@ func (sc *stateCheckpoint) ClearState() {
179179
sc.cache.ClearState()
180180
err := sc.storeState()
181181
if err != nil {
182-
klog.InfoS("Store state to checkpoint error", "err", err)
182+
klog.ErrorS(err, "Failed to store state to checkpoint")
183183
}
184184
}

pkg/kubelet/cm/memorymanager/state/state_mem.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ func (s *stateMemory) SetMemoryAssignments(assignments ContainerMemoryAssignment
9494
defer s.Unlock()
9595

9696
s.assignments = assignments.Clone()
97+
klog.V(5).InfoS("Updated Memory assignments", "assignments", assignments)
9798
}
9899

99100
// Delete deletes corresponding Blocks from ContainerMemoryAssignments

0 commit comments

Comments
 (0)