Skip to content

Commit da98829

Browse files
committed
Work-around for missing metrics on CRI-O exited containers
HPA needs metrics for exited init containers before it will take action. By setting memory and CPU usage to zero for any containers that cAdvisor didn't provide statistics for, we are assured that HPA will be able to correctly calculate pod resource usage.
1 parent 0ad60b3 commit da98829

File tree

3 files changed

+82
-4
lines changed

3 files changed

+82
-4
lines changed

pkg/kubelet/stats/cadvisor_stats_provider_test.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,9 @@ func TestCadvisorListPodStats(t *testing.T) {
8989
seedPod1Container = 4000
9090
seedPod2Infra = 5000
9191
seedPod2Container = 6000
92+
seedPod3Infra = 7000
93+
seedPod3Container0 = 8000
94+
seedPod3Container1 = 8001
9295
seedEphemeralVolume1 = 10000
9396
seedEphemeralVolume2 = 10001
9497
seedPersistentVolume1 = 20000
@@ -98,12 +101,15 @@ func TestCadvisorListPodStats(t *testing.T) {
98101
pName0 = "pod0"
99102
pName1 = "pod1"
100103
pName2 = "pod0" // ensure pName2 conflicts with pName0, but is in a different namespace
104+
pName3 = "pod3"
101105
)
102106
const (
103107
cName00 = "c0"
104108
cName01 = "c1"
105109
cName10 = "c0" // ensure cName10 conflicts with cName00, but is in a different pod
106110
cName20 = "c1" // ensure cName20 conflicts with cName01, but is in a different pod + namespace
111+
cName30 = "c0-init"
112+
cName31 = "c1"
107113
)
108114
const (
109115
rootfsCapacity = uint64(10000000)
@@ -119,6 +125,7 @@ func TestCadvisorListPodStats(t *testing.T) {
119125
prf0 := statsapi.PodReference{Name: pName0, Namespace: namespace0, UID: "UID" + pName0}
120126
prf1 := statsapi.PodReference{Name: pName1, Namespace: namespace0, UID: "UID" + pName1}
121127
prf2 := statsapi.PodReference{Name: pName2, Namespace: namespace2, UID: "UID" + pName2}
128+
prf3 := statsapi.PodReference{Name: pName3, Namespace: namespace0, UID: "UID" + pName3}
122129
infos := map[string]cadvisorapiv2.ContainerInfo{
123130
"/": getTestContainerInfo(seedRoot, "", "", ""),
124131
"/docker-daemon": getTestContainerInfo(seedRuntime, "", "", ""),
@@ -136,6 +143,10 @@ func TestCadvisorListPodStats(t *testing.T) {
136143
"/pod2-c0": getTestContainerInfo(seedPod2Container, pName2, namespace2, cName20),
137144
"/kubepods/burstable/podUIDpod0": getTestContainerInfo(seedPod0Infra, pName0, namespace0, leaky.PodInfraContainerName),
138145
"/kubepods/podUIDpod1": getTestContainerInfo(seedPod1Infra, pName1, namespace0, leaky.PodInfraContainerName),
146+
// Pod3 - Namespace0
147+
"/pod3-i": getTestContainerInfo(seedPod3Infra, pName3, namespace0, leaky.PodInfraContainerName),
148+
"/pod3-c0-init": getTestContainerInfo(seedPod3Container0, pName3, namespace0, cName30),
149+
"/pod3-c1": getTestContainerInfo(seedPod3Container1, pName3, namespace0, cName31),
139150
}
140151

141152
freeRootfsInodes := rootfsInodesFree
@@ -169,6 +180,21 @@ func TestCadvisorListPodStats(t *testing.T) {
169180
info.Spec.Memory.Limit = memoryLimitOverride
170181
infos[name] = info
171182
}
183+
// any container for which cadvisor should return no stats (as might be the case for an exited init container)
184+
nostatsOverrides := []string{
185+
"/pod3-c0-init",
186+
}
187+
for _, name := range nostatsOverrides {
188+
info, found := infos[name]
189+
if !found {
190+
t.Errorf("No container defined with name %v", name)
191+
}
192+
info.Spec.Memory = cadvisorapiv2.MemorySpec{}
193+
info.Spec.Cpu = cadvisorapiv2.CpuSpec{}
194+
info.Spec.HasMemory = false
195+
info.Spec.HasCpu = false
196+
infos[name] = info
197+
}
172198

173199
options := cadvisorapiv2.RequestOptions{
174200
IdType: cadvisorapiv2.TypeName,
@@ -197,18 +223,20 @@ func TestCadvisorListPodStats(t *testing.T) {
197223
p0Time := metav1.Now()
198224
p1Time := metav1.Now()
199225
p2Time := metav1.Now()
226+
p3Time := metav1.Now()
200227
mockStatus := new(statustest.MockStatusProvider)
201228
mockStatus.On("GetPodStatus", types.UID("UID"+pName0)).Return(v1.PodStatus{StartTime: &p0Time}, true)
202229
mockStatus.On("GetPodStatus", types.UID("UID"+pName1)).Return(v1.PodStatus{StartTime: &p1Time}, true)
203230
mockStatus.On("GetPodStatus", types.UID("UID"+pName2)).Return(v1.PodStatus{StartTime: &p2Time}, true)
231+
mockStatus.On("GetPodStatus", types.UID("UID"+pName3)).Return(v1.PodStatus{StartTime: &p3Time}, true)
204232

205233
resourceAnalyzer := &fakeResourceAnalyzer{podVolumeStats: volumeStats}
206234

207235
p := NewCadvisorStatsProvider(mockCadvisor, resourceAnalyzer, nil, nil, mockRuntime, mockStatus)
208236
pods, err := p.ListPodStats()
209237
assert.NoError(t, err)
210238

211-
assert.Equal(t, 3, len(pods))
239+
assert.Equal(t, 4, len(pods))
212240
indexPods := make(map[statsapi.PodReference]statsapi.PodStats, len(pods))
213241
for _, pod := range pods {
214242
indexPods[pod.PodRef] = pod
@@ -261,6 +289,24 @@ func TestCadvisorListPodStats(t *testing.T) {
261289
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
262290
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
263291
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
292+
293+
// Validate Pod3 Results
294+
295+
ps, found = indexPods[prf3]
296+
assert.True(t, found)
297+
assert.Len(t, ps.Containers, 2)
298+
indexCon = make(map[string]statsapi.ContainerStats, len(ps.Containers))
299+
for _, con := range ps.Containers {
300+
indexCon[con.Name] = con
301+
}
302+
con = indexCon[cName31]
303+
assert.Equal(t, cName31, con.Name)
304+
checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
305+
checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
306+
con = indexCon[cName30]
307+
assert.Equal(t, cName30, con.Name)
308+
checkEmptyCPUStats(t, "Pod3Container0", seedPod3Container0, con.CPU)
309+
checkEmptyMemoryStats(t, "Pod3Container0", seedPod3Container0, infos["/pod3-c0-init"], con.Memory)
264310
}
265311

266312
func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {

pkg/kubelet/stats/helper.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,12 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
4040
}
4141
var cpuStats *statsapi.CPUStats
4242
var memoryStats *statsapi.MemoryStats
43+
cpuStats = &statsapi.CPUStats{
44+
Time: metav1.NewTime(cstat.Timestamp),
45+
UsageNanoCores: uint64Ptr(0),
46+
UsageCoreNanoSeconds: uint64Ptr(0),
47+
}
4348
if info.Spec.HasCpu {
44-
cpuStats = &statsapi.CPUStats{
45-
Time: metav1.NewTime(cstat.Timestamp),
46-
}
4749
if cstat.CpuInst != nil {
4850
cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total
4951
}
@@ -67,6 +69,11 @@ func cadvisorInfoToCPUandMemoryStats(info *cadvisorapiv2.ContainerInfo) (*statsa
6769
availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
6870
memoryStats.AvailableBytes = &availableBytes
6971
}
72+
} else {
73+
memoryStats = &statsapi.MemoryStats{
74+
Time: metav1.NewTime(cstat.Timestamp),
75+
WorkingSetBytes: uint64Ptr(0),
76+
}
7077
}
7178
return cpuStats, memoryStats
7279
}

pkg/kubelet/stats/stats_provider_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,32 @@ func checkNetworkStats(t *testing.T, label string, seed int, stats *statsapi.Net
621621

622622
}
623623

624+
// container which had no stats should have zero-valued CPU usage
625+
func checkEmptyCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
626+
require.NotNil(t, stats.Time, label+".CPU.Time")
627+
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
628+
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
629+
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
630+
assert.EqualValues(t, 0, *stats.UsageNanoCores, label+".CPU.UsageCores")
631+
assert.EqualValues(t, 0, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")
632+
}
633+
634+
// container which had no stats should have zero-valued Memory usage
635+
func checkEmptyMemoryStats(t *testing.T, label string, seed int, info cadvisorapiv2.ContainerInfo, stats *statsapi.MemoryStats) {
636+
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".Mem.Time")
637+
require.NotNil(t, stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
638+
assert.EqualValues(t, 0, *stats.WorkingSetBytes, label+".Mem.WorkingSetBytes")
639+
assert.Nil(t, stats.UsageBytes, label+".Mem.UsageBytes")
640+
assert.Nil(t, stats.RSSBytes, label+".Mem.RSSBytes")
641+
assert.Nil(t, stats.PageFaults, label+".Mem.PageFaults")
642+
assert.Nil(t, stats.MajorPageFaults, label+".Mem.MajorPageFaults")
643+
assert.Nil(t, stats.AvailableBytes, label+".Mem.AvailableBytes")
644+
}
645+
624646
func checkCPUStats(t *testing.T, label string, seed int, stats *statsapi.CPUStats) {
647+
require.NotNil(t, stats.Time, label+".CPU.Time")
648+
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageNanoCores")
649+
require.NotNil(t, stats.UsageNanoCores, label+".CPU.UsageCoreSeconds")
625650
assert.EqualValues(t, testTime(timestamp, seed).Unix(), stats.Time.Time.Unix(), label+".CPU.Time")
626651
assert.EqualValues(t, seed+offsetCPUUsageCores, *stats.UsageNanoCores, label+".CPU.UsageCores")
627652
assert.EqualValues(t, seed+offsetCPUUsageCoreSeconds, *stats.UsageCoreNanoSeconds, label+".CPU.UsageCoreSeconds")

0 commit comments

Comments
 (0)