Skip to content

Commit f82439f

Browse files
authored
Merge pull request kubernetes#129486 from iholder101/bugfix/swap-container-cri-stats
[KEP-2400] [Bugfix]: Ensure container-level swap metrics are collected
2 parents 7f9fdd6 + 617c094 commit f82439f

File tree

6 files changed

+126
-1
lines changed

6 files changed

+126
-1
lines changed

pkg/kubelet/stats/cadvisor_stats_provider_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,7 @@ func TestCadvisorListPodStats(t *testing.T) {
313313
}
314314
if ps.Swap != nil {
315315
checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap)
316+
checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"])
316317
}
317318

318319
// Validate Pod1 Results
@@ -325,6 +326,7 @@ func TestCadvisorListPodStats(t *testing.T) {
325326
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
326327
checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap)
327328
checkNetworkStats(t, "Pod1", seedPod1Infra, ps.Network)
329+
checkContainersSwapStats(t, ps, infos["/pod1-c0"])
328330

329331
// Validate Pod2 Results
330332
ps, found = indexPods[prf2]
@@ -336,6 +338,7 @@ func TestCadvisorListPodStats(t *testing.T) {
336338
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
337339
checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap)
338340
checkNetworkStats(t, "Pod2", seedPod2Infra, ps.Network)
341+
checkContainersSwapStats(t, ps, infos["/pod2-c0"])
339342

340343
// Validate Pod3 Results
341344

@@ -352,6 +355,7 @@ func TestCadvisorListPodStats(t *testing.T) {
352355
checkCPUStats(t, "Pod3Container1", seedPod3Container1, con.CPU)
353356
checkMemoryStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Memory)
354357
checkSwapStats(t, "Pod3Container1", seedPod3Container1, infos["/pod3-c1"], con.Swap)
358+
checkContainersSwapStats(t, ps, infos["/pod3-c1"])
355359
}
356360

357361
func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
@@ -467,6 +471,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
467471
assert.EqualValues(t, testTime(creationTime, seedPod0Container0).Unix(), con.StartTime.Time.Unix())
468472
checkCPUStats(t, "Pod0Container0", seedPod0Container0, con.CPU)
469473
checkMemoryStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Memory)
474+
checkSwapStats(t, "Pod0Conainer0", seedPod0Container0, infos["/pod0-c0"], con.Swap)
470475
assert.Nil(t, con.Rootfs)
471476
assert.Nil(t, con.Logs)
472477
assert.Nil(t, con.Accelerators)
@@ -476,6 +481,7 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
476481
assert.EqualValues(t, testTime(creationTime, seedPod0Container1).Unix(), con.StartTime.Time.Unix())
477482
checkCPUStats(t, "Pod0Container1", seedPod0Container1, con.CPU)
478483
checkMemoryStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Memory)
484+
checkSwapStats(t, "Pod0Container1", seedPod0Container1, infos["/pod0-c1"], con.Swap)
479485
assert.Nil(t, con.Rootfs)
480486
assert.Nil(t, con.Logs)
481487
assert.Nil(t, con.Accelerators)
@@ -491,6 +497,10 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
491497
if ps.Memory != nil {
492498
checkMemoryStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Memory)
493499
}
500+
if ps.Swap != nil {
501+
checkSwapStats(t, "Pod0", seedPod0Infra, infos["/pod0-i"], ps.Swap)
502+
checkContainersSwapStats(t, ps, infos["/pod0-c0"], infos["/pod0-c1"])
503+
}
494504

495505
// Validate Pod1 Results
496506
ps, found = indexPods[prf1]
@@ -500,6 +510,8 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
500510
assert.Equal(t, cName10, con.Name)
501511
checkCPUStats(t, "Pod1Container0", seedPod1Container, con.CPU)
502512
checkMemoryStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Memory)
513+
checkSwapStats(t, "Pod1Container0", seedPod1Container, infos["/pod1-c0"], con.Swap)
514+
checkContainersSwapStats(t, ps, infos["/pod1-c0"])
503515
assert.Nil(t, ps.EphemeralStorage)
504516
assert.Nil(t, ps.VolumeStats)
505517
assert.Nil(t, ps.Network)
@@ -512,6 +524,8 @@ func TestCadvisorListPodCPUAndMemoryStats(t *testing.T) {
512524
assert.Equal(t, cName20, con.Name)
513525
checkCPUStats(t, "Pod2Container0", seedPod2Container, con.CPU)
514526
checkMemoryStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Memory)
527+
checkSwapStats(t, "Pod2Container0", seedPod2Container, infos["/pod2-c0"], con.Swap)
528+
checkContainersSwapStats(t, ps, infos["/pod2-c0"])
515529
assert.Nil(t, ps.EphemeralStorage)
516530
assert.Nil(t, ps.VolumeStats)
517531
assert.Nil(t, ps.Network)

pkg/kubelet/stats/cri_stats_provider.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,11 @@ func (p *criStatsProvider) addCadvisorContainerStats(
905905
if memory != nil {
906906
cs.Memory = memory
907907
}
908+
909+
swap := cadvisorInfoToSwapStats(caPodStats)
910+
if swap != nil {
911+
cs.Swap = swap
912+
}
908913
}
909914

910915
func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(

pkg/kubelet/stats/cri_stats_provider_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,8 @@ func TestCRIListPodStats(t *testing.T) {
279279
checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
280280
checkCRIPodSwapStats(assert, p0, infos[sandbox0Cgroup].Stats[0])
281281

282+
checkContainersSwapStats(t, p0, infos[container0.Id], infos[container1.Id])
283+
282284
p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}]
283285
assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano())
284286
assert.Len(p1.Containers, 1)
@@ -296,6 +298,8 @@ func TestCRIListPodStats(t *testing.T) {
296298
checkCRIPodCPUAndMemoryStats(assert, p1, infos[sandbox1Cgroup].Stats[0])
297299
checkCRIPodSwapStats(assert, p1, infos[sandbox1Cgroup].Stats[0])
298300

301+
checkContainersSwapStats(t, p1, infos[container2.Id])
302+
299303
p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}]
300304
assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano())
301305
assert.Len(p2.Containers, 1)
@@ -315,6 +319,8 @@ func TestCRIListPodStats(t *testing.T) {
315319
checkCRIPodCPUAndMemoryStats(assert, p2, infos[sandbox2Cgroup].Stats[0])
316320
checkCRIPodSwapStats(assert, p2, infos[sandbox2Cgroup].Stats[0])
317321

322+
checkContainersSwapStats(t, p2, infos[container4.Id])
323+
318324
p3 := podStatsMap[statsapi.PodReference{Name: "sandbox3-name", UID: "sandbox3-uid", Namespace: "sandbox3-ns"}]
319325
assert.Equal(sandbox3.CreatedAt, p3.StartTime.UnixNano())
320326
assert.Len(p3.Containers, 1)

pkg/kubelet/stats/helper.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.C
155155
cpu, memory := cadvisorInfoToCPUandMemoryStats(info)
156156
result.CPU = cpu
157157
result.Memory = memory
158+
result.Swap = cadvisorInfoToSwapStats(info)
158159

159160
return result
160161
}

pkg/kubelet/stats/provider_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package stats
1919
import (
2020
"context"
2121
"fmt"
22+
"runtime"
23+
"strings"
2224
"testing"
2325
"time"
2426

@@ -505,6 +507,39 @@ func checkFsStats(t *testing.T, label string, seed int, stats *statsapi.FsStats)
505507
assert.EqualValues(t, seed+offsetFsInodesFree, *stats.InodesFree, label+".InodesFree")
506508
}
507509

510+
func checkContainersSwapStats(t *testing.T, podStats statsapi.PodStats, containerStats ...cadvisorapiv2.ContainerInfo) {
511+
if runtime.GOOS != "linux" {
512+
return
513+
}
514+
515+
podContainers := make(map[string]struct{}, len(podStats.Containers))
516+
for _, container := range podStats.Containers {
517+
podContainers[container.Name] = struct{}{}
518+
}
519+
520+
for _, container := range containerStats {
521+
found := false
522+
containerName := container.Spec.Labels["io.kubernetes.container.name"]
523+
for _, containerPodStats := range podStats.Containers {
524+
if containerPodStats.Name == containerName {
525+
assert.Equal(t, container.Stats[0].Memory.Swap, *containerPodStats.Swap.SwapUsageBytes)
526+
found = true
527+
}
528+
}
529+
assert.True(t, found, "container %s not found in pod stats", container.Spec.Labels["io.kubernetes.container.name"])
530+
delete(podContainers, containerName)
531+
}
532+
533+
var missingContainerNames []string
534+
for containerName := range podContainers {
535+
missingContainerNames = append(missingContainerNames, containerName)
536+
}
537+
assert.Emptyf(t, podContainers, "containers not found in pod stats: %v", strings.Join(missingContainerNames, " "))
538+
if len(missingContainerNames) > 0 {
539+
assert.FailNow(t, "containers not found in pod stats")
540+
}
541+
}
542+
508543
func checkEphemeralStats(t *testing.T, label string, containerSeeds []int, volumeSeeds []int, containerLogStats []*volume.Metrics, stats *statsapi.FsStats) {
509544
var usedBytes, inodeUsage int
510545
for _, cseed := range containerSeeds {

test/e2e_node/swap_test.go

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333

3434
"github.com/onsi/ginkgo/v2"
3535
"github.com/onsi/gomega"
36+
"github.com/onsi/gomega/gstruct"
3637
v1 "k8s.io/api/core/v1"
3738
"k8s.io/apimachinery/pkg/api/resource"
3839
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -56,7 +57,7 @@ var (
5657
noLimits *resource.Quantity = nil
5758
)
5859

59-
var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(), func() {
60+
var _ = SIGDescribe("Swap", "[LinuxOnly]", ginkgo.Ordered, feature.Swap, framework.WithSerial(), func() {
6061
f := framework.NewDefaultFramework("swap-qos")
6162
addAfterEachForCleaningUpPods(f)
6263
f.NamespacePodSecurityLevel = admissionapi.LevelBaseline
@@ -287,6 +288,69 @@ var _ = SIGDescribe("Swap", "[LinuxOnly]", feature.Swap, framework.WithSerial(),
287288
err := podClient.Delete(context.Background(), stressPod.Name, metav1.DeleteOptions{})
288289
framework.ExpectNoError(err)
289290
})
291+
292+
ginkgo.It("ensure summary API properly reports swap", func() {
293+
stressSize := divideQuantity(nodeTotalMemory, 5)
294+
ginkgo.By("Creating a stress pod with stress size: " + stressSize.String())
295+
stressPod := getStressPod(stressSize)
296+
297+
memoryLimit := cloneQuantity(stressSize)
298+
memoryLimit.Sub(resource.MustParse("50Mi"))
299+
memoryRequest := divideQuantity(memoryLimit, 2)
300+
ginkgo.By("Adding memory request of " + memoryRequest.String() + " and memory limit of " + memoryLimit.String())
301+
setPodMemoryResources(stressPod, memoryRequest, memoryLimit)
302+
gomega.Expect(qos.GetPodQOS(stressPod)).To(gomega.Equal(v1.PodQOSBurstable))
303+
304+
stressPod = runPodAndWaitUntilScheduled(f, stressPod)
305+
306+
ginkgo.By("Ensuring the pod is using swap")
307+
var swapUsage *resource.Quantity
308+
gomega.Eventually(func() error {
309+
stressPod = getUpdatedPod(f, stressPod)
310+
gomega.Expect(stressPod.Status.Phase).To(gomega.Equal(v1.PodRunning), "pod should be running")
311+
312+
var err error
313+
swapUsage, err = getSwapUsage(f, stressPod)
314+
if err != nil {
315+
return err
316+
}
317+
318+
if swapUsage.IsZero() {
319+
return fmt.Errorf("swap usage is zero")
320+
}
321+
322+
return nil
323+
}, 5*time.Minute, 1*time.Second).Should(gomega.Succeed())
324+
325+
ginkgo.By("Waiting 15 seconds for cAdvisor to collect 2 stats points")
326+
time.Sleep(15 * time.Second)
327+
328+
getSwapExpectation := func() gomega.OmegaMatcher {
329+
return gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
330+
"Time": recent(maxStatsAge),
331+
"SwapUsageBytes": bounded(1, memoryLimit.Value()),
332+
}))
333+
}
334+
335+
matchExpectations := gstruct.PointTo(gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
336+
"Pods": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
337+
fmt.Sprintf("%s::%s", f.Namespace.Name, stressPod.Name): gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
338+
"Containers": gstruct.MatchElements(summaryObjectID, gstruct.IgnoreExtras, gstruct.Elements{
339+
stressPod.Spec.Containers[0].Name: gstruct.MatchFields(gstruct.IgnoreExtras, gstruct.Fields{
340+
"Swap": getSwapExpectation(),
341+
}),
342+
}),
343+
"Swap": getSwapExpectation(),
344+
}),
345+
}),
346+
}))
347+
348+
ginkgo.By("Validating /stats/summary")
349+
// Allow up to three minutes for the summary API to report the expected swap stats.
350+
gomega.Eventually(context.Background(), getNodeSummary, 180*time.Second, 15*time.Second).Should(matchExpectations)
351+
// Then the summary should match the expectations a few more times.
352+
gomega.Consistently(context.Background(), getNodeSummary, 30*time.Second, 15*time.Second).Should(matchExpectations)
353+
})
290354
})
291355
})
292356
})

0 commit comments

Comments
 (0)