Commit 844c2ef
e2e: node: cpumgr: cleanup after each test case
Our CI machines happen to have 1 fully allocatable CPU for test workloads. This is the bare minimum, but it should still be enough for the tests to run. The CFS quota test, however, creates a series of pods (6 at the time of writing) and only cleans them up at the very end, so pods holding resource requests accumulate on the CI node. The fix implemented here is to clean up after each subcase: the test's CPU footprint then equals the largest single subcase requirement (say, 1000 millicores) rather than the sum of all the subcases' requirements. This doesn't change the test behavior, and it makes it possible to run the test on very barebones machines.
1 parent c26c59a commit 844c2ef
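Below is a condensed, self-contained sketch of the per-subcase cleanup pattern this commit introduces. It is not the actual test code: the pod type and the deletePodSyncAndWait stub here are hypothetical stand-ins for the e2e framework types and helpers. Each subcase registers the pod it creates in a map keyed by UID, deletes it as soon as its checks pass, and a deferred fallback reaps only whatever a failed subcase left behind.

// Hypothetical, condensed illustration of the per-subcase cleanup pattern;
// pod and deletePodSyncAndWait stand in for the real e2e framework helpers.
package main

import (
    "context"
    "fmt"
    "time"
)

type pod struct {
    UID, Namespace, Name string
}

// stand-in for deletePodSyncByName + waitForAllContainerRemoval
func deletePodSyncAndWait(ctx context.Context, p *pod) {
    fmt.Printf("deleting pod: %s/%s\n", p.Namespace, p.Name)
}

func runSubcases() {
    podsToClean := make(map[string]*pod) // pod.UID -> pod

    deleteTestPod := func(p *pod) {
        // Use a fresh, generous timeout so a nearly expired parent context
        // cannot abort the (slow) wait for container removal.
        ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
        defer cancel()
        deletePodSyncAndWait(ctx, p)
        delete(podsToClean, p.UID)
    }

    // Fallback for test failures only; the happy path already emptied the map.
    defer func() {
        ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
        defer cancel()
        for _, p := range podsToClean {
            deletePodSyncAndWait(ctx, p)
        }
    }()

    for i := 1; i <= 6; i++ { // the CFS quota test creates 6 pods in sequence
        p := &pod{UID: fmt.Sprint(i), Namespace: "e2e", Name: fmt.Sprintf("gu-pod%d", i)}
        podsToClean[p.UID] = p
        // ... the subcase's assertions against p would run here ...
        deleteTestPod(p) // release the CPU before the next subcase starts
    }
}

func main() { runSubcases() }

Keying the map by UID lets deleteTestPod drop its own entry on the happy path, so the deferred fallback (which hands the leftovers to deletePodsAsync in the real code) has nothing to do unless a subcase failed.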

File tree: 2 files changed (+39, −19 lines)

test/e2e_node/cpu_manager_test.go

Lines changed: 31 additions & 16 deletions
@@ -596,18 +596,27 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     var err error
     var ctnAttrs []ctnAttribute
     var pod1, pod2, pod3 *v1.Pod
-    var cleanupPods []*v1.Pod
-    ginkgo.DeferCleanup(func() {
+    podsToClean := make(map[string]*v1.Pod) // pod.UID -> pod
+
+    deleteTestPod := func(pod *v1.Pod) {
         // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a
         // 'deadline expired' message and the cleanup aborts, which we don't want.
-        ctx2 := context.TODO()
+        // So let's use a separate and more generous timeout (determined by trial and error)
+        ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+        defer cancel()
+        deletePodSyncAndWait(ctx2, f, pod.Namespace, pod.Name)
+        delete(podsToClean, string(pod.UID))
+    }
+
+    // cleanup leftovers on test failure. The happy path is covered by `deleteTestPod` calls
+    ginkgo.DeferCleanup(func() {
         ginkgo.By("by deleting the pods and waiting for container removal")
-        for _, cleanupPod := range cleanupPods {
-            framework.Logf("deleting pod: %s/%s", cleanupPod.Namespace, cleanupPod.Name)
-            deletePodSyncByName(ctx2, f, cleanupPod.Name)
-            waitForContainerRemoval(ctx2, cleanupPod.Spec.Containers[0].Name, cleanupPod.Name, cleanupPod.Namespace)
-            framework.Logf("deleted pod: %s/%s", cleanupPod.Namespace, cleanupPod.Name)
-        }
+        // waitForContainerRemoval takes "long" to complete; if we use the parent ctx we get a
+        // 'deadline expired' message and the cleanup aborts, which we don't want.
+        // So let's use a separate and more generous timeout (determined by trial and error)
+        ctx2, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
+        defer cancel()
+        deletePodsAsync(ctx2, f, podsToClean)
     })

     cfsCheckCommand := []string{"sh", "-c", "cat /sys/fs/cgroup/cpu.max && sleep 1d"}
@@ -623,7 +632,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod1 = makeCPUManagerPod("gu-pod1", ctnAttrs)
     pod1.Spec.Containers[0].Command = cfsCheckCommand
     pod1 = e2epod.NewPodClient(f).CreateSync(ctx, pod1)
-    cleanupPods = append(cleanupPods, pod1)
+    podsToClean[string(pod1.UID)] = pod1

     ginkgo.By("checking if the expected cfs quota was assigned (GU pod, exclusive CPUs, unlimited)")

@@ -635,6 +644,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod1.Name, pod1.Spec.Containers[0].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
         pod1.Spec.Containers[0].Name, pod1.Name)
+    deleteTestPod(pod1)

     ctnAttrs = []ctnAttribute{
         {
@@ -646,7 +656,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod2 = makeCPUManagerPod("gu-pod2", ctnAttrs)
     pod2.Spec.Containers[0].Command = cfsCheckCommand
     pod2 = e2epod.NewPodClient(f).CreateSync(ctx, pod2)
-    cleanupPods = append(cleanupPods, pod2)
+    podsToClean[string(pod2.UID)] = pod2

     ginkgo.By("checking if the expected cfs quota was assigned (GU pod, limited)")

@@ -655,6 +665,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod2.Name, pod2.Spec.Containers[0].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
         pod2.Spec.Containers[0].Name, pod2.Name)
+    deleteTestPod(pod2)

     ctnAttrs = []ctnAttribute{
         {
@@ -666,7 +677,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod3 = makeCPUManagerPod("non-gu-pod3", ctnAttrs)
     pod3.Spec.Containers[0].Command = cfsCheckCommand
     pod3 = e2epod.NewPodClient(f).CreateSync(ctx, pod3)
-    cleanupPods = append(cleanupPods, pod3)
+    podsToClean[string(pod3.UID)] = pod3

     ginkgo.By("checking if the expected cfs quota was assigned (BU pod, limited)")

@@ -675,6 +686,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod3.Name, pod3.Spec.Containers[0].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
         pod3.Spec.Containers[0].Name, pod3.Name)
+    deleteTestPod(pod3)

     ctnAttrs = []ctnAttribute{
         {
@@ -692,7 +704,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod4.Spec.Containers[0].Command = cfsCheckCommand
     pod4.Spec.Containers[1].Command = cfsCheckCommand
     pod4 = e2epod.NewPodClient(f).CreateSync(ctx, pod4)
-    cleanupPods = append(cleanupPods, pod4)
+    podsToClean[string(pod4.UID)] = pod4

     ginkgo.By("checking if the expected cfs quota was assigned (GU pod, container 0 exclusive CPUs unlimited, container 1 limited)")

@@ -709,6 +721,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod4.Name, pod4.Spec.Containers[1].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]",
         pod4.Spec.Containers[1].Name, pod4.Name)
+    deleteTestPod(pod4)

     ctnAttrs = []ctnAttribute{
         {
@@ -728,7 +741,8 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod5 := makeCPUManagerPod("gu-pod5", ctnAttrs)
     pod5.Spec.Containers[0].Command = podCFSCheckCommand
     pod5 = e2epod.NewPodClient(f).CreateSync(ctx, pod5)
-    cleanupPods = append(cleanupPods, pod5)
+    podsToClean[string(pod5.UID)] = pod5
+
     ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, unlimited)")

     expectedQuota = "150000"
@@ -741,6 +755,7 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ

     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod5.Name, pod5.Spec.Containers[0].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod5.Spec.Containers[0].Name, pod5.Name)
+    deleteTestPod(pod5)

     ctnAttrs = []ctnAttribute{
         {
@@ -753,15 +768,15 @@ func runCfsQuotaGuPods(ctx context.Context, f *framework.Framework, disabledCPUQ
     pod6 := makeCPUManagerPod("gu-pod6", ctnAttrs)
     pod6.Spec.Containers[0].Command = podCFSCheckCommand
     pod6 = e2epod.NewPodClient(f).CreateSync(ctx, pod6)
-    cleanupPods = append(cleanupPods, pod6)
+    podsToClean[string(pod6.UID)] = pod6

     ginkgo.By("checking if the expected cfs quota was assigned to pod (GU pod, limited)")

     expectedQuota = "10000"
     expCFSQuotaRegex = fmt.Sprintf("^%s %s\n$", expectedQuota, defaultPeriod)
     err = e2epod.NewPodClient(f).MatchContainerOutput(ctx, pod6.Name, pod6.Spec.Containers[0].Name, expCFSQuotaRegex)
     framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", pod6.Spec.Containers[0].Name, pod6.Name)
-
+    deleteTestPod(pod6)
 }

 func runMultipleGuPods(ctx context.Context, f *framework.Framework) {

test/e2e_node/topology_manager_test.go

Lines changed: 8 additions & 3 deletions
@@ -463,14 +463,19 @@ func deletePodsAsync(ctx context.Context, f *framework.Framework, podMap map[str
         go func(podNS, podName string) {
             defer ginkgo.GinkgoRecover()
             defer wg.Done()
-
-            deletePodSyncByName(ctx, f, podName)
-            waitForAllContainerRemoval(ctx, podName, podNS)
+            deletePodSyncAndWait(ctx, f, podNS, podName)
         }(pod.Namespace, pod.Name)
     }
     wg.Wait()
 }

+func deletePodSyncAndWait(ctx context.Context, f *framework.Framework, podNS, podName string) {
+    framework.Logf("deleting pod: %s/%s", podNS, podName)
+    deletePodSyncByName(ctx, f, podName)
+    waitForAllContainerRemoval(ctx, podName, podNS)
+    framework.Logf("deleted pod: %s/%s", podNS, podName)
+}
+
 func runTopologyManagerNegativeTest(ctx context.Context, f *framework.Framework, ctnAttrs, initCtnAttrs []tmCtnAttribute, envInfo *testEnvInfo) {
     podName := "gu-pod"
     framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
