
Commit 833519f

e2e: topomgr: properly clean up after completion
Due to an oversight, the e2e topology manager tests were leaking a configmap and a serviceaccount. This patch ensures a proper cleanup.

Signed-off-by: Francesco Romani <[email protected]>
1 parent 7c12251 commit 833519f
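The patch (full diff below) threads the ConfigMap and ServiceAccount created during setup through a new sriovData struct, so the teardown can delete them together with the device plugin pod. The sketch below condenses the same cleanup calls into a standalone helper outside the e2e framework; the helper name and parameters are illustrative, not part of the patch, and the Delete signature mirrors the client-go vintage used by this commit (newer releases take DeleteOptions by value).

package cleanup

import (
    "context"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// cleanupSRIOVArtifacts removes everything the SRIOV setup created: the device
// plugin pod, its ConfigMap and its ServiceAccount. The zero grace period
// mirrors the test teardown, which wants the objects gone immediately.
func cleanupSRIOVArtifacts(cs kubernetes.Interface, ns, podName, cmName, saName string) error {
    gp := int64(0)
    deleteOptions := metav1.DeleteOptions{GracePeriodSeconds: &gp}

    if err := cs.CoreV1().Pods(ns).Delete(context.TODO(), podName, &deleteOptions); err != nil {
        return err
    }
    if err := cs.CoreV1().ConfigMaps(ns).Delete(context.TODO(), cmName, &deleteOptions); err != nil {
        return err
    }
    return cs.CoreV1().ServiceAccounts(ns).Delete(context.TODO(), saName, &deleteOptions)
}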

2 files changed: +85 −65 lines changed

test/e2e_node/numa_alignment.go

Lines changed: 10 additions & 5 deletions
@@ -177,18 +177,23 @@ func makeEnvMap(logs string) (map[string]string, error) {
 	return envMap, nil
 }
 
-func containerWantsDevices(cnt *v1.Container, hwinfo testEnvHWInfo) bool {
-	_, found := cnt.Resources.Requests[v1.ResourceName(hwinfo.sriovResourceName)]
+type testEnvInfo struct {
+	numaNodes int
+	sriovResourceName string
+}
+
+func containerWantsDevices(cnt *v1.Container, envInfo testEnvInfo) bool {
+	_, found := cnt.Resources.Requests[v1.ResourceName(envInfo.sriovResourceName)]
 	return found
 }
 
-func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, logs string, hwinfo testEnvHWInfo) (numaPodResources, error) {
+func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, logs string, envInfo testEnvInfo) (numaPodResources, error) {
 	podEnv, err := makeEnvMap(logs)
 	if err != nil {
 		return numaPodResources{}, err
 	}
 
-	CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, cnt, podEnv, hwinfo.numaNodes)
+	CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, cnt, podEnv, envInfo.numaNodes)
 	if err != nil {
 		return numaPodResources{}, err
 	}
@@ -198,7 +203,7 @@ func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container,
 		return numaPodResources{}, err
 	}
 
-	if containerWantsDevices(cnt, hwinfo) && len(PCIDevsToNUMANode) == 0 {
+	if containerWantsDevices(cnt, envInfo) && len(PCIDevsToNUMANode) == 0 {
 		return numaPodResources{}, fmt.Errorf("no PCI devices found in environ")
 	}
 	numaRes := numaPodResources{
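The testEnvInfo type introduced above is a small carrier for the facts the alignment checks need. Below is a self-contained illustration of the containerWantsDevices check from the hunk above; the resource name and the main function are made-up scaffolding, not part of the patch (real runs discover the resource name from the node).

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
)

type testEnvInfo struct {
    numaNodes         int
    sriovResourceName string
}

// containerWantsDevices reports whether the container requests the SRIOV
// resource named in the test environment info, as in the diff above.
func containerWantsDevices(cnt *v1.Container, envInfo testEnvInfo) bool {
    _, found := cnt.Resources.Requests[v1.ResourceName(envInfo.sriovResourceName)]
    return found
}

func main() {
    // Example resource name; the e2e suite obtains it via findSRIOVResource.
    envInfo := testEnvInfo{numaNodes: 2, sriovResourceName: "intel.com/sriov_net_A"}
    cnt := &v1.Container{
        Name: "gu-container",
        Resources: v1.ResourceRequirements{
            Requests: v1.ResourceList{
                v1.ResourceName(envInfo.sriovResourceName): resource.MustParse("1"),
            },
        },
    }
    fmt.Println(containerWantsDevices(cnt, envInfo)) // prints: true
}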

test/e2e_node/topology_manager_test.go

Lines changed: 75 additions & 60 deletions
@@ -297,24 +297,15 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 	return "", 0
 }
 
-func deletePodInNamespace(f *framework.Framework, namespace, name string) {
-	gp := int64(0)
-	deleteOptions := metav1.DeleteOptions{
-		GracePeriodSeconds: &gp,
-	}
-	err := f.ClientSet.CoreV1().Pods(namespace).Delete(context.TODO(), name, &deleteOptions)
-	framework.ExpectNoError(err)
-}
-
-func validatePodAlignment(f *framework.Framework, pod *v1.Pod, hwinfo testEnvHWInfo) {
+func validatePodAlignment(f *framework.Framework, pod *v1.Pod, envInfo testEnvInfo) {
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
 
 		logs, err := e2epod.GetPodLogs(f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
 
 		framework.Logf("got pod logs: %v", logs)
-		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, hwinfo)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
 		framework.ExpectNoError(err, "NUMA Alignment check failed for [%s] of pod [%s]: %s", cnt.Name, pod.Name, numaRes.String())
 	}
 }
@@ -562,7 +553,7 @@ func waitForAllContainerRemoval(podName, podNS string) {
 	}, 2*time.Minute, 1*time.Second).Should(gomega.BeTrue())
 }
 
-func runTopologyManagerPositiveTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, hwinfo testEnvHWInfo) {
+func runTopologyManagerPositiveTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, envInfo testEnvInfo) {
 	var pods []*v1.Pod
 
 	for podID := 0; podID < numPods; podID++ {
@@ -575,7 +566,7 @@ func runTopologyManagerPositiveTest(f *framework.Framework, numPods int, ctnAttr
 	}
 
 	for podID := 0; podID < numPods; podID++ {
-		validatePodAlignment(f, pods[podID], hwinfo)
+		validatePodAlignment(f, pods[podID], envInfo)
 	}
 
 	for podID := 0; podID < numPods; podID++ {
@@ -587,7 +578,7 @@ func runTopologyManagerPositiveTest(f *framework.Framework, numPods int, ctnAttr
 	}
 }
 
-func runTopologyManagerNegativeTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, hwinfo testEnvHWInfo) {
+func runTopologyManagerNegativeTest(f *framework.Framework, numPods int, ctnAttrs []tmCtnAttribute, envInfo testEnvInfo) {
 	podName := "gu-pod"
 	framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
 	pod := makeTopologyManagerTestPod(podName, numalignCmd, ctnAttrs)
@@ -636,7 +627,16 @@ func getSRIOVDevicePluginConfigMap(cmFile string) *v1.ConfigMap {
 	return readConfigMapV1OrDie(cmData)
 }
 
-func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) (*v1.Pod, string, int64) {
+type sriovData struct {
+	configMap *v1.ConfigMap
+	serviceAccount *v1.ServiceAccount
+	pod *v1.Pod
+
+	resourceName string
+	resourceAmount int64
+}
+
+func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) sriovData {
 	var err error
 
 	ginkgo.By(fmt.Sprintf("Creating configMap %v/%v", metav1.NamespaceSystem, configMap.Name))
@@ -670,19 +670,34 @@ func setupSRIOVConfigOrFail(f *framework.Framework, configMap *v1.ConfigMap) (*v
 	}, 2*time.Minute, framework.Poll).Should(gomega.BeTrue())
 	framework.Logf("Successfully created device plugin pod, detected %d SRIOV device %q", sriovResourceAmount, sriovResourceName)
 
-	return dpPod, sriovResourceName, sriovResourceAmount
+	return sriovData{
+		configMap: configMap,
+		serviceAccount: serviceAccount,
+		pod: dpPod,
+		resourceName: sriovResourceName,
+		resourceAmount: sriovResourceAmount,
+	}
 }
 
-func teardownSRIOVConfigOrFail(f *framework.Framework, dpPod *v1.Pod) {
-	framework.Logf("deleting the SRIOV device plugin pod %s/%s and waiting for container %s removal",
-		dpPod.Namespace, dpPod.Name, dpPod.Spec.Containers[0].Name)
-	deletePodInNamespace(f, dpPod.Namespace, dpPod.Name)
-	waitForContainerRemoval(dpPod.Spec.Containers[0].Name, dpPod.Name, dpPod.Namespace)
-}
+func teardownSRIOVConfigOrFail(f *framework.Framework, sd sriovData) {
+	var err error
+	gp := int64(0)
+	deleteOptions := metav1.DeleteOptions{
+		GracePeriodSeconds: &gp,
+	}
 
-type testEnvHWInfo struct {
-	numaNodes int
-	sriovResourceName string
+	ginkgo.By("Delete SRIOV device plugin pod %s/%s")
+	err = f.ClientSet.CoreV1().Pods(sd.pod.Namespace).Delete(context.TODO(), sd.pod.Name, &deleteOptions)
+	framework.ExpectNoError(err)
+	waitForContainerRemoval(sd.pod.Spec.Containers[0].Name, sd.pod.Name, sd.pod.Namespace)
+
+	ginkgo.By(fmt.Sprintf("Deleting configMap %v/%v", metav1.NamespaceSystem, sd.configMap.Name))
+	err = f.ClientSet.CoreV1().ConfigMaps(metav1.NamespaceSystem).Delete(context.TODO(), sd.configMap.Name, &deleteOptions)
+	framework.ExpectNoError(err)
+
+	ginkgo.By(fmt.Sprintf("Deleting serviceAccount %v/%v", metav1.NamespaceSystem, sd.serviceAccount.Name))
+	err = f.ClientSet.CoreV1().ServiceAccounts(metav1.NamespaceSystem).Delete(context.TODO(), sd.serviceAccount.Name, &deleteOptions)
+	framework.ExpectNoError(err)
 }
 
 func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap *v1.ConfigMap, reservedSystemCPUs string, numaNodes, coreCount int) {
@@ -691,144 +706,144 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap
 		threadsPerCore = 2
 	}
 
-	dpPod, sriovResourceName, sriovResourceAmount := setupSRIOVConfigOrFail(f, configMap)
-	hwinfo := testEnvHWInfo{
+	sd := setupSRIOVConfigOrFail(f, configMap)
+	envInfo := testEnvInfo{
 		numaNodes: numaNodes,
-		sriovResourceName: sriovResourceName,
+		sriovResourceName: sd.resourceName,
 	}
 
 	// could have been a loop, we unroll it to explain the testcases
 	var ctnAttrs []tmCtnAttribute
 
 	// simplest case
-	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sriovResourceName))
+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 1 core, 1 %s device", sd.resourceName))
 	ctnAttrs = []tmCtnAttribute{
 		{
 			ctnName: "gu-container",
 			cpuRequest: "1000m",
 			cpuLimit: "1000m",
-			deviceName: sriovResourceName,
+			deviceName: sd.resourceName,
 			deviceRequest: "1",
 			deviceLimit: "1",
 		},
 	}
-	runTopologyManagerPositiveTest(f, 1, ctnAttrs, hwinfo)
+	runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)
 
-	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 2 cores, 1 %s device", sriovResourceName))
+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with 2 cores, 1 %s device", sd.resourceName))
 	ctnAttrs = []tmCtnAttribute{
 		{
 			ctnName: "gu-container",
 			cpuRequest: "2000m",
 			cpuLimit: "2000m",
-			deviceName: sriovResourceName,
+			deviceName: sd.resourceName,
 			deviceRequest: "1",
 			deviceLimit: "1",
 		},
 	}
-	runTopologyManagerPositiveTest(f, 1, ctnAttrs, hwinfo)
+	runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)
 
 	if reservedSystemCPUs != "" {
 		// to avoid false negatives, we have put reserved CPUs in such a way there is at least a NUMA node
 		// with 1+ SRIOV devices and not reserved CPUs.
 		numCores := threadsPerCore * coreCount
 		allCoresReq := fmt.Sprintf("%dm", numCores*1000)
-		ginkgo.By(fmt.Sprintf("Successfully admit an entire socket (%d cores), 1 %s device", numCores, sriovResourceName))
+		ginkgo.By(fmt.Sprintf("Successfully admit an entire socket (%d cores), 1 %s device", numCores, sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: allCoresReq,
				cpuLimit: allCoresReq,
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
-		runTopologyManagerPositiveTest(f, 1, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)
 	}
 
-	if sriovResourceAmount > 1 {
+	if sd.resourceAmount > 1 {
 		// no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function
 		// preconditions must ensure the following can be fulfilled
-		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 1 core, 1 %s device", sriovResourceName))
+		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 1 core, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
-		runTopologyManagerPositiveTest(f, 2, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)
 
-		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device", sriovResourceName))
+		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
-		runTopologyManagerPositiveTest(f, 2, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)
 
 		// testing more complex conditions require knowledge about the system cpu+bus topology
 	}
 
 	// multi-container tests
-	if sriovResourceAmount >= 4 {
-		ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pods, each with two containers, each with 2 cores, 1 %s device", sriovResourceName))
+	if sd.resourceAmount >= 4 {
+		ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pods, each with two containers, each with 2 cores, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-0",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
			{
				ctnName: "gu-container-1",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
-		runTopologyManagerPositiveTest(f, 1, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 1, ctnAttrs, envInfo)
 
-		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, each with 1 core, 1 %s device", sriovResourceName))
+		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, each with 1 core, 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-0",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
			{
				ctnName: "gu-container-1",
				cpuRequest: "1000m",
				cpuLimit: "1000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
		}
-		runTopologyManagerPositiveTest(f, 2, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)
 
-		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, both with with 2 cores, one with 1 %s device", sriovResourceName))
+		ginkgo.By(fmt.Sprintf("Successfully admit two guaranteed pods, each with two containers, both with with 2 cores, one with 1 %s device", sd.resourceName))
		ctnAttrs = []tmCtnAttribute{
			{
				ctnName: "gu-container-dev",
				cpuRequest: "2000m",
				cpuLimit: "2000m",
-				deviceName: sriovResourceName,
+				deviceName: sd.resourceName,
				deviceRequest: "1",
				deviceLimit: "1",
			},
@@ -838,26 +853,26 @@ func runTopologyManagerNodeAlignmentSuiteTests(f *framework.Framework, configMap
 				cpuLimit: "2000m",
 			},
 		}
-		runTopologyManagerPositiveTest(f, 2, ctnAttrs, hwinfo)
+		runTopologyManagerPositiveTest(f, 2, ctnAttrs, envInfo)
 	}
 
 	// overflow NUMA node capacity: cores
 	numCores := 1 + (threadsPerCore * coreCount)
 	excessCoresReq := fmt.Sprintf("%dm", numCores*1000)
-	ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected", numCores, sriovResourceName))
+	ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
 	ctnAttrs = []tmCtnAttribute{
 		{
 			ctnName: "gu-container",
 			cpuRequest: excessCoresReq,
 			cpuLimit: excessCoresReq,
-			deviceName: sriovResourceName,
+			deviceName: sd.resourceName,
 			deviceRequest: "1",
 			deviceLimit: "1",
 		},
 	}
-	runTopologyManagerNegativeTest(f, 1, ctnAttrs, hwinfo)
+	runTopologyManagerNegativeTest(f, 1, ctnAttrs, envInfo)
 
-	teardownSRIOVConfigOrFail(f, dpPod)
+	teardownSRIOVConfigOrFail(f, sd)
 }
 
 func runTopologyManagerTests(f *framework.Framework) {
