@@ -31,10 +31,12 @@ import (
31
31
32
32
"k8s.io/apimachinery/pkg/api/resource"
33
33
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
34
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
34
35
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
35
36
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
36
37
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
37
38
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
39
+ "k8s.io/kubernetes/pkg/kubelet/types"
38
40
"k8s.io/kubernetes/test/e2e/framework"
39
41
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
40
42
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -304,16 +306,17 @@ func deletePodInNamespace(f *framework.Framework, namespace, name string) {
304
306
framework .ExpectNoError (err )
305
307
}
306
308
307
- func validatePodAlignment (f * framework.Framework , pod * v1.Pod , numaNodes int ) {
308
- ginkgo .By ("validating the Gu pod" )
309
- logs , err := e2epod .GetPodLogs (f .ClientSet , f .Namespace .Name , pod .Name , pod .Spec .Containers [0 ].Name )
310
- framework .ExpectNoError (err , "expected log not found in container [%s] of pod [%s]" ,
311
- pod .Spec .Containers [0 ].Name , pod .Name )
309
+ func validatePodAlignment (f * framework.Framework , pod * v1.Pod , hwinfo testEnvHWInfo ) {
310
+ for _ , cnt := range pod .Spec .Containers {
311
+ ginkgo .By (fmt .Sprintf ("validating the container %s on Gu pod %s" , cnt .Name , pod .Name ))
312
312
313
- framework .Logf ("got pod logs: %v" , logs )
314
- numaRes , err := checkNUMAAlignment (f , pod , logs , numaNodes )
315
- framework .ExpectNoError (err , "NUMA Alignment check failed for [%s] of pod [%s]: %s" ,
316
- pod .Spec .Containers [0 ].Name , pod .Name , numaRes .String ())
313
+ logs , err := e2epod .GetPodLogs (f .ClientSet , f .Namespace .Name , pod .Name , cnt .Name )
314
+ framework .ExpectNoError (err , "expected log not found in container [%s] of pod [%s]" , cnt .Name , pod .Name )
315
+
316
+ framework .Logf ("got pod logs: %v" , logs )
317
+ numaRes , err := checkNUMAAlignment (f , pod , & cnt , logs , hwinfo )
318
+ framework .ExpectNoError (err , "NUMA Alignment check failed for [%s] of pod [%s]: %s" , cnt .Name , pod .Name , numaRes .String ())
319
+ }
317
320
}
318
321
319
322
func runTopologyManagerPolicySuiteTests (f * framework.Framework ) {
@@ -542,21 +545,27 @@ func runTopologyManagerPolicySuiteTests(f *framework.Framework) {
542
545
waitForContainerRemoval (pod2 .Spec .Containers [0 ].Name , pod2 .Name , pod2 .Namespace )
543
546
}
544
547
545
- func runTopologyManagerPositiveTest (f * framework.Framework , numaNodes , numPods int , cpuAmount , sriovResourceName , deviceAmount string ) {
546
- var pods []* v1.Pod
547
-
548
- for podID := 0 ; podID < numPods ; podID ++ {
549
- ctnAttrs := []tmCtnAttribute {
550
- {
551
- ctnName : "gu-container" ,
552
- cpuRequest : cpuAmount ,
553
- cpuLimit : cpuAmount ,
554
- deviceName : sriovResourceName ,
555
- deviceRequest : deviceAmount ,
556
- deviceLimit : deviceAmount ,
548
+ func waitForAllContainerRemoval (podName , podNS string ) {
549
+ rs , _ , err := getCRIClient ()
550
+ framework .ExpectNoError (err )
551
+ gomega .Eventually (func () bool {
552
+ containers , err := rs .ListContainers (& runtimeapi.ContainerFilter {
553
+ LabelSelector : map [string ]string {
554
+ types .KubernetesPodNameLabel : podName ,
555
+ types .KubernetesPodNamespaceLabel : podNS ,
557
556
},
557
+ })
558
+ if err != nil {
559
+ return false
558
560
}
561
+ return len (containers ) == 0
562
+ }, 2 * time .Minute , 1 * time .Second ).Should (gomega .BeTrue ())
563
+ }
564
+
565
+ func runTopologyManagerPositiveTest (f * framework.Framework , numPods int , ctnAttrs []tmCtnAttribute , hwinfo testEnvHWInfo ) {
566
+ var pods []* v1.Pod
559
567
568
+ for podID := 0 ; podID < numPods ; podID ++ {
560
569
podName := fmt .Sprintf ("gu-pod-%d" , podID )
561
570
framework .Logf ("creating pod %s attrs %v" , podName , ctnAttrs )
562
571
pod := makeTopologyManagerTestPod (podName , numalignCmd , ctnAttrs )
@@ -566,30 +575,19 @@ func runTopologyManagerPositiveTest(f *framework.Framework, numaNodes, numPods i
566
575
}
567
576
568
577
for podID := 0 ; podID < numPods ; podID ++ {
569
- validatePodAlignment (f , pods [podID ], numaNodes )
578
+ validatePodAlignment (f , pods [podID ], hwinfo )
570
579
}
571
580
572
581
for podID := 0 ; podID < numPods ; podID ++ {
573
582
pod := pods [podID ]
574
- framework .Logf ("deleting the pod %s/%s and waiting for container %s removal" ,
575
- pod .Namespace , pod .Name , pod . Spec . Containers [ 0 ]. Name )
583
+ framework .Logf ("deleting the pod %s/%s and waiting for container removal" ,
584
+ pod .Namespace , pod .Name )
576
585
deletePods (f , []string {pod .Name })
577
- waitForContainerRemoval ( pod . Spec . Containers [ 0 ]. Name , pod .Name , pod .Namespace )
586
+ waitForAllContainerRemoval ( pod .Name , pod .Namespace )
578
587
}
579
588
}
580
589
581
- func runTopologyManagerNegativeTest (f * framework.Framework , numaNodes , numPods int , cpuAmount , sriovResourceName , deviceAmount string ) {
582
- ctnAttrs := []tmCtnAttribute {
583
- {
584
- ctnName : "gu-container" ,
585
- cpuRequest : cpuAmount ,
586
- cpuLimit : cpuAmount ,
587
- deviceName : sriovResourceName ,
588
- deviceRequest : deviceAmount ,
589
- deviceLimit : deviceAmount ,
590
- },
591
- }
592
-
590
+ func runTopologyManagerNegativeTest (f * framework.Framework , numPods int , ctnAttrs []tmCtnAttribute , hwinfo testEnvHWInfo ) {
593
591
podName := "gu-pod"
594
592
framework .Logf ("creating pod %s attrs %v" , podName , ctnAttrs )
595
593
pod := makeTopologyManagerTestPod (podName , numalignCmd , ctnAttrs )
@@ -682,47 +680,119 @@ func teardownSRIOVConfigOrFail(f *framework.Framework, dpPod *v1.Pod) {
682
680
waitForContainerRemoval (dpPod .Spec .Containers [0 ].Name , dpPod .Name , dpPod .Namespace )
683
681
}
684
682
683
// testEnvHWInfo groups the hardware-related facts about the test environment
// that the topology manager test helpers receive as a single argument.
type testEnvHWInfo struct {
	// numaNodes is the NUMA node count handed to the test suite.
	numaNodes int
	// sriovResourceName is the name of the SRIOV device resource under test.
	sriovResourceName string
}
687
+
685
688
func runTopologyManagerNodeAlignmentSuiteTests (f * framework.Framework , configMap * v1.ConfigMap , reservedSystemCPUs string , numaNodes , coreCount int ) {
686
689
threadsPerCore := 1
687
690
if isHTEnabled () {
688
691
threadsPerCore = 2
689
692
}
690
693
691
694
dpPod , sriovResourceName , sriovResourceAmount := setupSRIOVConfigOrFail (f , configMap )
695
+ hwinfo := testEnvHWInfo {
696
+ numaNodes : numaNodes ,
697
+ sriovResourceName : sriovResourceName ,
698
+ }
692
699
693
700
// could have been a loop, we unroll it to explain the testcases
701
+ var ctnAttrs []tmCtnAttribute
694
702
695
703
// simplest case
696
704
ginkgo .By (fmt .Sprintf ("Successfully admit one guaranteed pod with 1 core, 1 %s device" , sriovResourceName ))
697
- runTopologyManagerPositiveTest (f , numaNodes , 1 , "1000m" , sriovResourceName , "1" )
705
+ ctnAttrs = []tmCtnAttribute {
706
+ {
707
+ ctnName : "gu-container" ,
708
+ cpuRequest : "1000m" ,
709
+ cpuLimit : "1000m" ,
710
+ deviceName : sriovResourceName ,
711
+ deviceRequest : "1" ,
712
+ deviceLimit : "1" ,
713
+ },
714
+ }
715
+ runTopologyManagerPositiveTest (f , 1 , ctnAttrs , hwinfo )
698
716
699
717
ginkgo .By (fmt .Sprintf ("Successfully admit one guaranteed pod with 2 cores, 1 %s device" , sriovResourceName ))
700
- runTopologyManagerPositiveTest (f , numaNodes , 1 , "2000m" , sriovResourceName , "1" )
718
+ ctnAttrs = []tmCtnAttribute {
719
+ {
720
+ ctnName : "gu-container" ,
721
+ cpuRequest : "2000m" ,
722
+ cpuLimit : "2000m" ,
723
+ deviceName : sriovResourceName ,
724
+ deviceRequest : "1" ,
725
+ deviceLimit : "1" ,
726
+ },
727
+ }
728
+ runTopologyManagerPositiveTest (f , 1 , ctnAttrs , hwinfo )
701
729
702
730
if reservedSystemCPUs != "" {
703
731
// to avoid false negatives, we have put reserved CPUs in such a way there is at least a NUMA node
704
732
// with 1+ SRIOV devices and not reserved CPUs.
705
733
numCores := threadsPerCore * coreCount
734
+ allCoresReq := fmt .Sprintf ("%dm" , numCores * 1000 )
706
735
ginkgo .By (fmt .Sprintf ("Successfully admit an entire socket (%d cores), 1 %s device" , numCores , sriovResourceName ))
707
- runTopologyManagerPositiveTest (f , numaNodes , 1 , fmt .Sprintf ("%dm" , numCores * 1000 ), sriovResourceName , "1" )
736
+ ctnAttrs = []tmCtnAttribute {
737
+ {
738
+ ctnName : "gu-container" ,
739
+ cpuRequest : allCoresReq ,
740
+ cpuLimit : allCoresReq ,
741
+ deviceName : sriovResourceName ,
742
+ deviceRequest : "1" ,
743
+ deviceLimit : "1" ,
744
+ },
745
+ }
746
+ runTopologyManagerPositiveTest (f , 1 , ctnAttrs , hwinfo )
708
747
}
709
748
710
749
if sriovResourceAmount > 1 {
711
750
// no matter how busses are connected to NUMA nodes and SRIOV devices are installed, this function
712
751
// preconditions must ensure the following can be fulfilled
713
752
ginkgo .By (fmt .Sprintf ("Successfully admit two guaranteed pods, each with 1 core, 1 %s device" , sriovResourceName ))
714
- runTopologyManagerPositiveTest (f , numaNodes , 2 , "1000m" , sriovResourceName , "1" )
753
+ ctnAttrs = []tmCtnAttribute {
754
+ {
755
+ ctnName : "gu-container" ,
756
+ cpuRequest : "1000m" ,
757
+ cpuLimit : "1000m" ,
758
+ deviceName : sriovResourceName ,
759
+ deviceRequest : "1" ,
760
+ deviceLimit : "1" ,
761
+ },
762
+ }
763
+ runTopologyManagerPositiveTest (f , 2 , ctnAttrs , hwinfo )
715
764
716
765
ginkgo .By (fmt .Sprintf ("Successfully admit two guaranteed pods, each with 2 cores, 1 %s device" , sriovResourceName ))
717
- runTopologyManagerPositiveTest (f , numaNodes , 2 , "2000m" , sriovResourceName , "1" )
766
+ ctnAttrs = []tmCtnAttribute {
767
+ {
768
+ ctnName : "gu-container" ,
769
+ cpuRequest : "2000m" ,
770
+ cpuLimit : "2000m" ,
771
+ deviceName : sriovResourceName ,
772
+ deviceRequest : "1" ,
773
+ deviceLimit : "1" ,
774
+ },
775
+ }
776
+ runTopologyManagerPositiveTest (f , 2 , ctnAttrs , hwinfo )
718
777
719
778
// testing more complex conditions require knowledge about the system cpu+bus topology
720
779
}
721
780
722
781
// overflow NUMA node capacity: cores
723
782
numCores := 1 + (threadsPerCore * coreCount )
783
+ excessCoresReq := fmt .Sprintf ("%dm" , numCores * 1000 )
724
784
ginkgo .By (fmt .Sprintf ("Trying to admit a guaranteed pods, with %d cores, 1 %s device - and it should be rejected" , numCores , sriovResourceName ))
725
- runTopologyManagerNegativeTest (f , numaNodes , 1 , fmt .Sprintf ("%dm" , numCores * 1000 ), sriovResourceName , "1" )
785
+ ctnAttrs = []tmCtnAttribute {
786
+ {
787
+ ctnName : "gu-container" ,
788
+ cpuRequest : excessCoresReq ,
789
+ cpuLimit : excessCoresReq ,
790
+ deviceName : sriovResourceName ,
791
+ deviceRequest : "1" ,
792
+ deviceLimit : "1" ,
793
+ },
794
+ }
795
+ runTopologyManagerNegativeTest (f , 1 , ctnAttrs , hwinfo )
726
796
727
797
teardownSRIOVConfigOrFail (f , dpPod )
728
798
}
0 commit comments