@@ -31,6 +31,7 @@ import (
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/sets"
	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -54,9 +55,10 @@ const (
	podScopeTopology       = "pod"
	containerScopeTopology = "container"

-	minNumaNodes     = 2
-	minCoreCount     = 4
-	minSriovResource = 7 // This is the min number of SRIOV VFs needed on the system under test.
+	minNumaNodes                  = 2
+	minNumaNodesPreferClosestNUMA = 4
+	minCoreCount                  = 4
+	minSriovResource              = 7 // This is the min number of SRIOV VFs needed on the system under test.
)

// Helper for makeTopologyManagerPod().
@@ -99,6 +101,46 @@ func detectThreadPerCore() int {
	return threadCount
}

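+// NUMA distances mirror the ACPI SLIT table: a node's distance to itself is
+// always reported as 10 (local access), while remote nodes report larger values.
+// For an illustrative 4-node table such as
+//	node0: 10 11 20 20
+//	node1: 11 10 20 20
+//	node2: 20 20 10 11
+//	node3: 20 20 11 10
+// the minimum remote distance is 11.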
+// For now we only look for pairs of nodes with the minimum distance; we also assume the distance table is symmetric.
+func getMinRemoteDistanceForNode(nodeToDistances map[int][]int) int {
+	var minDistance int = 99
+	for myID, distances := range nodeToDistances {
+		if len(distances) == 0 {
+			continue
+		}
+		for nodeID, distance := range distances {
+			// The distance to the node itself always equals 10, which means local access.
+			if myID == nodeID {
+				continue
+			}
+			if distance < minDistance {
+				minDistance = distance
+			}
+		}
+	}
+
+	return minDistance
+}
+
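+// detectNUMADistances reads each node's distance row from sysfs; every
+// /sys/devices/system/node/node<N>/distance file holds a single space-separated
+// line, e.g. "10 21 21 21" on an illustrative 4-node machine.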
+func detectNUMADistances(numaNodes int) map[int][]int {
+	nodeToDistances := make(map[int][]int)
+	for i := 0; i < numaNodes; i++ {
+		outData, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/node/node%d/distance", i)).Output()
+		framework.ExpectNoError(err)
+
+		nodeToDistances[i] = make([]int, 0, numaNodes)
+
+		for _, distance := range strings.Split(strings.TrimSpace(string(outData)), " ") {
+			distanceValue, err := strconv.Atoi(strings.TrimSpace(distance))
+			framework.ExpectNoError(err)
+
+			nodeToDistances[i] = append(nodeToDistances[i], distanceValue)
+		}
+	}
+
+	return nodeToDistances
+}
+
func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
	for _, ctnAttr := range ctnAttributes {
		ctn := v1.Container{
@@ -862,6 +904,68 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
	}
}

+func runPreferClosestNUMATestSuite(ctx context.Context, f *framework.Framework, numaNodes int, distances map[int][]int) {
+	ginkgo.By("Admit two guaranteed pods. Each consists of 1 container and asks for CPUs from 2 NUMA nodes. CPUs should be assigned from the closest NUMA nodes")
+
+	podMap := make(map[string]*v1.Pod)
+	for podID := 0; podID < 2; podID++ {
+		numCores := 0
+		for nodeNum := 0 + 2*podID; nodeNum <= 1+2*podID; nodeNum++ {
+			cpus, err := getCPUsPerNUMANode(nodeNum)
+			framework.ExpectNoError(err)
+			// Subtract one CPU per NUMA node to accommodate reservedCPUs. This only works with more than 2 CPUs per NUMA node.
+			cpusPerNUMA := len(cpus)
+			if cpusPerNUMA < 3 {
+				e2eskipper.Skipf("Fewer than 3 CPUs per NUMA node on this system. Skipping test.")
+			}
+			numCores += cpusPerNUMA - 1
+		}
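+		// Example with illustrative numbers: with 8 CPUs per NUMA node, each pod
+		// requests 2*(8-1) = 14 CPUs, expressed as "14000m", so its exclusive CPUs
+		// have to span exactly two NUMA nodes.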
+		coresReq := fmt.Sprintf("%dm", numCores*1000)
+		ctnAttrs := []tmCtnAttribute{
+			{
+				ctnName:    "ps-container-0",
+				cpuRequest: coresReq,
+				cpuLimit:   coresReq,
+			},
+		}
+		podName := fmt.Sprintf("gu-pod-%d", podID)
+		framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
+		pod := makeTopologyManagerTestPod(podName, ctnAttrs, nil)
+		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+		framework.Logf("created pod %s", podName)
+		podMap[podName] = pod
+	}
+
+	for _, pod := range podMap {
+		for _, cnt := range pod.Spec.Containers {
+			ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+			logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			framework.Logf("got pod logs: %v", logs)
+			podEnv, err := makeEnvMap(logs)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, podEnv, numaNodes)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			numaUsed := sets.New[int]()
+			for _, numa := range CPUToNUMANode {
+				numaUsed.Insert(numa)
+			}
+
+			numaList := numaUsed.UnsortedList()
+			gomega.Expect(numaList).To(gomega.HaveLen(2))
+
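+			// With a symmetric table such as {0: [10, 11, 20, 20], 1: [11, 10, 20, 20], ...}
+			// (illustrative values), a pod spanning nodes 0 and 1 sees distances[0][1] == 11,
+			// which is the minimum remote distance returned by getMinRemoteDistanceForNode.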
+			distance := getMinRemoteDistanceForNode(distances)
+			gomega.Expect(distance).To(gomega.Equal(distances[numaList[0]][numaList[1]]))
+		}
+	}
+
+	deletePodsAsync(ctx, f, podMap)
+}
+
func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]string) {
	var oldCfg *kubeletconfig.KubeletConfiguration
	var err error
@@ -939,6 +1043,39 @@ func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]
	})
}

+func runPreferClosestNUMATests(f *framework.Framework) {
+	var oldCfg *kubeletconfig.KubeletConfiguration
+	var err error
+
+	ginkgo.It("run the Topology Manager prefer-closest-numa policy option test suite", func(ctx context.Context) {
+		numaNodes := detectNUMANodes()
+		if numaNodes < minNumaNodesPreferClosestNUMA {
+			e2eskipper.Skipf("this test is intended to be run on a system with at least %d NUMA nodes", minNumaNodesPreferClosestNUMA)
+		}
+
+		numaDistances := detectNUMADistances(numaNodes)
+
+		oldCfg, err = getCurrentKubeletConfig(ctx)
+		framework.ExpectNoError(err)
+
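+		// The equivalent standalone kubelet configuration would be roughly (sketch):
+		//   topologyManagerPolicy: best-effort
+		//   topologyManagerScope: container
+		//   topologyManagerPolicyOptions:
+		//     prefer-closest-numa-nodes: "true"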
+		policy := topologymanager.PolicyBestEffort
+		scope := containerScopeTopology
+		options := map[string]string{topologymanager.PreferClosestNUMANodes: "true"}
+
+		newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, options, &v1.ConfigMap{}, numaNodes)
+		updateKubeletConfig(ctx, f, newCfg, true)
+
+		runPreferClosestNUMATestSuite(ctx, f, numaNodes, numaDistances)
+	})
+
+	ginkgo.AfterEach(func(ctx context.Context) {
+		if oldCfg != nil {
+			// restore kubelet config
+			updateKubeletConfig(ctx, f, oldCfg, true)
+		}
+	})
+}
+
func hostPrecheck() (int, int) {
	// this is a very rough check. We just want to rule out system that does NOT have
	// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
@@ -973,4 +1110,7 @@ var _ = SIGDescribe("Topology Manager", framework.WithSerial(), feature.Topology
		doubleDefaultMaxAllowableNUMANodes := strconv.Itoa(8 * 2)
		runTopologyManagerTests(f, map[string]string{topologymanager.MaxAllowableNUMANodes: doubleDefaultMaxAllowableNUMANodes})
	})
+	ginkgo.Context("With kubelet config's prefer-closest-numa-nodes topology option enabled run the Topology Manager tests", ginkgo.Label("PreferClosestNUMANodes"), func() {
+		runPreferClosestNUMATests(f)
+	})
})