@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/sets"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -54,9 +55,10 @@ const (
 	podScopeTopology       = "pod"
 	containerScopeTopology = "container"
 
-	minNumaNodes     = 2
-	minCoreCount     = 4
-	minSriovResource = 7 // This is the min number of SRIOV VFs needed on the system under test.
+	minNumaNodes                  = 2
+	minNumaNodesPreferClosestNUMA = 4
+	minCoreCount                  = 4
+	minSriovResource              = 7 // This is the min number of SRIOV VFs needed on the system under test.
 )
 
 // Helper for makeTopologyManagerPod().
@@ -99,6 +101,48 @@ func detectThreadPerCore() int {
 	return threadCount
 }
 
+// For now we only look for pairs of nodes with the minimum distance; we also assume that the distance table is symmetric.
+func getMinRemoteDistanceForNode(nodeToDistances map[int][]int) int {
+	var minDistance int = 99
+	for myID, distances := range nodeToDistances {
+		if len(distances) == 0 {
+			continue
+		}
+		for nodeID, distance := range distances {
+			// Skip the node's distance to itself; it always equals 10, which means local access.
+			if myID == nodeID {
+				continue
+			}
+			if distance < minDistance {
+				minDistance = distance
+			}
+		}
+	}
+
+	return minDistance
+}
+
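+// detectNUMADistances reads the distance table the kernel exposes under
+// /sys/devices/system/node/node<N>/distance. Each file holds one space-separated row of the
+// NUMA distance matrix (for example "10 12 20 20" on a 4-node system), where 10 denotes the
+// node's distance to itself, i.e. local access.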
+func detectNUMADistances(numaNodes int) map[int][]int {
+	ginkgo.GinkgoHelper()
+
+	nodeToDistances := make(map[int][]int)
+	for i := 0; i < numaNodes; i++ {
+		outData, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/distance", i))
+		framework.ExpectNoError(err)
+
+		nodeToDistances[i] = make([]int, 0, numaNodes)
+
+		for _, distance := range strings.Split(strings.TrimSpace(string(outData)), " ") {
+			distanceValue, err := strconv.Atoi(strings.TrimSpace(distance))
+			framework.ExpectNoError(err)
+
+			nodeToDistances[i] = append(nodeToDistances[i], distanceValue)
+		}
+	}
+
+	return nodeToDistances
+}
+
 func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
 	for _, ctnAttr := range ctnAttributes {
 		ctn := v1.Container{
@@ -862,6 +906,153 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 }
 
+func runPreferClosestNUMATestSuite(ctx context.Context, f *framework.Framework, numaNodes int, distances map[int][]int) {
+	runPreferClosestNUMAOptimalAllocationTest(ctx, f, numaNodes, distances)
+	runPreferClosestNUMASubOptimalAllocationTest(ctx, f, numaNodes, distances)
+}
+
+func runPreferClosestNUMAOptimalAllocationTest(ctx context.Context, f *framework.Framework, numaNodes int, distances map[int][]int) {
+	ginkgo.By("Admit two guaranteed pods. Each consists of 1 container and asks for CPUs from 2 NUMA nodes. CPUs should be assigned from the closest NUMA nodes")
+	podMap := make(map[string]*v1.Pod)
+	for podID := 0; podID < 2; podID++ {
+		numCores := 0
+		for nodeNum := 0 + 2*podID; nodeNum <= 1+2*podID; nodeNum++ {
+			cpus, err := getCPUsPerNUMANode(nodeNum)
+			framework.ExpectNoError(err)
+			// Subtract one CPU per NUMA node to accommodate the reserved CPUs. This only works if there are more than 2 CPUs per NUMA node.
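+			// For example, with 8 allocatable CPUs per NUMA node each pod ends up requesting (8-1)+(8-1) = 14 exclusive CPUs.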
+			cpusPerNUMA := len(cpus)
+			if cpusPerNUMA < 3 {
+				e2eskipper.Skipf("Less than 3 CPUs per NUMA node on this system. Skipping test.")
+			}
+			numCores += cpusPerNUMA - 1
+		}
+		coresReq := fmt.Sprintf("%dm", numCores*1000)
+		ctnAttrs := []tmCtnAttribute{
+			{
+				ctnName:    "ps-container-0",
+				cpuRequest: coresReq,
+				cpuLimit:   coresReq,
+			},
+		}
+		podName := fmt.Sprintf("gu-pod-%d", podID)
+		framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
+		pod := makeTopologyManagerTestPod(podName, ctnAttrs, nil)
+		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+		framework.Logf("created pod %s", podName)
+		podMap[podName] = pod
+	}
+
+	validatePreferClosestNUMAOptimalAllocation(ctx, f, podMap, numaNodes, distances)
+
+	deletePodsAsync(ctx, f, podMap)
+}
+
+func runPreferClosestNUMASubOptimalAllocationTest(ctx context.Context, f *framework.Framework, numaNodes int, distances map[int][]int) {
+	ginkgo.By("Admit two guaranteed pods. Each consists of 1 container and asks for CPUs from 2 NUMA nodes. CPUs should be assigned from the closest NUMA nodes")
+	cntName := "ps-container-0"
+
+	// expect the same number of CPUs on every NUMA node
+	cpusPerNUMA, err := getCPUsPerNUMANode(0)
+	framework.ExpectNoError(err)
+	if len(cpusPerNUMA) < 5 {
+		e2eskipper.Skipf("Less than 5 CPUs per NUMA node on this system. Skipping test.")
+	}
+	podMap := make(map[string]*v1.Pod)
+	for podID := 0; podID < 2; podID++ {
+		// ask for all but one CPU from one less than half of the NUMA nodes, and half of the CPUs from one more node,
+		// plus add one less than half the number of NUMA nodes, to accommodate the reserved CPUs
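+		// For example, with 4 NUMA nodes and 8 allocatable CPUs per node: 1*(8-1) + 8/2 + 1 = 12 CPUs per pod.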
+		numCores := ((numaNodes/2)-1)*(len(cpusPerNUMA)-1) + (len(cpusPerNUMA) / 2) + (numaNodes/2 - 1)
+		framework.ExpectNoError(err)
+
+		coresReq := fmt.Sprintf("%dm", numCores*1000)
+		ctnAttrs := []tmCtnAttribute{
+			{
+				ctnName:    cntName,
+				cpuRequest: coresReq,
+				cpuLimit:   coresReq,
+			},
+		}
+		podName := fmt.Sprintf("gu-pod-%d", podID)
+		framework.Logf("creating pod %s", podName)
+		pod := makeTopologyManagerTestPod(podName, ctnAttrs, nil)
+		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+		framework.Logf("created pod %s", podName)
+		podMap[podName] = pod
+	}
+
+	validatePreferClosestNUMAOptimalAllocation(ctx, f, podMap, numaNodes, distances)
+
+	ginkgo.By("Admit one guaranteed pod. It asks for CPUs from 2 NUMA nodes. CPUs should be assigned from non-closest NUMA nodes")
+	// ask for the remaining CPUs; the pod should only fit with a sub-optimal NUMA placement.
+	coresReq := fmt.Sprintf("%dm", 2*(len(cpusPerNUMA)/2)*1000)
+	ctnAttrs := []tmCtnAttribute{
+		{
+			ctnName:    cntName,
+			cpuRequest: coresReq,
+			cpuLimit:   coresReq,
+		},
+	}
+	podName := "gu-pod-2"
+	framework.Logf("creating pod %s attrs %v", podName, ctnAttrs)
+	pod := makeTopologyManagerTestPod(podName, ctnAttrs, nil)
+	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+	framework.Logf("created pod %s", podName)
+
+	ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cntName, pod.Name))
+
+	logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cntName)
+	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cntName, pod.Name)
+
+	framework.Logf("got pod logs: %v", logs)
+	podEnv, err := makeEnvMap(logs)
+	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cntName, pod.Name)
+
+	CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, &pod.Spec.Containers[0], podEnv, numaNodes)
+	framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cntName, pod.Name)
+
+	numaUsed := sets.New[int]()
+	for _, numa := range CPUToNUMANode {
+		numaUsed.Insert(numa)
+	}
+
+	numaList := numaUsed.UnsortedList()
+	gomega.Expect(numaList).To(gomega.HaveLen(2))
+
+	distance := getMinRemoteDistanceForNode(distances)
+	gomega.Expect(distance).NotTo(gomega.Equal(distances[numaList[0]][numaList[1]]))
+
+	deletePodsAsync(ctx, f, podMap)
+}
+
+func validatePreferClosestNUMAOptimalAllocation(ctx context.Context, f *framework.Framework, podMap map[string]*v1.Pod, numaNodes int, distances map[int][]int) {
+	for _, pod := range podMap {
+		for _, cnt := range pod.Spec.Containers {
+			ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+			logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			framework.Logf("got pod logs: %v", logs)
+			podEnv, err := makeEnvMap(logs)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, podEnv, numaNodes)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			numaUsed := sets.New[int]()
+			for _, numa := range CPUToNUMANode {
+				numaUsed.Insert(numa)
+			}
+
+			numaList := numaUsed.UnsortedList()
+			gomega.Expect(numaList).To(gomega.HaveLen(2))
+
+			distance := getMinRemoteDistanceForNode(distances)
+			gomega.Expect(distance).To(gomega.Equal(distances[numaList[0]][numaList[1]]))
+		}
+	}
+}
+
 func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]string) {
 	var oldCfg *kubeletconfig.KubeletConfiguration
 	var err error
@@ -939,6 +1130,39 @@ func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]
 	})
 }
 
+func runPreferClosestNUMATests(f *framework.Framework) {
+	var oldCfg *kubeletconfig.KubeletConfiguration
+	var err error
+
+	ginkgo.It("run the Topology Manager prefer-closest-numa policy option test suite", func(ctx context.Context) {
+		numaNodes := detectNUMANodes()
+		if numaNodes < minNumaNodesPreferClosestNUMA {
+			e2eskipper.Skipf("this test is intended to be run on a system with at least %d NUMA nodes", minNumaNodesPreferClosestNUMA)
+		}
+
+		numaDistances := detectNUMADistances(numaNodes)
+
+		oldCfg, err = getCurrentKubeletConfig(ctx)
+		framework.ExpectNoError(err)
+
+		policy := topologymanager.PolicyBestEffort
+		scope := containerScopeTopology
+		options := map[string]string{topologymanager.PreferClosestNUMANodes: "true"}
+
+		newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, options, &v1.ConfigMap{}, numaNodes)
+		updateKubeletConfig(ctx, f, newCfg, true)
+
+		runPreferClosestNUMATestSuite(ctx, f, numaNodes, numaDistances)
+	})
+
+	ginkgo.AfterEach(func(ctx context.Context) {
+		if oldCfg != nil {
+			// restore the kubelet config
+			updateKubeletConfig(ctx, f, oldCfg, true)
+		}
+	})
+}
+
 func hostPrecheck() (int, int) {
 	// this is a very rough check. We just want to rule out system that does NOT have
 	// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
@@ -973,4 +1197,7 @@ var _ = SIGDescribe("Topology Manager", framework.WithSerial(), feature.Topology
 		doubleDefaultMaxAllowableNUMANodes := strconv.Itoa(8 * 2)
 		runTopologyManagerTests(f, map[string]string{topologymanager.MaxAllowableNUMANodes: doubleDefaultMaxAllowableNUMANodes})
 	})
+	ginkgo.Context("With kubeconfig's prefer-closest-numa-nodes topologyOptions enabled run the Topology Manager tests", ginkgo.Label("PreferClosestNUMANodes"), func() {
+		runPreferClosestNUMATests(f)
+	})
 })