Skip to content

Commit 9bc3a2f

Browse files
committed
add e2e tests for prefer-closest-numa-nodes TopologyManagerPolicyOption
Signed-off-by: PiotrProkop <[email protected]>
1 parent 4aeaf1e commit 9bc3a2f

File tree

1 file changed

+143
-3
lines changed

1 file changed

+143
-3
lines changed

test/e2e_node/topology_manager_test.go

Lines changed: 143 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"k8s.io/apimachinery/pkg/api/resource"
3232
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3333
"k8s.io/apimachinery/pkg/runtime"
34+
"k8s.io/apimachinery/pkg/util/sets"
3435
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
3536
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
3637
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
@@ -54,9 +55,10 @@ const (
5455
podScopeTopology = "pod"
5556
containerScopeTopology = "container"
5657

57-
minNumaNodes = 2
58-
minCoreCount = 4
59-
minSriovResource = 7 // This is the min number of SRIOV VFs needed on the system under test.
58+
minNumaNodes = 2
59+
minNumaNodesPreferClosestNUMA = 4
60+
minCoreCount = 4
61+
minSriovResource = 7 // This is the min number of SRIOV VFs needed on the system under test.
6062
)
6163

6264
// Helper for makeTopologyManagerPod().
@@ -99,6 +101,46 @@ func detectThreadPerCore() int {
99101
return threadCount
100102
}
101103

104+
// getMinRemoteDistanceForNode returns the smallest distance between any pair of
// distinct NUMA nodes in nodeToDistances, or -1 if the map contains no remote
// (off-node) entries.
// For now we only look for pairs of nodes with minimum distance; we also assume
// that the distance table is symmetric.
func getMinRemoteDistanceForNode(nodeToDistances map[int][]int) int {
	// Seed with -1 ("not found yet") rather than a magic upper bound: ACPI SLIT
	// distances may legally exceed 99 (up to 254), so initializing the minimum
	// to 99 could silently clamp the result on such systems.
	minDistance := -1
	for myID, distances := range nodeToDistances {
		for nodeID, distance := range distances {
			// Skip the node's distance to itself; it always equals 10 (local access).
			if myID == nodeID {
				continue
			}
			if minDistance == -1 || distance < minDistance {
				minDistance = distance
			}
		}
	}

	return minDistance
}
124+
125+
func detectNUMADistances(numaNodes int) map[int][]int {
126+
nodeToDistances := make(map[int][]int)
127+
for i := 0; i < numaNodes; i++ {
128+
outData, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/node/node%d/distance", i)).Output()
129+
framework.ExpectNoError(err)
130+
131+
nodeToDistances[i] = make([]int, 0, numaNodes)
132+
133+
for _, distance := range strings.Split(strings.TrimSpace(string(outData)), " ") {
134+
distanceValue, err := strconv.Atoi(strings.TrimSpace(distance))
135+
framework.ExpectNoError(err)
136+
137+
nodeToDistances[i] = append(nodeToDistances[i], distanceValue)
138+
}
139+
}
140+
141+
return nodeToDistances
142+
}
143+
102144
func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Container) {
103145
for _, ctnAttr := range ctnAttributes {
104146
ctn := v1.Container{
@@ -862,6 +904,68 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
862904
}
863905
}
864906

907+
func runPreferClosestNUMATestSuite(ctx context.Context, f *framework.Framework, numaNodes int, distances map[int][]int) {
908+
ginkgo.By("Admit two guaranteed pods. Both consist of 1 containers, each pod asks for cpus from 2 NUMA nodes. CPUs should be assigned from closest NUMA")
909+
910+
podMap := make(map[string]*v1.Pod)
911+
for podID := 0; podID < 2; podID++ {
912+
numCores := 0
913+
for nodeNum := 0 + 2*podID; nodeNum <= 1+2*podID; nodeNum++ {
914+
cpus, err := getCPUsPerNUMANode(nodeNum)
915+
framework.ExpectNoError(err)
916+
// subtract one to accommodate reservedCPUs. It'll only work if more than 2 cpus per NUMA node.
917+
cpusPerNUMA := len(cpus)
918+
if cpusPerNUMA < 3 {
919+
e2eskipper.Skipf("Less than 2 cpus per NUMA node on this system. Skipping test.")
920+
}
921+
numCores += cpusPerNUMA - 1
922+
}
923+
coresReq := fmt.Sprintf("%dm", numCores*1000)
924+
ctnAttrs := []tmCtnAttribute{
925+
{
926+
ctnName: "ps-container-0",
927+
cpuRequest: coresReq,
928+
cpuLimit: coresReq,
929+
},
930+
}
931+
podName := fmt.Sprintf("gu-pod-%d", podID)
932+
framework.Logf("creating pod %s attrs %v", podName, nil)
933+
pod := makeTopologyManagerTestPod(podName, ctnAttrs, nil)
934+
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
935+
framework.Logf("created pod %s", podName)
936+
podMap[podName] = pod
937+
}
938+
939+
for _, pod := range podMap {
940+
for _, cnt := range pod.Spec.Containers {
941+
ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
942+
943+
logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
944+
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
945+
946+
framework.Logf("got pod logs: %v", logs)
947+
podEnv, err := makeEnvMap(logs)
948+
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
949+
950+
CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, podEnv, numaNodes)
951+
framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
952+
953+
numaUsed := sets.New[int]()
954+
for _, numa := range CPUToNUMANode {
955+
numaUsed.Insert(numa)
956+
}
957+
958+
numaList := numaUsed.UnsortedList()
959+
gomega.Expect(numaList).To(gomega.HaveLen(2))
960+
961+
distance := getMinRemoteDistanceForNode(distances)
962+
gomega.Expect(distance).To(gomega.Equal(distances[numaList[0]][numaList[1]]))
963+
}
964+
}
965+
966+
deletePodsAsync(ctx, f, podMap)
967+
}
968+
865969
func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]string) {
866970
var oldCfg *kubeletconfig.KubeletConfiguration
867971
var err error
@@ -939,6 +1043,39 @@ func runTopologyManagerTests(f *framework.Framework, topologyOptions map[string]
9391043
})
9401044
}
9411045

1046+
func runPreferClosestNUMATests(f *framework.Framework) {
1047+
var oldCfg *kubeletconfig.KubeletConfiguration
1048+
var err error
1049+
1050+
ginkgo.It("run the Topology Manager prefer-closest-numa policy option test suite", func(ctx context.Context) {
1051+
numaNodes := detectNUMANodes()
1052+
if numaNodes < minNumaNodesPreferClosestNUMA {
1053+
e2eskipper.Skipf("this test is intended to be run on at least 4 NUMA node system")
1054+
}
1055+
1056+
numaDistances := detectNUMADistances(numaNodes)
1057+
1058+
oldCfg, err = getCurrentKubeletConfig(ctx)
1059+
framework.ExpectNoError(err)
1060+
1061+
policy := topologymanager.PolicyBestEffort
1062+
scope := containerScopeTopology
1063+
options := map[string]string{topologymanager.PreferClosestNUMANodes: "true"}
1064+
1065+
newCfg, _ := configureTopologyManagerInKubelet(oldCfg, policy, scope, options, &v1.ConfigMap{}, numaNodes)
1066+
updateKubeletConfig(ctx, f, newCfg, true)
1067+
1068+
runPreferClosestNUMATestSuite(ctx, f, numaNodes, numaDistances)
1069+
})
1070+
1071+
ginkgo.AfterEach(func(ctx context.Context) {
1072+
if oldCfg != nil {
1073+
// restore kubelet config
1074+
updateKubeletConfig(ctx, f, oldCfg, true)
1075+
}
1076+
})
1077+
}
1078+
9421079
func hostPrecheck() (int, int) {
9431080
// this is a very rough check. We just want to rule out system that does NOT have
9441081
// any SRIOV device. A more proper check will be done in runTopologyManagerPositiveTest
@@ -973,4 +1110,7 @@ var _ = SIGDescribe("Topology Manager", framework.WithSerial(), feature.Topology
9731110
doubleDefaultMaxAllowableNUMANodes := strconv.Itoa(8 * 2)
9741111
runTopologyManagerTests(f, map[string]string{topologymanager.MaxAllowableNUMANodes: doubleDefaultMaxAllowableNUMANodes})
9751112
})
1113+
ginkgo.Context("With kubeconfig's prefer-closest-numa-nodes topologyOptions enabled run the Topology Manager tests", ginkgo.Label("PreferClosestNUMANodes"), func() {
1114+
runPreferClosestNUMATests(f)
1115+
})
9761116
})

0 commit comments

Comments
 (0)