Commit 88d2355

Merge pull request kubernetes#129951 from parkjeongryul/add-e2e-topology-manager-for-init-ctn
Add e2e test for topology manager with restartable init containers
2 parents: 803e9d6 + dca3f56

test/e2e_node/topology_manager_test.go

Lines changed: 250 additions & 4 deletions
@@ -69,6 +69,7 @@ type tmCtnAttribute struct {
 	deviceName    string
 	deviceRequest string
 	deviceLimit   string
+	restartPolicy *v1.ContainerRestartPolicy
 }

 func detectNUMANodes() int {
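
Background on the new field: an init container whose RestartPolicy is Always is a restartable ("sidecar") init container; it starts before the regular containers and then keeps running for the pod's lifetime instead of running to completion. As a minimal sketch (not part of the diff; image names are placeholders), the field ends up on the pod spec like this:

	package sketch // illustrative only, not from the commit

	import v1 "k8s.io/api/core/v1"

	var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways

	// sidecarPod builds a pod with one restartable init container.
	func sidecarPod() *v1.Pod {
		return &v1.Pod{
			Spec: v1.PodSpec{
				InitContainers: []v1.Container{{
					Name:          "restartable-init",
					Image:         "busybox", // placeholder image
					RestartPolicy: &containerRestartPolicyAlways,
				}},
				Containers: []v1.Container{{
					Name:  "app",
					Image: "busybox", // placeholder image
				}},
			},
		}
	}
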
@@ -158,7 +159,8 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 				v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
 			},
 		},
-		Command: []string{"sh", "-c", ctnCmd},
+		Command:       []string{"sh", "-c", ctnCmd},
+		RestartPolicy: ctnAttr.restartPolicy,
 	}
 	if ctnAttr.deviceName != "" {
 		ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
@@ -171,8 +173,12 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co

 func makeTopologyManagerTestPod(podName string, tmCtnAttributes, tmInitCtnAttributes []tmCtnAttribute) *v1.Pod {
 	var containers, initContainers []v1.Container
-	if len(tmInitCtnAttributes) > 0 {
-		initContainers = makeContainers(numaAlignmentCommand, tmInitCtnAttributes)
+	for _, attr := range tmInitCtnAttributes {
+		cmd := numaAlignmentCommand
+		if attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways {
+			cmd = numaAlignmentSleepCommand
+		}
+		initContainers = append(initContainers, makeContainers(cmd, []tmCtnAttribute{attr})...)
 	}
 	containers = makeContainers(numaAlignmentSleepCommand, tmCtnAttributes)
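
The dispatch above is the behavioral core of the change: a regular init container runs the one-shot NUMA-alignment command and exits, while a restartable init container must stay alive for the pod to become Ready, so it receives the sleeping variant. A hypothetical standalone rendering (the real command strings are defined elsewhere in this file and are elided as placeholders here):

	// pickCommand mirrors the per-attribute loop above as an isolated helper.
	func pickCommand(restartPolicy *v1.ContainerRestartPolicy) string {
		const (
			numaAlignmentCmd      = "..." // placeholder: print alignment env, then exit
			numaAlignmentSleepCmd = "..." // placeholder: print alignment env, then sleep
		)
		if restartPolicy != nil && *restartPolicy == v1.ContainerRestartPolicyAlways {
			return numaAlignmentSleepCmd
		}
		return numaAlignmentCmd
	}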

@@ -346,6 +352,25 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 }

 func validatePodAlignment(ctx context.Context, f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("validating the init container %s on Gu pod %s", cnt.Name, pod.Name))
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+		framework.Logf("got init container logs: %v", logs)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
+		framework.ExpectNoError(err, "NUMA Alignment check failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		if numaRes != nil {
+			framework.Logf("NUMA resources for init container %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))

@@ -367,6 +392,23 @@ func validatePodAlignmentWithPodScope(ctx context.Context, f *framework.Framewor
 	podsNUMA := make(map[int]int)

 	ginkgo.By(fmt.Sprintf("validate pod scope alignment for %s pod", pod.Name))
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		envMap, err := makeEnvMap(logs)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		cpuToNUMA, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, envMap, envInfo.numaNodes)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		for cpuID, numaID := range cpuToNUMA {
+			podsNUMA[cpuID] = numaID
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "NUMA alignment failed for container [%s] of pod [%s]", cnt.Name, pod.Name)
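
Both validators recover a container's resource assignment by parsing the environment dump the container printed to its log. The real makeEnvMap helper is defined elsewhere in the e2e_node suite; purely as an assumption about its shape, a parser for KEY=VALUE log lines could look like:

	// envMapFromLogs parses "KEY=VALUE" lines from container log output.
	// Hypothetical sketch, not the actual helper.
	func envMapFromLogs(logs string) (map[string]string, error) {
		envMap := map[string]string{}
		for _, line := range strings.Split(strings.TrimSpace(logs), "\n") {
			if line == "" {
				continue
			}
			key, value, ok := strings.Cut(line, "=")
			if !ok {
				return nil, fmt.Errorf("unexpected log line: %q", line)
			}
			envMap[key] = value
		}
		return envMap, nil
	}
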
@@ -440,7 +482,7 @@ func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework,
 	}

 	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/693-topology-manager/README.md#multi-numa-systems-tests
-	// we can do a menaingful validation only when using the single-numa node policy
+	// we can do a meaningful validation only when using the single-numa node policy
 	if envInfo.policy == topologymanager.PolicySingleNumaNode {
 		for _, pod := range podMap {
 			validatePodAlignment(ctx, f, pod, envInfo)
@@ -733,6 +775,94 @@ func runTMScopeResourceAlignmentTestSuite(ctx context.Context, f *framework.Fram
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)

+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with a restartable init container, 1 core and 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with multiple restartable init containers, each container with 1 CPU core. Use 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	coresReq = fmt.Sprintf("%dm", (numCores/2+1)*1000)
+	ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with two restartable init containers where the sum of their CPU requests (%d cores) exceeds NUMA capacity. The request should be rejected", (numCores/2+1)*2))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
 	teardownSRIOVConfigOrFail(ctx, f, sd)
 }
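
The arithmetic in the rejection scenario is worth spelling out: restartable init containers run for the pod's entire lifetime, so their CPU requests accumulate instead of being folded into the usual maximum-of-regular-init-containers rule, and under the pod scope the whole pod must fit on a single NUMA node. A worked sketch, with numCores standing for the CPUs of one NUMA node as in the test:

	// exceedsNUMACapacity reproduces the test's math: each restartable init
	// container requests numCores/2+1 cores, so two of them need numCores+2
	// cores in total, which no single NUMA node can provide.
	func exceedsNUMACapacity(numCores int) bool {
		perInitContainer := numCores/2 + 1
		totalInitCores := 2 * perInitContainer // sidecar requests accumulate
		return totalInitCores > numCores       // numCores+2 > numCores, always true
	}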

@@ -825,6 +955,30 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)

+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with a restartable init container - each container with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
 	// testing more complex conditions require knowledge about the system cpu+bus topology
 }
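
For context on how these attribute tables are consumed: each scenario passes them to makeTopologyManagerTestPod (shown earlier in this diff), and the positive runner creates the pod and validates alignment from its logs. The runner internals are not part of this diff, so the following is only an approximation; CreateSync is the standard helper from k8s.io/kubernetes/test/e2e/framework/pod:

	// Approximate positive-test flow for one scenario ("gu-pod" is an
	// arbitrary example name):
	pod := makeTopologyManagerTestPod("gu-pod", ctnAttrs, initCtnAttrs)
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod) // create and wait until Running
	validatePodAlignment(ctx, f, pod, envInfo)        // assert NUMA alignment from logs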

@@ -889,6 +1043,39 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 		},
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Successfully admit pod with multiple restartable init containers, each with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 }

 // this is the only policy that can guarantee reliable rejects
@@ -908,6 +1095,65 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 		},
 	}
 	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+	if sd.resourceAmount >= 3 {
+		ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with a restartable init container demanding %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container",
+				cpuRequest:    excessCoresReq,
+				cpuLimit:      excessCoresReq,
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+		ginkgo.By("Trying to admit a guaranteed pod with two restartable init containers where the second one cannot achieve NUMA alignment - and it should be rejected")
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container-1",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+			{
+				ctnName:       "restartable-init-container-2",
+				cpuRequest:    excessCoresReq,
+				cpuLimit:      excessCoresReq,
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+	}
 }
 }
