@@ -69,6 +69,7 @@ type tmCtnAttribute struct {
 	deviceName    string
 	deviceRequest string
 	deviceLimit   string
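+	// restartPolicy, when set to point at v1.ContainerRestartPolicyAlways, marks the
+	// generated init container as a restartable (sidecar) init container; nil keeps
+	// the default one-shot init container behavior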
+	restartPolicy *v1.ContainerRestartPolicy
 }

 func detectNUMANodes() int {
@@ -158,7 +159,8 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
 				},
 			},
-		Command: []string{"sh", "-c", ctnCmd},
+		Command:       []string{"sh", "-c", ctnCmd},
+		RestartPolicy: ctnAttr.restartPolicy,
 	}
 	if ctnAttr.deviceName != "" {
 		ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
@@ -171,8 +173,12 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co

 func makeTopologyManagerTestPod(podName string, tmCtnAttributes, tmInitCtnAttributes []tmCtnAttribute) *v1.Pod {
 	var containers, initContainers []v1.Container
-	if len(tmInitCtnAttributes) > 0 {
-		initContainers = makeContainers(numaAlignmentCommand, tmInitCtnAttributes)
+	for _, attr := range tmInitCtnAttributes {
+		cmd := numaAlignmentCommand
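+		// restartable (sidecar) init containers stay running for the pod's lifetime,
+		// so give them the same long-running sleep command as the app containers;
+		// regular init containers keep the one-shot alignment command and exit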
+		if attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways {
+			cmd = numaAlignmentSleepCommand
+		}
+		initContainers = append(initContainers, makeContainers(cmd, []tmCtnAttribute{attr})...)
 	}
 	containers = makeContainers(numaAlignmentSleepCommand, tmCtnAttributes)
@@ -346,6 +352,25 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 }

 func validatePodAlignment(ctx context.Context, f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
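+		// (restartable init containers run alongside the app containers, so their
+		// NUMA alignment can be validated from their logs just like the regular
+		// containers below)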
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("validating the init container %s on Gu pod %s", cnt.Name, pod.Name))
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+		framework.Logf("got init container logs: %v", logs)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
+		framework.ExpectNoError(err, "NUMA Alignment check failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		if numaRes != nil {
+			framework.Logf("NUMA resources for init container %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
@@ -367,6 +392,23 @@ func validatePodAlignmentWithPodScope(ctx context.Context, f *framework.Framewor
 	podsNUMA := make(map[int]int)

 	ginkgo.By(fmt.Sprintf("validate pod scope alignment for %s pod", pod.Name))
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
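+		// fold the CPUs of restartable init containers into podsNUMA as well, since
+		// they keep their exclusive CPUs while the app containers run and therefore
+		// count toward the pod-scope NUMA placement being validated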
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		envMap, err := makeEnvMap(logs)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		cpuToNUMA, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, envMap, envInfo.numaNodes)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		for cpuID, numaID := range cpuToNUMA {
+			podsNUMA[cpuID] = numaID
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "NUMA alignment failed for container [%s] of pod [%s]", cnt.Name, pod.Name)
@@ -440,7 +482,7 @@ func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework,
 	}

 	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/693-topology-manager/README.md#multi-numa-systems-tests
-	// we can do a menaingful validation only when using the single-numa node policy
+	// we can do a meaningful validation only when using the single-numa node policy
 	if envInfo.policy == topologymanager.PolicySingleNumaNode {
 		for _, pod := range podMap {
 			validatePodAlignment(ctx, f, pod, envInfo)
@@ -733,6 +775,94 @@ func runTMScopeResourceAlignmentTestSuite(ctx context.Context, f *framework.Fram
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)

+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with restartable init container, 1 core and 1 %s device", sd.resourceName))
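+	// note: containerRestartPolicyAlways is assumed to be a package-level variable
+	// (presumably containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways,
+	// defined elsewhere in the e2e_node package) so that its address can be taken below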
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with multiple restartable init containers, each container with 1 CPU core. Use 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	coresReq = fmt.Sprintf("%dm", (numCores/2+1)*1000)
+ ginkgo .By (fmt .Sprintf ("Trying to admin guaranteed pod with two restartable init containers where sum of their CPU requests (%d cores) exceeds NUMA capacity. The request should be rejected" , (numCores / 2 + 1 )* 2 ))
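+	// each restartable init container requests (numCores/2 + 1) cores; because
+	// restartable init containers keep running, their requests accumulate instead of
+	// being reused, so the pod as a whole cannot fit a single NUMA node under the
+	// pod scope and admission is expected to fail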
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
 	teardownSRIOVConfigOrFail(ctx, f, sd)
 }
@@ -825,6 +955,30 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)

+	ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with restartable init container - each with 1 core, 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
 	// testing more complex conditions require knowledge about the system cpu+bus topology
 }
@@ -889,6 +1043,39 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+		ginkgo.By(fmt.Sprintf("Successfully admit pod with multiple restartable init containers, each with 1 core, 1 %s device", sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container-1",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+			{
+				ctnName:       "restartable-init-container-2",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 	}

 	// this is the only policy that can guarantee reliable rejects
@@ -908,6 +1095,65 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
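+		// the scenarios below consume up to three devices at once (one per restartable
+		// init container plus one for the app container), since devices assigned to
+		// restartable init containers are not reused; skip them on nodes exposing fewer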
+		if sd.resourceAmount >= 3 {
+			ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with a restartable init container demanding %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+			ginkgo.By("Trying to admit a guaranteed pod with two restartable init containers where the second one cannot achieve NUMA alignment - and it should be rejected")
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container-1",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+				{
+					ctnName:       "restartable-init-container-2",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+		}
 	}
 }