@@ -69,6 +69,7 @@ type tmCtnAttribute struct {
 	deviceName    string
 	deviceRequest string
 	deviceLimit   string
+	restartPolicy *v1.ContainerRestartPolicy
 }
 
 func detectNUMANodes() int {
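The new `restartPolicy` attribute lets a test case mark an init container as restartable (a sidecar). A minimal sketch of how that attribute maps onto the pod spec, assuming the standard `k8s.io/api/core/v1` types; the package name, pod name, image, and the `containerRestartPolicyAlways` variable (which the test file presumably declares) are illustrative, not taken from the diff:

```go
package sketch

import (
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Stand-in for the helper variable the test file takes the address of.
var containerRestartPolicyAlways = v1.ContainerRestartPolicyAlways

// sidecarPodSketch shows the pod-spec shape the new attribute produces:
// an init container with RestartPolicy: Always keeps running alongside
// the app containers instead of exiting before they start.
func sidecarPodSketch() *v1.Pod {
	return &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "gu-pod-with-sidecar"},
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{{
				Name:          "restartable-init-container",
				Image:         "busybox",
				RestartPolicy: &containerRestartPolicyAlways, // marks it restartable
			}},
			Containers: []v1.Container{{
				Name:  "gu-container",
				Image: "busybox",
			}},
		},
	}
}
```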
@@ -158,7 +159,8 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 					v1.ResourceName(v1.ResourceMemory): resource.MustParse("100Mi"),
 				},
 			},
-			Command: []string{"sh", "-c", ctnCmd},
+			Command:       []string{"sh", "-c", ctnCmd},
+			RestartPolicy: ctnAttr.restartPolicy,
 		}
 		if ctnAttr.deviceName != "" {
 			ctn.Resources.Requests[v1.ResourceName(ctnAttr.deviceName)] = resource.MustParse(ctnAttr.deviceRequest)
@@ -171,8 +173,12 @@ func makeContainers(ctnCmd string, ctnAttributes []tmCtnAttribute) (ctns []v1.Co
 
 func makeTopologyManagerTestPod(podName string, tmCtnAttributes, tmInitCtnAttributes []tmCtnAttribute) *v1.Pod {
 	var containers, initContainers []v1.Container
-	if len(tmInitCtnAttributes) > 0 {
-		initContainers = makeContainers(numaAlignmentCommand, tmInitCtnAttributes)
+	for _, attr := range tmInitCtnAttributes {
+		cmd := numaAlignmentCommand
+		if attr.restartPolicy != nil && *attr.restartPolicy == v1.ContainerRestartPolicyAlways {
+			cmd = numaAlignmentSleepCommand
+		}
+		initContainers = append(initContainers, makeContainers(cmd, []tmCtnAttribute{attr})...)
 	}
 	containers = makeContainers(numaAlignmentSleepCommand, tmCtnAttributes)
 
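With the new loop, regular init containers keep the one-shot alignment command while restartable ones get the long-running sleep command, so they stay up and their logs remain available to the alignment checks further down. An illustrative usage fragment (an assumption, not part of the diff) showing both kinds of init container in one test pod; it presumes it sits in the same e2e_node test package so `tmCtnAttribute` and `makeTopologyManagerTestPod` are in scope, and the attribute values are made up:

```go
// exampleMixedInitContainersPod builds a test pod with one regular and one
// restartable init container plus a guaranteed app container.
func exampleMixedInitContainersPod() *v1.Pod {
	restartAlways := v1.ContainerRestartPolicyAlways
	initAttrs := []tmCtnAttribute{
		// regular init container: runs numaAlignmentCommand once and exits
		{ctnName: "init-regular", cpuRequest: "1000m", cpuLimit: "1000m"},
		// restartable (sidecar) init container: gets numaAlignmentSleepCommand
		{ctnName: "init-sidecar", cpuRequest: "1000m", cpuLimit: "1000m", restartPolicy: &restartAlways},
	}
	appAttrs := []tmCtnAttribute{
		{ctnName: "gu-container", cpuRequest: "1000m", cpuLimit: "1000m"},
	}
	return makeTopologyManagerTestPod("gu-pod", appAttrs, initAttrs)
}
```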
@@ -346,6 +352,25 @@ func findSRIOVResource(node *v1.Node) (string, int64) {
 }
 
 func validatePodAlignment(ctx context.Context, f *framework.Framework, pod *v1.Pod, envInfo *testEnvInfo) {
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		ginkgo.By(fmt.Sprintf("validating the init container %s on Gu pod %s", cnt.Name, pod.Name))
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+		framework.Logf("got init container logs: %v", logs)
+		numaRes, err := checkNUMAAlignment(f, pod, &cnt, logs, envInfo)
+		framework.ExpectNoError(err, "NUMA Alignment check failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		if numaRes != nil {
+			framework.Logf("NUMA resources for init container %s/%s: %s", pod.Name, cnt.Name, numaRes.String())
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
 
@@ -367,6 +392,23 @@ func validatePodAlignmentWithPodScope(ctx context.Context, f *framework.Framewor
 	podsNUMA := make(map[int]int)
 
 	ginkgo.By(fmt.Sprintf("validate pod scope alignment for %s pod", pod.Name))
+	for _, cnt := range pod.Spec.InitContainers {
+		// only check restartable init containers, skip regular init containers
+		if cnt.RestartPolicy == nil || *cnt.RestartPolicy != v1.ContainerRestartPolicyAlways {
+			continue
+		}
+
+		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		envMap, err := makeEnvMap(logs)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		cpuToNUMA, err := getCPUToNUMANodeMapFromEnv(f, pod, &cnt, envMap, envInfo.numaNodes)
+		framework.ExpectNoError(err, "NUMA alignment failed for init container [%s] of pod [%s]", cnt.Name, pod.Name)
+		for cpuID, numaID := range cpuToNUMA {
+			podsNUMA[cpuID] = numaID
+		}
+	}
+
 	for _, cnt := range pod.Spec.Containers {
 		logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
 		framework.ExpectNoError(err, "NUMA alignment failed for container [%s] of pod [%s]", cnt.Name, pod.Name)
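Under the pod scope, the CPUs pinned to restartable init containers count toward the same placement as the app containers, which is why their CPU-to-NUMA mappings are merged into the shared `podsNUMA` map above. A self-contained sketch of the idea behind the follow-up check (the real function's remaining code is not shown in this hunk, so the function name here is hypothetical):

```go
package sketch

import "fmt"

// singleNUMANode verifies that every CPU assigned to the pod, sidecars
// included, landed on the same NUMA node; podsNUMA maps CPU ID -> NUMA ID.
func singleNUMANode(podsNUMA map[int]int) error {
	nodes := make(map[int]struct{})
	for _, numaID := range podsNUMA {
		nodes[numaID] = struct{}{}
	}
	if len(nodes) > 1 {
		return fmt.Errorf("pod CPUs span %d NUMA nodes, want 1", len(nodes))
	}
	return nil
}
```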
@@ -440,7 +482,7 @@ func runTopologyManagerPositiveTest(ctx context.Context, f *framework.Framework,
 	}
 
 	// per https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/693-topology-manager/README.md#multi-numa-systems-tests
-	// we can do a menaingful validation only when using the single-numa node policy
+	// we can do a meaningful validation only when using the single-numa node policy
 	if envInfo.policy == topologymanager.PolicySingleNumaNode {
 		for _, pod := range podMap {
 			validatePodAlignment(ctx, f, pod, envInfo)
@@ -728,6 +770,94 @@ func runTMScopeResourceAlignmentTestSuite(ctx context.Context, f *framework.Fram
 	}
 	runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
 
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with restartable init container, 1 core and 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	ginkgo.By(fmt.Sprintf("Admit one guaranteed pod with multiple restartable init containers, each container with 1 CPU core. Use 1 %s device", sd.resourceName))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceName:    sd.resourceName,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
+	coresReq = fmt.Sprintf("%dm", (numCores/2+1)*1000)
+	ginkgo.By(fmt.Sprintf("Trying to admit guaranteed pod with two restartable init containers where sum of their CPU requests (%d cores) exceeds NUMA capacity. The request should be rejected", (numCores/2+1)*2))
+	initCtnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "restartable-init-container-1",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+		{
+			ctnName:       "restartable-init-container-2",
+			cpuRequest:    coresReq,
+			cpuLimit:      coresReq,
+			deviceRequest: "1",
+			deviceLimit:   "1",
+			restartPolicy: &containerRestartPolicyAlways,
+		},
+	}
+	ctnAttrs = []tmCtnAttribute{
+		{
+			ctnName:       "gu-container",
+			cpuRequest:    "1000m",
+			cpuLimit:      "1000m",
+			deviceRequest: "1",
+			deviceLimit:   "1",
+		},
+	}
+	runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
 	teardownSRIOVConfigOrFail(ctx, f, sd)
 }
 
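The rejection case above relies on restartable init containers holding their exclusive CPUs for the whole pod lifetime, so the two sidecar requests add up instead of being reused. A small sketch of the arithmetic, under the assumption stated in the ginkgo message that the combined request must exceed a single NUMA node's capacity; the package and function names are illustrative:

```go
package sketch

import "fmt"

// excessSidecarRequest mirrors the coresReq computation: each sidecar asks
// for numCores/2+1 cores, so two of them together need 2*(numCores/2+1)
// cores, which always exceeds numCores and therefore cannot be satisfied
// from the capacity the test treats as one NUMA node.
func excessSidecarRequest(numCores int) (perContainer string, totalCores int) {
	per := numCores/2 + 1
	return fmt.Sprintf("%dm", per*1000), 2 * per
}
```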
@@ -820,6 +950,30 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 		}
 		runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
 
+		ginkgo.By(fmt.Sprintf("Successfully admit one guaranteed pod with restartable init container - each with 1 core, 1 %s device", sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
+
 		// testing more complex conditions require knowledge about the system cpu+bus topology
 	}
 
@@ -884,6 +1038,39 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerPositiveTest(ctx, f, 2, ctnAttrs, initCtnAttrs, envInfo)
+
+		ginkgo.By(fmt.Sprintf("Successfully admit pod with multiple restartable init containers, each with 1 core, 1 %s device", sd.resourceName))
+		initCtnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "restartable-init-container-1",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+			{
+				ctnName:       "restartable-init-container-2",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+				restartPolicy: &containerRestartPolicyAlways,
+			},
+		}
+		ctnAttrs = []tmCtnAttribute{
+			{
+				ctnName:       "gu-container",
+				cpuRequest:    "1000m",
+				cpuLimit:      "1000m",
+				deviceName:    sd.resourceName,
+				deviceRequest: "1",
+				deviceLimit:   "1",
+			},
+		}
+		runTopologyManagerPositiveTest(ctx, f, 1, ctnAttrs, initCtnAttrs, envInfo)
 	}
 
 	// this is the only policy that can guarantee reliable rejects
@@ -903,6 +1090,65 @@ func runTopologyManagerNodeAlignmentSuiteTests(ctx context.Context, f *framework
 			},
 		}
 		runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+		if sd.resourceAmount >= 3 {
+			ginkgo.By(fmt.Sprintf("Trying to admit a guaranteed pod with a restartable init container demanding %d cores, 1 %s device - and it should be rejected", numCores, sd.resourceName))
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+
+			ginkgo.By("Trying to admit a guaranteed pod with two restartable init containers where the second one cannot achieve NUMA alignment - and it should be rejected")
+			initCtnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "restartable-init-container-1",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+				{
+					ctnName:       "restartable-init-container-2",
+					cpuRequest:    excessCoresReq,
+					cpuLimit:      excessCoresReq,
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+					restartPolicy: &containerRestartPolicyAlways,
+				},
+			}
+			ctnAttrs = []tmCtnAttribute{
+				{
+					ctnName:       "gu-container",
+					cpuRequest:    "1000m",
+					cpuLimit:      "1000m",
+					deviceName:    sd.resourceName,
+					deviceRequest: "1",
+					deviceLimit:   "1",
+				},
+			}
+			runTopologyManagerNegativeTest(ctx, f, ctnAttrs, initCtnAttrs, envInfo)
+		}
 	}
 }
 