@@ -23,6 +23,7 @@ import (
 	"io/fs"
 	"os"
 	"os/exec"
+	"path/filepath"
 	"regexp"
 	"strconv"
 	"strings"
@@ -40,6 +41,8 @@ import (
 
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
+	"github.com/onsi/gomega/gcustom"
+	gomegatypes "github.com/onsi/gomega/types"
 	"k8s.io/kubernetes/test/e2e/feature"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
@@ -1066,6 +1069,153 @@ func runCPUManagerTests(f *framework.Framework) {
 		waitForContainerRemoval(ctx, pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
 	})
 
+	ginkgo.It("should assign packed CPUs with distribute-cpus-across-numa disabled and pcpu-only policy options enabled", func(ctx context.Context) {
+		fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+		smtLevel := getSMTLevel()
+
+		// strict SMT alignment is trivially verified and granted on non-SMT systems
+		if smtLevel < 2 {
+			e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
+		}
+
+		// our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
+		if cpuAlloc < int64(smtLevel*2) {
+			e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
+		}
+
+		framework.Logf("SMT level %d", smtLevel)
+
+		cpuPolicyOptions := map[string]string{
+			cpumanager.FullPCPUsOnlyOption:            "true",
+			cpumanager.DistributeCPUsAcrossNUMAOption: "false",
+		}
+		newCfg := configureCPUManagerInKubelet(oldCfg,
+			&cpuManagerKubeletArguments{
+				policyName:              string(cpumanager.PolicyStatic),
+				reservedSystemCPUs:      cpuset.New(0),
+				enableCPUManagerOptions: true,
+				options:                 cpuPolicyOptions,
+			},
+		)
+		updateKubeletConfig(ctx, f, newCfg, true)
+
+		ctnAttrs := []ctnAttribute{
+			{
+				ctnName:    "test-gu-container-distribute-cpus-across-numa-disabled",
+				cpuRequest: "2000m",
+				cpuLimit:   "2000m",
+			},
+		}
+		pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa-disabled", ctnAttrs)
+		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+
+		for _, cnt := range pod.Spec.Containers {
+			ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+			logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			framework.Logf("got pod logs: %v", logs)
+			cpus, err := cpuset.Parse(strings.TrimSpace(logs))
+			framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			validateSMTAlignment(cpus, smtLevel, pod, &cnt)
+			gomega.Expect(cpus).To(BePackedCPUs())
+		}
+		deletePodSyncByName(ctx, f, pod.Name)
+		// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
+		// this is in turn needed because we will have an unavoidable (in the current framework) race with the
+		// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
+		waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+	})
+
+	ginkgo.It("should assign CPUs distributed across NUMA with distribute-cpus-across-numa and pcpu-only policy options enabled", func(ctx context.Context) {
+		var cpusNumPerNUMA, coresNumPerNUMA, numaNodeNum, threadsPerCore int
+
+		fullCPUsOnlyOpt := fmt.Sprintf("option=%s", cpumanager.FullPCPUsOnlyOption)
+		_, cpuAlloc, _ = getLocalNodeCPUDetails(ctx, f)
+		smtLevel := getSMTLevel()
+		framework.Logf("SMT level %d", smtLevel)
+
+		// strict SMT alignment is trivially verified and granted on non-SMT systems
+		if smtLevel < 2 {
+			e2eskipper.Skipf("Skipping CPU Manager %s tests since SMT disabled", fullCPUsOnlyOpt)
+		}
+
+		// our tests want to allocate a full core, so we need at least 2*2=4 virtual cpus
+		if cpuAlloc < int64(smtLevel*2) {
+			e2eskipper.Skipf("Skipping CPU Manager %s tests since the CPU capacity < 4", fullCPUsOnlyOpt)
+		}
+
+		// this test is intended to be run on a multi-node NUMA system with
+		// at least 4 cores per socket; hostCheck skips the test if the
+		// above requirements are not satisfied
+		numaNodeNum, coresNumPerNUMA, threadsPerCore = hostCheck()
+		cpusNumPerNUMA = coresNumPerNUMA * threadsPerCore
+
+		framework.Logf("numaNodes on the system %d", numaNodeNum)
+		framework.Logf("Cores per NUMA on the system %d", coresNumPerNUMA)
+		framework.Logf("Threads per Core on the system %d", threadsPerCore)
+		framework.Logf("CPUs per NUMA on the system %d", cpusNumPerNUMA)
+
+		cpuPolicyOptions := map[string]string{
+			cpumanager.FullPCPUsOnlyOption:            "true",
+			cpumanager.DistributeCPUsAcrossNUMAOption: "true",
+		}
+		newCfg := configureCPUManagerInKubelet(oldCfg,
+			&cpuManagerKubeletArguments{
+				policyName:              string(cpumanager.PolicyStatic),
+				reservedSystemCPUs:      cpuset.New(0),
+				enableCPUManagerOptions: true,
+				options:                 cpuPolicyOptions,
+			},
+		)
+		updateKubeletConfig(ctx, f, newCfg, true)
+		// The 'distribute-cpus-across-numa' policy option ensures that CPU allocations are evenly distributed
+		// across NUMA nodes in cases where more than one NUMA node is required to satisfy the allocation.
+		// So, we want to ensure that the CPU request exceeds the number of CPUs that can fit within a single
+		// NUMA node. We have to pick cpuRequest such that:
+		// 1. CPURequest > cpusNumPerNUMA
+		// 2. It does not occupy all the CPUs on the node and leaves room for the reserved CPUs
+		// 3. CPURequest is a multiple of the number of NUMA nodes to allow equal CPU distribution across NUMA nodes
+		//
+		// In summary: cpusNumPerNUMA < CPURequest < ((cpusNumPerNUMA * numaNodeNum) - reservedCPUsCount)
+		// Considering all these constraints we select: CPURequest = (cpusNumPerNUMA-smtLevel)*numaNodeNum
+
+		cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum
+		ctnAttrs := []ctnAttribute{
+			{
+				ctnName:    "test-gu-container-distribute-cpus-across-numa",
+				cpuRequest: fmt.Sprintf("%d", cpuReq),
+				cpuLimit:   fmt.Sprintf("%d", cpuReq),
+			},
+		}
+		pod := makeCPUManagerPod("test-pod-distribute-cpus-across-numa", ctnAttrs)
+		pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
+
+		for _, cnt := range pod.Spec.Containers {
+			ginkgo.By(fmt.Sprintf("validating the container %s on Gu pod %s", cnt.Name, pod.Name))
+
+			logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, f.Namespace.Name, pod.Name, cnt.Name)
+			framework.ExpectNoError(err, "expected log not found in container [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			framework.Logf("got pod logs: %v", logs)
+			cpus, err := cpuset.Parse(strings.TrimSpace(logs))
+			framework.ExpectNoError(err, "parsing cpuset from logs for [%s] of pod [%s]", cnt.Name, pod.Name)
+
+			validateSMTAlignment(cpus, smtLevel, pod, &cnt)
+			// We expect a perfectly even split, i.e. equal distribution across NUMA nodes, since the CPU request is (cpusNumPerNUMA-smtLevel)*numaNodeNum.
+			expectedSpread := cpus.Size() / numaNodeNum
+			gomega.Expect(cpus).To(BeDistributedCPUs(expectedSpread))
+		}
+		deletePodSyncByName(ctx, f, pod.Name)
+		// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
+		// this is in turn needed because we will have an unavoidable (in the current framework) race with the
+		// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
+		waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)
+	})
+
 
 	ginkgo.AfterEach(func(ctx context.Context) {
 		updateKubeletConfig(ctx, f, oldCfg, true)
 	})
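To sanity-check the cpuRequest arithmetic used by the distributed test above, here is a minimal standalone sketch. The topology constants (2 NUMA nodes, 8 cores per node, SMT-2, one reserved CPU) are illustrative assumptions only; the test derives the real values at runtime via hostCheck(), getSMTLevel(), and the reservedSystemCPUs setting.

package main

import "fmt"

// Hypothetical topology, for illustration only.
const (
	numaNodeNum     = 2 // NUMA nodes
	coresNumPerNUMA = 8 // physical cores per NUMA node
	threadsPerCore  = 2 // SMT level
	reservedCPUs    = 1 // cpuset.New(0) reserved for the system
)

func main() {
	cpusNumPerNUMA := coresNumPerNUMA * threadsPerCore // 16
	smtLevel := threadsPerCore

	// CPURequest = (cpusNumPerNUMA - smtLevel) * numaNodeNum
	cpuReq := (cpusNumPerNUMA - smtLevel) * numaNodeNum // (16-2)*2 = 28

	// The constraints spelled out in the test comment:
	fmt.Println(cpuReq > cpusNumPerNUMA)                          // true: spills over a single NUMA node
	fmt.Println(cpuReq < cpusNumPerNUMA*numaNodeNum-reservedCPUs) // true: leaves room for the reserved CPU
	fmt.Println(cpuReq%numaNodeNum == 0)                          // true: splits evenly across NUMA nodes
	fmt.Println(cpuReq / numaNodeNum)                             // 14 CPUs expected on each NUMA node
}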
@@ -1172,6 +1322,78 @@ func isSMTAlignmentError(pod *v1.Pod) bool {
 	return re.MatchString(pod.Status.Reason)
 }
 
+// getNumaNodeCPUs retrieves CPUs for each NUMA node.
+func getNumaNodeCPUs() (map[int]cpuset.CPUSet, error) {
+	numaNodes := make(map[int]cpuset.CPUSet)
+	nodePaths, err := filepath.Glob("/sys/devices/system/node/node*/cpulist")
+	if err != nil {
+		return nil, err
+	}
+
+	for _, nodePath := range nodePaths {
+		data, err := os.ReadFile(nodePath)
+		framework.ExpectNoError(err, "Error obtaining CPU information from the node")
+		cpuSet := strings.TrimSpace(string(data))
+		cpus, err := cpuset.Parse(cpuSet)
+		framework.ExpectNoError(err, "Error parsing CPUset")
+
+		// Extract node ID from path (e.g., "node0" -> 0)
+		base := filepath.Base(filepath.Dir(nodePath))
+		nodeID, err := strconv.Atoi(strings.TrimPrefix(base, "node"))
+		if err != nil {
+			continue
+		}
+		numaNodes[nodeID] = cpus
+	}
+
+	return numaNodes, nil
+}
+
+// computeNUMADistribution calculates CPU distribution per NUMA node.
+func computeNUMADistribution(allocatedCPUs cpuset.CPUSet) map[int]int {
+	numaCPUs, err := getNumaNodeCPUs()
+	framework.ExpectNoError(err, "Error retrieving NUMA nodes")
+	framework.Logf("NUMA Node CPUs allocation: %v", numaCPUs)
+
+	distribution := make(map[int]int)
+	for node, cpus := range numaCPUs {
+		distribution[node] = cpus.Intersection(allocatedCPUs).Size()
+	}
+
+	framework.Logf("allocated CPUs %s distribution: %v", allocatedCPUs.String(), distribution)
+	return distribution
+}
+
+// Custom matcher for checking packed CPUs.
+func BePackedCPUs() gomegatypes.GomegaMatcher {
+	return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) {
+		distribution := computeNUMADistribution(allocatedCPUs)
+		for _, count := range distribution {
+			// This assumption holds true if there are enough CPUs on a single NUMA node.
+			// We are intentionally limiting the CPU request to 2 to minimize the number
+			// of CPUs required to fulfill this case and therefore maximize the chances
+			// of correctly validating this case.
+			if count == allocatedCPUs.Size() {
+				return true, nil
+			}
+		}
+		return false, nil
+	}).WithMessage("expected CPUs to be packed")
+}
+
+// Custom matcher for checking distributed CPUs.
+func BeDistributedCPUs(expectedSpread int) gomegatypes.GomegaMatcher {
+	return gcustom.MakeMatcher(func(allocatedCPUs cpuset.CPUSet) (bool, error) {
+		distribution := computeNUMADistribution(allocatedCPUs)
+		for _, count := range distribution {
+			if count != expectedSpread {
+				return false, nil
+			}
+		}
+		return true, nil
+	}).WithTemplate("expected CPUs to be evenly distributed across NUMA nodes\nExpected: {{.Data}}\nGot:\n{{.FormattedActual}}\nDistribution: {{.Data}}\n").WithTemplateData(expectedSpread)
+}
+
 // Serial because the test updates kubelet configuration.
 var _ = SIGDescribe("CPU Manager", framework.WithSerial(), feature.CPUManager, func() {
 	f := framework.NewDefaultFramework("cpu-manager-test")
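The gcustom-based matchers added above can also be exercised outside the e2e node framework. Below is a minimal sketch of the same MakeMatcher/WithTemplate pattern against a fake two-node topology; the BeSpreadAcross helper and the use of the k8s.io/utils/cpuset package here are illustrative assumptions, not part of this change.

package cpudistribution_test

import (
	"testing"

	"github.com/onsi/gomega"
	"github.com/onsi/gomega/gcustom"
	gomegatypes "github.com/onsi/gomega/types"
	"k8s.io/utils/cpuset"
)

// BeSpreadAcross is a toy matcher in the same style as BeDistributedCPUs:
// it passes when the allocated set intersects every per-node set with
// exactly expectedSpread CPUs.
func BeSpreadAcross(perNode map[int]cpuset.CPUSet, expectedSpread int) gomegatypes.GomegaMatcher {
	return gcustom.MakeMatcher(func(allocated cpuset.CPUSet) (bool, error) {
		for _, cpus := range perNode {
			if cpus.Intersection(allocated).Size() != expectedSpread {
				return false, nil
			}
		}
		return true, nil
	}).WithTemplate("Expected {{.FormattedActual}} to have {{.Data}} CPUs on every node").WithTemplateData(expectedSpread)
}

func TestBeSpreadAcross(t *testing.T) {
	g := gomega.NewWithT(t)

	// Fake two-node topology: node0 owns CPUs 0-3, node1 owns CPUs 4-7.
	perNode := map[int]cpuset.CPUSet{
		0: cpuset.New(0, 1, 2, 3),
		1: cpuset.New(4, 5, 6, 7),
	}

	g.Expect(cpuset.New(0, 1, 4, 5)).To(BeSpreadAcross(perNode, 2))    // 2 CPUs per node: matches
	g.Expect(cpuset.New(0, 1, 2, 4)).NotTo(BeSpreadAcross(perNode, 2)) // uneven: does not match
}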