@@ -54,6 +54,8 @@ const (
 	Cgroupv2CPURequest         string = "/sys/fs/cgroup/cpu.weight"
 	CPUPeriod                  string = "100000"
 	MinContainerRuntimeVersion string = "1.6.9"
+
+	fakeExtendedResource = "dummy.com/dummy"
 )

 var (
@@ -64,18 +66,21 @@ var (
 )

 type ContainerResources struct {
-	CPUReq     string
-	CPULim     string
-	MemReq     string
-	MemLim     string
-	EphStorReq string
-	EphStorLim string
+	CPUReq              string
+	CPULim              string
+	MemReq              string
+	MemLim              string
+	EphStorReq          string
+	EphStorLim          string
+	ExtendedResourceReq string
+	ExtendedResourceLim string
 }

 type ContainerAllocations struct {
-	CPUAlloc     string
-	MemAlloc     string
-	ephStorAlloc string
+	CPUAlloc              string
+	MemAlloc              string
+	ephStorAlloc          string
+	ExtendedResourceAlloc string
 }

 type TestContainerInfo struct {
@@ -87,6 +92,28 @@ type TestContainerInfo struct {
 	RestartCount int32
 }

+type containerPatch struct {
+	Name      string `json:"name"`
+	Resources struct {
+		Requests struct {
+			CPU     string `json:"cpu,omitempty"`
+			Memory  string `json:"memory,omitempty"`
+			EphStor string `json:"ephemeral-storage,omitempty"`
+		} `json:"requests"`
+		Limits struct {
+			CPU     string `json:"cpu,omitempty"`
+			Memory  string `json:"memory,omitempty"`
+			EphStor string `json:"ephemeral-storage,omitempty"`
+		} `json:"limits"`
+	} `json:"resources"`
+}
+
+type patchSpec struct {
+	Spec struct {
+		Containers []containerPatch `json:"containers"`
+	} `json:"spec"`
+}
+
 func supportsInPlacePodVerticalScaling(ctx context.Context, f *framework.Framework) bool {
 	node := getLocalNode(ctx, f)
 	re := regexp.MustCompile("containerd://(.*)")
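For reference, a minimal standalone sketch (a hypothetical package main, not part of the test file) of the wire format the containerPatch/patchSpec types added above marshal to; empty fields drop out because of the omitempty tags.

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copies of the patch types above, reproduced here only to show the JSON shape.
type containerPatch struct {
	Name      string `json:"name"`
	Resources struct {
		Requests struct {
			CPU    string `json:"cpu,omitempty"`
			Memory string `json:"memory,omitempty"`
		} `json:"requests"`
		Limits struct {
			CPU    string `json:"cpu,omitempty"`
			Memory string `json:"memory,omitempty"`
		} `json:"limits"`
	} `json:"resources"`
}

type patchSpec struct {
	Spec struct {
		Containers []containerPatch `json:"containers"`
	} `json:"spec"`
}

func main() {
	var c containerPatch
	c.Name = "c1"
	c.Resources.Requests.CPU = "100m"
	c.Resources.Requests.Memory = "200Mi"
	c.Resources.Limits.CPU = "100m"
	c.Resources.Limits.Memory = "200Mi"

	var p patchSpec
	p.Spec.Containers = append(p.Spec.Containers, c)

	b, _ := json.Marshal(p)
	fmt.Println(string(b))
	// {"spec":{"containers":[{"name":"c1","resources":{"requests":{"cpu":"100m","memory":"200Mi"},"limits":{"cpu":"100m","memory":"200Mi"}}}]}}
}

This is the same strategic-merge-patch shape used by the hand-written patchString fields in the test cases, which is what lets genPatchString (added below) build a rollback patch from the original TestContainerInfo values.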
@@ -418,6 +445,100 @@ func waitForPodResizeActuation(ctx context.Context, f *framework.Framework, c cl
 	return resizedPod
 }

+func genPatchString(containers []TestContainerInfo) (string, error) {
+	var patch patchSpec
+
+	for _, container := range containers {
+		var cPatch containerPatch
+		cPatch.Name = container.Name
+		cPatch.Resources.Requests.CPU = container.Resources.CPUReq
+		cPatch.Resources.Requests.Memory = container.Resources.MemReq
+		cPatch.Resources.Limits.CPU = container.Resources.CPULim
+		cPatch.Resources.Limits.Memory = container.Resources.MemLim
+
+		patch.Spec.Containers = append(patch.Spec.Containers, cPatch)
+	}
+
+	patchBytes, err := json.Marshal(patch)
+	if err != nil {
+		return "", err
+	}
+
+	return string(patchBytes), nil
+}
+
+func patchNode(ctx context.Context, client clientset.Interface, old *v1.Node, new *v1.Node) error {
+	oldData, err := json.Marshal(old)
+	if err != nil {
+		return err
+	}
+
+	newData, err := json.Marshal(new)
+	if err != nil {
+		return err
+	}
+	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
+	if err != nil {
+		return fmt.Errorf("failed to create merge patch for node %q: %w", old.Name, err)
+	}
+	_, err = client.CoreV1().Nodes().Patch(ctx, old.Name, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}, "status")
+	return err
+}
+
+func addExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string, extendedResourceQuantity resource.Quantity) {
+	extendedResource := v1.ResourceName(extendedResourceName)
+
+	ginkgo.By("Adding a custom resource")
+	OriginalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	framework.ExpectNoError(err)
+
+	node := OriginalNode.DeepCopy()
+	node.Status.Capacity[extendedResource] = extendedResourceQuantity
+	node.Status.Allocatable[extendedResource] = extendedResourceQuantity
+	err = patchNode(context.Background(), clientSet, OriginalNode.DeepCopy(), node)
+	framework.ExpectNoError(err)
+
+	gomega.Eventually(func() error {
+		node, err = clientSet.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{})
+		framework.ExpectNoError(err)
+
+		fakeResourceCapacity, exists := node.Status.Capacity[extendedResource]
+		if !exists {
+			return fmt.Errorf("node %s has no %s resource capacity", node.Name, extendedResourceName)
+		}
+		if expectedResource := resource.MustParse("123"); fakeResourceCapacity.Cmp(expectedResource) != 0 {
+			return fmt.Errorf("node %s has resource capacity %s, expected: %s", node.Name, fakeResourceCapacity.String(), expectedResource.String())
+		}
+
+		return nil
+	}).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred())
+}
+
+func removeExtendedResource(clientSet clientset.Interface, nodeName, extendedResourceName string) {
+	extendedResource := v1.ResourceName(extendedResourceName)
+
+	ginkgo.By("Removing a custom resource")
+	originalNode, err := clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	framework.ExpectNoError(err)
+
+	node := originalNode.DeepCopy()
+	delete(node.Status.Capacity, extendedResource)
+	delete(node.Status.Allocatable, extendedResource)
+	err = patchNode(context.Background(), clientSet, originalNode.DeepCopy(), node)
+	framework.ExpectNoError(err)
+
+	gomega.Eventually(func() error {
+		node, err = clientSet.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+		framework.ExpectNoError(err)
+
+		if _, exists := node.Status.Capacity[extendedResource]; exists {
+			return fmt.Errorf("node %s has resource capacity %s which is expected to be removed", node.Name, extendedResourceName)
+		}

+		return nil
+	}).WithTimeout(30 * time.Second).WithPolling(time.Second).ShouldNot(gomega.HaveOccurred())
+}
+
 func doPodResizeTests() {
 	f := framework.NewDefaultFramework("pod-resize-test")
 	var podClient *e2epod.PodClient
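As a side note, patchNode above sends only a minimal status patch rather than the whole Node object. A rough standalone sketch (hypothetical package main, no cluster access needed) of what strategicpatch.CreateTwoWayMergePatch produces for the capacity/allocatable change made by addExtendedResource:

package main

import (
	"encoding/json"
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/apimachinery/pkg/util/strategicpatch"
)

func main() {
	old := &v1.Node{}
	// Initialize the maps so the copy can be indexed without a nil-map panic.
	old.Status.Capacity = v1.ResourceList{}
	old.Status.Allocatable = v1.ResourceList{}

	// Mirror what addExtendedResource does: mutate a deep copy, keep the original for diffing.
	modified := old.DeepCopy()
	modified.Status.Capacity["dummy.com/dummy"] = resource.MustParse("123")
	modified.Status.Allocatable["dummy.com/dummy"] = resource.MustParse("123")

	oldData, _ := json.Marshal(old)
	newData, _ := json.Marshal(modified)

	// Only the fields that differ between old and modified end up in the patch body.
	patchBytes, err := strategicpatch.CreateTwoWayMergePatch(oldData, newData, &v1.Node{})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(patchBytes))
	// Roughly: {"status":{"allocatable":{"dummy.com/dummy":"123"},"capacity":{"dummy.com/dummy":"123"}}}
}

Patching the "status" subresource (the trailing "status" argument in patchNode) matters here because capacity and allocatable live under Node.Status.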
@@ -426,10 +547,11 @@ func doPodResizeTests() {
 	})

 	type testCase struct {
-		name        string
-		containers  []TestContainerInfo
-		patchString string
-		expected    []TestContainerInfo
+		name                string
+		containers          []TestContainerInfo
+		patchString         string
+		expected            []TestContainerInfo
+		addExtendedResource bool
 	}

 	noRestart := v1.NotRequired
@@ -1131,6 +1253,31 @@ func doPodResizeTests() {
 				},
 			},
 		},
+		{
+			name: "Guaranteed QoS pod, one container - increase CPU & memory with an extended resource",
+			containers: []TestContainerInfo{
+				{
+					Name: "c1",
+					Resources: &ContainerResources{CPUReq: "100m", CPULim: "100m", MemReq: "200Mi", MemLim: "200Mi",
+						ExtendedResourceReq: "1", ExtendedResourceLim: "1"},
+					CPUPolicy: &noRestart,
+					MemPolicy: &noRestart,
+				},
+			},
+			patchString: `{"spec":{"containers":[
+					{"name":"c1", "resources":{"requests":{"cpu":"200m","memory":"400Mi"},"limits":{"cpu":"200m","memory":"400Mi"}}}
+				]}}`,
+			expected: []TestContainerInfo{
+				{
+					Name: "c1",
+					Resources: &ContainerResources{CPUReq: "200m", CPULim: "200m", MemReq: "400Mi", MemLim: "400Mi",
+						ExtendedResourceReq: "1", ExtendedResourceLim: "1"},
+					CPUPolicy: &noRestart,
+					MemPolicy: &noRestart,
+				},
+			},
+			addExtendedResource: true,
+		},
 	}

 	timeouts := framework.NewTimeoutContext()
@@ -1153,6 +1300,20 @@ func doPodResizeTests() {
 			testPod = makeTestPod(f.Namespace.Name, "testpod", tStamp, tc.containers)
 			testPod = e2epod.MustMixinRestrictedPodSecurity(testPod)

+			if tc.addExtendedResource {
+				nodes, err := e2enode.GetReadySchedulableNodes(context.Background(), f.ClientSet)
+				framework.ExpectNoError(err)
+
+				for _, node := range nodes.Items {
+					addExtendedResource(f.ClientSet, node.Name, fakeExtendedResource, resource.MustParse("123"))
+				}
+				defer func() {
+					for _, node := range nodes.Items {
+						removeExtendedResource(f.ClientSet, node.Name, fakeExtendedResource)
+					}
+				}()
+			}
+
 			ginkgo.By("creating pod")
 			newPod := podClient.CreateSync(ctx, testPod)

@@ -1161,41 +1322,49 @@ func doPodResizeTests() {
 			ginkgo.By("verifying initial pod resize policy is as expected")
 			verifyPodResizePolicy(newPod, tc.containers)

-			err := e2epod.WaitForPodCondition(ctx, f.ClientSet, newPod.Namespace, newPod.Name, "Ready", timeouts.PodStartShort, testutils.PodRunningReady)
-			framework.ExpectNoError(err, "pod %s/%s did not go running", newPod.Namespace, newPod.Name)
-			framework.Logf("pod %s/%s running", newPod.Namespace, newPod.Name)
-
 			ginkgo.By("verifying initial pod status resources")
 			verifyPodStatusResources(newPod, tc.containers)

-			ginkgo.By("patching pod for resize")
-			patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(ctx, newPod.Name,
-				types.StrategicMergePatchType, []byte(tc.patchString), metav1.PatchOptions{})
-			framework.ExpectNoError(pErr, "failed to patch pod for resize")
+			ginkgo.By("verifying initial cgroup config are as expected")
+			framework.ExpectNoError(verifyPodContainersCgroupValues(ctx, f, newPod, tc.containers))

-			ginkgo.By("verifying pod patched for resize")
-			verifyPodResources(patchedPod, tc.expected)
-			gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
-				WithArguments(patchedPod, tc.containers).
-				Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")
+			patchAndVerify := func(patchString string, expectedContainers []TestContainerInfo, initialContainers []TestContainerInfo, opStr string, isRollback bool) {
+				ginkgo.By(fmt.Sprintf("patching pod for %s", opStr))
+				patchedPod, pErr = f.ClientSet.CoreV1().Pods(newPod.Namespace).Patch(context.TODO(), newPod.Name,
+					types.StrategicMergePatchType, []byte(patchString), metav1.PatchOptions{})
+				framework.ExpectNoError(pErr, fmt.Sprintf("failed to patch pod for %s", opStr))

-			ginkgo.By("waiting for resize to be actuated")
-			resizedPod := waitForPodResizeActuation(ctx, f, f.ClientSet, podClient, newPod, patchedPod, tc.expected)
+				ginkgo.By(fmt.Sprintf("verifying pod patched for %s", opStr))
+				verifyPodResources(patchedPod, expectedContainers)
+				gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
+					WithArguments(patchedPod, initialContainers).
+					Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")

-			ginkgo.By("verifying pod resources after resize")
-			verifyPodResources(resizedPod, tc.expected)
+				ginkgo.By(fmt.Sprintf("waiting for %s to be actuated", opStr))
+				resizedPod := waitForPodResizeActuation(ctx, f, podClient, newPod, patchedPod, expectedContainers, initialContainers, isRollback)

-			ginkgo.By("verifying pod allocations after resize")
-			gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
-				WithArguments(resizedPod, tc.expected).
-				Should(gomega.BeNil(), "failed to verify Pod allocations for resizedPod")
+				// Check cgroup values only for containerd versions before 1.6.9
+				ginkgo.By(fmt.Sprintf("verifying pod container's cgroup values after %s", opStr))
+				framework.ExpectNoError(verifyPodContainersCgroupValues(ctx, f, resizedPod, expectedContainers))
+
+				ginkgo.By(fmt.Sprintf("verifying pod resources after %s", opStr))
+				verifyPodResources(resizedPod, expectedContainers)
+
+				ginkgo.By(fmt.Sprintf("verifying pod allocations after %s", opStr))
+				gomega.Eventually(ctx, verifyPodAllocations, timeouts.PodStartShort, timeouts.Poll).
+					WithArguments(resizedPod, expectedContainers).
+					Should(gomega.BeNil(), "failed to verify Pod allocations for resizedPod")
+			}
+
+			patchAndVerify(tc.patchString, tc.expected, tc.containers, "resize", false)
+
+			rbPatchStr, err := genPatchString(tc.containers)
+			framework.ExpectNoError(err)
+			// Resize has been actuated, test rollback
+			patchAndVerify(rbPatchStr, tc.containers, tc.expected, "rollback", true)

 			ginkgo.By("deleting pod")
-			deletePodSyncByName(ctx, f, newPod.Name)
-			// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
-			// this is in turn needed because we will have an unavoidable (in the current framework) race with the
-			// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
-			waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace)
+			podClient.DeleteSync(ctx, newPod.Name, metav1.DeleteOptions{}, timeouts.PodDelete)
 		})
 	}
 }
@@ -1286,11 +1455,8 @@ func doPodResizeErrorTests() {
 				WithArguments(patchedPod, tc.expected).
 				Should(gomega.BeNil(), "failed to verify Pod allocations for patchedPod")

-			deletePodSyncByName(ctx, f, newPod.Name)
-			// we need to wait for all containers to really be gone so cpumanager reconcile loop will not rewrite the cpu_manager_state.
-			// this is in turn needed because we will have an unavoidable (in the current framework) race with the
-			// reconcile loop which will make our attempt to delete the state file and to restore the old config go haywire
-			waitForAllContainerRemoval(ctx, newPod.Name, newPod.Namespace)
+			ginkgo.By("deleting pod")
+			podClient.DeleteSync(ctx, newPod.Name, metav1.DeleteOptions{}, timeouts.PodDelete)
 		})
 	}
 }
@@ -1301,7 +1467,7 @@ func doPodResizeErrorTests() {
 // b) api-server in services doesn't start with --enable-admission-plugins=ResourceQuota
 //    and is not possible to start it from TEST_ARGS
 // Above tests are performed by doSheduletTests() and doPodResizeResourceQuotaTests()
-// in test/node/pod_resize_test.go
+// in test/e2e/node/pod_resize.go

 var _ = SIGDescribe("Pod InPlace Resize Container", framework.WithSerial(), feature.InPlacePodVerticalScaling, "[NodeAlphaFeature:InPlacePodVerticalScaling]", func() {
 	if !podOnCgroupv2Node {