@@ -33,6 +33,7 @@ import (
33
33
utilfeature "k8s.io/apiserver/pkg/util/feature"
34
34
utilfeaturetesting "k8s.io/apiserver/pkg/util/feature/testing"
35
35
clientset "k8s.io/client-go/kubernetes"
36
+ podutil "k8s.io/kubernetes/pkg/api/v1/pod"
36
37
"k8s.io/kubernetes/pkg/features"
37
38
_ "k8s.io/kubernetes/pkg/scheduler/algorithmprovider"
38
39
testutils "k8s.io/kubernetes/test/utils"
@@ -73,7 +74,7 @@ func TestPreemption(t *testing.T) {
73
74
74
75
defaultPodRes := & v1.ResourceRequirements {Requests : v1.ResourceList {
75
76
v1 .ResourceCPU : * resource .NewMilliQuantity (100 , resource .DecimalSI ),
76
- v1 .ResourceMemory : * resource .NewQuantity (100 , resource .BinarySI )},
77
+ v1 .ResourceMemory : * resource .NewQuantity (100 , resource .DecimalSI )},
77
78
}
78
79
79
80
tests := []struct {
@@ -91,7 +92,7 @@ func TestPreemption(t *testing.T) {
91
92
Priority : & lowPriority ,
92
93
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
93
94
v1 .ResourceCPU : * resource .NewMilliQuantity (400 , resource .DecimalSI ),
94
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
95
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
95
96
},
96
97
}),
97
98
},
@@ -101,7 +102,7 @@ func TestPreemption(t *testing.T) {
101
102
Priority : & highPriority ,
102
103
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
103
104
v1 .ResourceCPU : * resource .NewMilliQuantity (300 , resource .DecimalSI ),
104
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
105
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
105
106
},
106
107
}),
107
108
preemptedPodIndexes : map [int ]struct {}{0 : {}},
@@ -237,7 +238,7 @@ func TestPreemption(t *testing.T) {
237
238
nodeRes := & v1.ResourceList {
238
239
v1 .ResourcePods : * resource .NewQuantity (32 , resource .DecimalSI ),
239
240
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
240
- v1 .ResourceMemory : * resource .NewQuantity (500 , resource .BinarySI ),
241
+ v1 .ResourceMemory : * resource .NewQuantity (500 , resource .DecimalSI ),
241
242
}
242
243
node , err := createNode (context .clientSet , "node1" , nodeRes )
243
244
if err != nil {
@@ -313,7 +314,7 @@ func TestDisablePreemption(t *testing.T) {
313
314
Priority : & lowPriority ,
314
315
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
315
316
v1 .ResourceCPU : * resource .NewMilliQuantity (400 , resource .DecimalSI ),
316
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
317
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
317
318
},
318
319
}),
319
320
},
@@ -323,7 +324,7 @@ func TestDisablePreemption(t *testing.T) {
323
324
Priority : & highPriority ,
324
325
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
325
326
v1 .ResourceCPU : * resource .NewMilliQuantity (300 , resource .DecimalSI ),
326
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
327
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
327
328
},
328
329
}),
329
330
},
@@ -333,7 +334,7 @@ func TestDisablePreemption(t *testing.T) {
333
334
nodeRes := & v1.ResourceList {
334
335
v1 .ResourcePods : * resource .NewQuantity (32 , resource .DecimalSI ),
335
336
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
336
- v1 .ResourceMemory : * resource .NewQuantity (500 , resource .BinarySI ),
337
+ v1 .ResourceMemory : * resource .NewQuantity (500 , resource .DecimalSI ),
337
338
}
338
339
_ , err := createNode (context .clientSet , "node1" , nodeRes )
339
340
if err != nil {
@@ -375,7 +376,7 @@ func TestDisablePreemption(t *testing.T) {
375
376
func mkPriorityPodWithGrace (tc * TestContext , name string , priority int32 , grace int64 ) * v1.Pod {
376
377
defaultPodRes := & v1.ResourceRequirements {Requests : v1.ResourceList {
377
378
v1 .ResourceCPU : * resource .NewMilliQuantity (100 , resource .DecimalSI ),
378
- v1 .ResourceMemory : * resource .NewQuantity (100 , resource .BinarySI )},
379
+ v1 .ResourceMemory : * resource .NewQuantity (100 , resource .DecimalSI )},
379
380
}
380
381
pod := initPausePod (tc .clientSet , & pausePodConfig {
381
382
Name : name ,
@@ -420,7 +421,7 @@ func TestPreemptionStarvation(t *testing.T) {
420
421
Priority : & highPriority ,
421
422
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
422
423
v1 .ResourceCPU : * resource .NewMilliQuantity (300 , resource .DecimalSI ),
423
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
424
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
424
425
},
425
426
}),
426
427
},
@@ -430,7 +431,7 @@ func TestPreemptionStarvation(t *testing.T) {
430
431
nodeRes := & v1.ResourceList {
431
432
v1 .ResourcePods : * resource .NewQuantity (32 , resource .DecimalSI ),
432
433
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
433
- v1 .ResourceMemory : * resource .NewQuantity (500 , resource .BinarySI ),
434
+ v1 .ResourceMemory : * resource .NewQuantity (500 , resource .DecimalSI ),
434
435
}
435
436
_ , err := createNode (context .clientSet , "node1" , nodeRes )
436
437
if err != nil {
@@ -490,6 +491,119 @@ func TestPreemptionStarvation(t *testing.T) {
490
491
}
491
492
}
492
493
494
+ // TestPreemptionRaces tests that other scheduling events and operations do not
495
+ // race with the preemption process.
496
+ func TestPreemptionRaces (t * testing.T ) {
497
+ // Initialize scheduler.
498
+ context := initTest (t , "preemption-race" )
499
+ defer cleanupTest (t , context )
500
+ cs := context .clientSet
501
+
502
+ tests := []struct {
503
+ description string
504
+ numInitialPods int // Pods created and executed before running preemptor
505
+ numAdditionalPods int // Pods created after creating the preemptor
506
+ numRepetitions int // Repeat the tests to check races
507
+ preemptor * v1.Pod
508
+ }{
509
+ {
510
+ // This test ensures that while the preempting pod is waiting for the victims
511
+ // terminate, other lower priority pods are not scheduled in the room created
512
+ // after preemption and while the higher priority pods is not scheduled yet.
513
+ description : "ensures that other pods are not scheduled while preemptor is being marked as nominated (issue #72124)" ,
514
+ numInitialPods : 2 ,
515
+ numAdditionalPods : 50 ,
516
+ numRepetitions : 10 ,
517
+ preemptor : initPausePod (cs , & pausePodConfig {
518
+ Name : "preemptor-pod" ,
519
+ Namespace : context .ns .Name ,
520
+ Priority : & highPriority ,
521
+ Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
522
+ v1 .ResourceCPU : * resource .NewMilliQuantity (4900 , resource .DecimalSI ),
523
+ v1 .ResourceMemory : * resource .NewQuantity (4900 , resource .DecimalSI )},
524
+ },
525
+ }),
526
+ },
527
+ }
528
+
529
+ // Create a node with some resources and a label.
530
+ nodeRes := & v1.ResourceList {
531
+ v1 .ResourcePods : * resource .NewQuantity (100 , resource .DecimalSI ),
532
+ v1 .ResourceCPU : * resource .NewMilliQuantity (5000 , resource .DecimalSI ),
533
+ v1 .ResourceMemory : * resource .NewQuantity (5000 , resource .DecimalSI ),
534
+ }
535
+ _ , err := createNode (context .clientSet , "node1" , nodeRes )
536
+ if err != nil {
537
+ t .Fatalf ("Error creating nodes: %v" , err )
538
+ }
539
+
540
+ for _ , test := range tests {
541
+ if test .numRepetitions <= 0 {
542
+ test .numRepetitions = 1
543
+ }
544
+ for n := 0 ; n < test .numRepetitions ; n ++ {
545
+ initialPods := make ([]* v1.Pod , test .numInitialPods )
546
+ additionalPods := make ([]* v1.Pod , test .numAdditionalPods )
547
+ // Create and run existingPods.
548
+ for i := 0 ; i < test .numInitialPods ; i ++ {
549
+ initialPods [i ], err = createPausePod (cs , mkPriorityPodWithGrace (context , fmt .Sprintf ("rpod-%v" , i ), mediumPriority , 0 ))
550
+ if err != nil {
551
+ t .Fatalf ("Test [%v]: Error creating pause pod: %v" , test .description , err )
552
+ }
553
+ }
554
+ // make sure that initial Pods are all scheduled.
555
+ for _ , p := range initialPods {
556
+ if err := waitForPodToSchedule (cs , p ); err != nil {
557
+ t .Fatalf ("Pod %v/%v didn't get scheduled: %v" , p .Namespace , p .Name , err )
558
+ }
559
+ }
560
+ // Create the preemptor.
561
+ klog .Info ("Creating the preemptor pod..." )
562
+ preemptor , err := createPausePod (cs , test .preemptor )
563
+ if err != nil {
564
+ t .Errorf ("Error while creating the preempting pod: %v" , err )
565
+ }
566
+
567
+ klog .Info ("Creating additional pods..." )
568
+ for i := 0 ; i < test .numAdditionalPods ; i ++ {
569
+ additionalPods [i ], err = createPausePod (cs , mkPriorityPodWithGrace (context , fmt .Sprintf ("ppod-%v" , i ), mediumPriority , 0 ))
570
+ if err != nil {
571
+ t .Fatalf ("Test [%v]: Error creating pending pod: %v" , test .description , err )
572
+ }
573
+ }
574
+ // Check that the preemptor pod gets nominated node name.
575
+ if err := waitForNominatedNodeName (cs , preemptor ); err != nil {
576
+ t .Errorf ("Test [%v]: NominatedNodeName annotation was not set for pod %v/%v: %v" , test .description , preemptor .Namespace , preemptor .Name , err )
577
+ }
578
+ // Make sure that preemptor is scheduled after preemptions.
579
+ if err := waitForPodToScheduleWithTimeout (cs , preemptor , 60 * time .Second ); err != nil {
580
+ t .Errorf ("Preemptor pod %v didn't get scheduled: %v" , preemptor .Name , err )
581
+ }
582
+
583
+ klog .Info ("Check unschedulable pods still exists and were never scheduled..." )
584
+ for _ , p := range additionalPods {
585
+ pod , err := cs .CoreV1 ().Pods (p .Namespace ).Get (p .Name , metav1.GetOptions {})
586
+ if err != nil {
587
+ t .Errorf ("Error in getting Pod %v/%v info: %v" , p .Namespace , p .Name , err )
588
+ }
589
+ if len (pod .Spec .NodeName ) > 0 {
590
+ t .Errorf ("Pod %v/%v is already scheduled" , p .Namespace , p .Name )
591
+ }
592
+ _ , cond := podutil .GetPodCondition (& pod .Status , v1 .PodScheduled )
593
+ if cond != nil && cond .Status != v1 .ConditionFalse {
594
+ t .Errorf ("Pod %v/%v is no longer unschedulable: %v" , p .Namespace , p .Name , err )
595
+ }
596
+ }
597
+ // Cleanup
598
+ klog .Info ("Cleaning up all pods..." )
599
+ allPods := additionalPods
600
+ allPods = append (allPods , initialPods ... )
601
+ allPods = append (allPods , preemptor )
602
+ cleanupPods (cs , t , allPods )
603
+ }
604
+ }
605
+ }
606
+
493
607
// TestNominatedNodeCleanUp checks that when there are nominated pods on a
494
608
// node and a higher priority pod is nominated to run on the node, the nominated
495
609
// node name of the lower priority pods is cleared.
@@ -515,7 +629,7 @@ func TestNominatedNodeCleanUp(t *testing.T) {
515
629
nodeRes := & v1.ResourceList {
516
630
v1 .ResourcePods : * resource .NewQuantity (32 , resource .DecimalSI ),
517
631
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
518
- v1 .ResourceMemory : * resource .NewQuantity (500 , resource .BinarySI ),
632
+ v1 .ResourceMemory : * resource .NewQuantity (500 , resource .DecimalSI ),
519
633
}
520
634
_ , err := createNode (context .clientSet , "node1" , nodeRes )
521
635
if err != nil {
@@ -543,7 +657,7 @@ func TestNominatedNodeCleanUp(t *testing.T) {
543
657
Priority : & mediumPriority ,
544
658
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
545
659
v1 .ResourceCPU : * resource .NewMilliQuantity (400 , resource .DecimalSI ),
546
- v1 .ResourceMemory : * resource .NewQuantity (400 , resource .BinarySI )},
660
+ v1 .ResourceMemory : * resource .NewQuantity (400 , resource .DecimalSI )},
547
661
},
548
662
})
549
663
medPriPod , err := createPausePod (cs , podConf )
@@ -561,7 +675,7 @@ func TestNominatedNodeCleanUp(t *testing.T) {
561
675
Priority : & highPriority ,
562
676
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
563
677
v1 .ResourceCPU : * resource .NewMilliQuantity (300 , resource .DecimalSI ),
564
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
678
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
565
679
},
566
680
})
567
681
highPriPod , err := createPausePod (cs , podConf )
@@ -626,12 +740,12 @@ func TestPDBInPreemption(t *testing.T) {
626
740
627
741
defaultPodRes := & v1.ResourceRequirements {Requests : v1.ResourceList {
628
742
v1 .ResourceCPU : * resource .NewMilliQuantity (100 , resource .DecimalSI ),
629
- v1 .ResourceMemory : * resource .NewQuantity (100 , resource .BinarySI )},
743
+ v1 .ResourceMemory : * resource .NewQuantity (100 , resource .DecimalSI )},
630
744
}
631
745
defaultNodeRes := & v1.ResourceList {
632
746
v1 .ResourcePods : * resource .NewQuantity (32 , resource .DecimalSI ),
633
747
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
634
- v1 .ResourceMemory : * resource .NewQuantity (500 , resource .BinarySI ),
748
+ v1 .ResourceMemory : * resource .NewQuantity (500 , resource .DecimalSI ),
635
749
}
636
750
637
751
type nodeConfig struct {
@@ -683,7 +797,7 @@ func TestPDBInPreemption(t *testing.T) {
683
797
Priority : & highPriority ,
684
798
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
685
799
v1 .ResourceCPU : * resource .NewMilliQuantity (300 , resource .DecimalSI ),
686
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
800
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
687
801
},
688
802
}),
689
803
preemptedPodIndexes : map [int ]struct {}{2 : {}},
@@ -721,7 +835,7 @@ func TestPDBInPreemption(t *testing.T) {
721
835
Priority : & highPriority ,
722
836
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
723
837
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
724
- v1 .ResourceMemory : * resource .NewQuantity (200 , resource .BinarySI )},
838
+ v1 .ResourceMemory : * resource .NewQuantity (200 , resource .DecimalSI )},
725
839
},
726
840
}),
727
841
preemptedPodIndexes : map [int ]struct {}{1 : {}},
@@ -801,7 +915,7 @@ func TestPDBInPreemption(t *testing.T) {
801
915
Priority : & highPriority ,
802
916
Resources : & v1.ResourceRequirements {Requests : v1.ResourceList {
803
917
v1 .ResourceCPU : * resource .NewMilliQuantity (500 , resource .DecimalSI ),
804
- v1 .ResourceMemory : * resource .NewQuantity (400 , resource .BinarySI )},
918
+ v1 .ResourceMemory : * resource .NewQuantity (400 , resource .DecimalSI )},
805
919
},
806
920
}),
807
921
// The third node is chosen because PDB is not violated for node 3 and the victims have lower priority than node-2.
0 commit comments