Skip to content

Commit 1385280

Browse files
authored
Merge pull request kubernetes#91775 from cofyc/fix91755
VolumeBinding: Skip/fail fast in PreFilter phase and improve error reporting
2 parents f705d62 + 814a6f2 commit 1385280

File tree

13 files changed

+364
-89
lines changed

13 files changed

+364
-89
lines changed

cmd/kube-scheduler/app/server_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ profiles:
156156
{Name: "NodePorts"},
157157
{Name: "PodTopologySpread"},
158158
{Name: "InterPodAffinity"},
159+
{Name: "VolumeBinding"},
159160
},
160161
"FilterPlugin": {
161162
{Name: "NodeUnschedulable"},
@@ -287,6 +288,7 @@ profiles:
287288
{Name: "NodePorts"},
288289
{Name: "PodTopologySpread"},
289290
{Name: "InterPodAffinity"},
291+
{Name: "VolumeBinding"},
290292
},
291293
"FilterPlugin": {
292294
{Name: "NodeUnschedulable"},

pkg/controller/volume/scheduling/scheduler_binder.go

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -78,24 +78,30 @@ type InTreeToCSITranslator interface {
7878
//
7979
// This integrates into the existing scheduler workflow as follows:
8080
// 1. The scheduler takes a Pod off the scheduler queue and processes it serially:
81-
// a. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
82-
// b. Invokes all score plugins. Future/TBD
83-
// c. Selects the best node for the Pod.
84-
// d. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
81+
// a. Invokes all pre-filter plugins for the pod. GetPodVolumes() is invoked
82+
// here, pod volume information will be saved in current scheduling cycle state for later use.
83+
// b. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
84+
// c. Invokes all score plugins. Future/TBD
85+
// d. Selects the best node for the Pod.
86+
// e. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
8587
// i. If PVC binding is required, cache in-memory only:
8688
// * For manual binding: update PV objects for prebinding to the corresponding PVCs.
8789
// * For dynamic provisioning: update PVC object with a selected node from c)
8890
// * For the pod, which PVCs and PVs need API updates.
8991
// ii. Afterwards, the main scheduler caches the Pod->Node binding in the scheduler's pod cache,
9092
// This is handled in the scheduler and not here.
91-
// e. Asynchronously bind volumes and pod in a separate goroutine
93+
// f. Asynchronously bind volumes and pod in a separate goroutine
9294
// i. BindPodVolumes() is called first in PreBind phase. It makes all the necessary API updates and waits for
9395
// PV controller to fully bind and provision the PVCs. If binding fails, the Pod is sent
9496
// back through the scheduler.
9597
// ii. After BindPodVolumes() is complete, then the scheduler does the final Pod->Node binding.
9698
// 2. Once all the assume operations are done in d), the scheduler processes the next Pod in the scheduler queue
9799
// while the actual binding operation occurs in the background.
98100
type SchedulerVolumeBinder interface {
101+
// GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
102+
// and unbound with immediate binding (including prebound)
103+
GetPodVolumes(pod *v1.Pod) (boundClaims, unboundClaimsDelayBinding, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error)
104+
99105
// FindPodVolumes checks if all of a Pod's PVCs can be satisfied by the node.
100106
//
101107
// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
@@ -105,7 +111,7 @@ type SchedulerVolumeBinder interface {
105111
// (currently) not usable for the pod.
106112
//
107113
// This function is called by the volume binding scheduler predicate and can be called in parallel
108-
FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error)
114+
FindPodVolumes(pod *v1.Pod, boundClaims, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error)
109115

110116
// AssumePodVolumes will:
111117
// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
@@ -194,7 +200,7 @@ func (b *volumeBinder) DeletePodBindings(pod *v1.Pod) {
194200
// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
195201
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
196202
// That's necessary because some operations will need to pass in to the predicate fake node objects.
197-
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
203+
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, boundClaims, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error) {
198204
podName := getPodName(pod)
199205

200206
// Warning: Below log needs high verbosity as it can be printed several times (#60933).
@@ -248,18 +254,6 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons Confl
248254
b.podBindingCache.UpdateBindings(pod, node.Name, matchedBindings, provisionedClaims)
249255
}()
250256

251-
// The pod's volumes need to be processed in one call to avoid the race condition where
252-
// volumes can get bound/provisioned in between calls.
253-
boundClaims, claimsToBind, unboundClaimsImmediate, err := b.getPodVolumes(pod)
254-
if err != nil {
255-
return nil, err
256-
}
257-
258-
// Immediate claims should be bound
259-
if len(unboundClaimsImmediate) > 0 {
260-
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
261-
}
262-
263257
// Check PV node affinity on bound volumes
264258
if len(boundClaims) > 0 {
265259
boundVolumesSatisfied, err = b.checkBoundClaims(boundClaims, node, podName)
@@ -684,9 +678,9 @@ func (b *volumeBinder) arePodVolumesBound(pod *v1.Pod) bool {
684678
return true
685679
}
686680

687-
// getPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
681+
// GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
688682
// and unbound with immediate binding (including prebound)
689-
func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaimsDelayBinding []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
683+
func (b *volumeBinder) GetPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaimsDelayBinding []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
690684
boundClaims = []*v1.PersistentVolumeClaim{}
691685
unboundClaimsImmediate = []*v1.PersistentVolumeClaim{}
692686
unboundClaimsDelayBinding = []*v1.PersistentVolumeClaim{}

pkg/controller/volume/scheduling/scheduler_binder_fake.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,13 @@ type FakeVolumeBinder struct {
4242
BindCalled bool
4343
}
4444

45+
// GetPodVolumes implements SchedulerVolumeBinder.GetPodVolumes.
46+
func (b *FakeVolumeBinder) GetPodVolumes(pod *v1.Pod) (boundClaims, unboundClaimsDelayBinding, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
47+
return nil, nil, nil, nil
48+
}
49+
4550
// FindPodVolumes implements SchedulerVolumeBinder.FindPodVolumes.
46-
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
51+
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, _, _ []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error) {
4752
return b.config.FindReasons, b.config.FindErr
4853
}
4954

pkg/controller/volume/scheduling/scheduler_binder_test.go

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,18 @@ func checkReasons(t *testing.T, actual, expected ConflictReasons) {
771771
}
772772
}
773773

774+
// findPodVolumes gets and finds volumes for given pod and node
775+
func findPodVolumes(binder SchedulerVolumeBinder, pod *v1.Pod, node *v1.Node) (ConflictReasons, error) {
776+
boundClaims, claimsToBind, unboundClaimsImmediate, err := binder.GetPodVolumes(pod)
777+
if err != nil {
778+
return nil, err
779+
}
780+
if len(unboundClaimsImmediate) > 0 {
781+
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
782+
}
783+
return binder.FindPodVolumes(pod, boundClaims, claimsToBind, node)
784+
}
785+
774786
func TestFindPodVolumesWithoutProvisioning(t *testing.T) {
775787
type scenarioType struct {
776788
// Inputs
@@ -907,7 +919,7 @@ func TestFindPodVolumesWithoutProvisioning(t *testing.T) {
907919
}
908920

909921
// Execute
910-
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, testNode)
922+
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, testNode)
911923

912924
// Validate
913925
if !scenario.shouldFail && err != nil {
@@ -1012,7 +1024,7 @@ func TestFindPodVolumesWithProvisioning(t *testing.T) {
10121024
}
10131025

10141026
// Execute
1015-
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, testNode)
1027+
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, testNode)
10161028

10171029
// Validate
10181030
if !scenario.shouldFail && err != nil {
@@ -1112,7 +1124,7 @@ func TestFindPodVolumesWithCSIMigration(t *testing.T) {
11121124
}
11131125

11141126
// Execute
1115-
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, node)
1127+
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, node)
11161128

11171129
// Validate
11181130
if !scenario.shouldFail && err != nil {
@@ -1933,7 +1945,7 @@ func TestFindAssumeVolumes(t *testing.T) {
19331945

19341946
// Execute
19351947
// 1. Find matching PVs
1936-
reasons, err := testEnv.binder.FindPodVolumes(pod, testNode)
1948+
reasons, err := findPodVolumes(testEnv.binder, pod, testNode)
19371949
if err != nil {
19381950
t.Errorf("Test failed: FindPodVolumes returned error: %v", err)
19391951
}
@@ -1959,7 +1971,7 @@ func TestFindAssumeVolumes(t *testing.T) {
19591971
// This should always return the original chosen pv
19601972
// Run this many times in case sorting returns different orders for the two PVs.
19611973
for i := 0; i < 50; i++ {
1962-
reasons, err := testEnv.binder.FindPodVolumes(pod, testNode)
1974+
reasons, err := findPodVolumes(testEnv.binder, pod, testNode)
19631975
if err != nil {
19641976
t.Errorf("Test failed: FindPodVolumes returned error: %v", err)
19651977
}

pkg/scheduler/algorithmprovider/registry.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ func getDefaultConfig() *schedulerapi.Plugins {
8787
{Name: nodeports.Name},
8888
{Name: podtopologyspread.Name},
8989
{Name: interpodaffinity.Name},
90+
{Name: volumebinding.Name},
9091
},
9192
},
9293
Filter: &schedulerapi.PluginSet{

pkg/scheduler/algorithmprovider/registry_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ func TestClusterAutoscalerProvider(t *testing.T) {
5858
{Name: nodeports.Name},
5959
{Name: podtopologyspread.Name},
6060
{Name: interpodaffinity.Name},
61+
{Name: volumebinding.Name},
6162
},
6263
},
6364
Filter: &schedulerapi.PluginSet{
@@ -154,6 +155,7 @@ func TestApplyFeatureGates(t *testing.T) {
154155
{Name: nodeports.Name},
155156
{Name: podtopologyspread.Name},
156157
{Name: interpodaffinity.Name},
158+
{Name: volumebinding.Name},
157159
},
158160
},
159161
Filter: &schedulerapi.PluginSet{
@@ -238,6 +240,7 @@ func TestApplyFeatureGates(t *testing.T) {
238240
{Name: nodeports.Name},
239241
{Name: podtopologyspread.Name},
240242
{Name: interpodaffinity.Name},
243+
{Name: volumebinding.Name},
241244
},
242245
},
243246
Filter: &schedulerapi.PluginSet{

pkg/scheduler/apis/config/testing/compatibility_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
666666
{Name: "NodePorts"},
667667
{Name: "NodeResourcesFit"},
668668
{Name: "ServiceAffinity"},
669+
{Name: "VolumeBinding"},
669670
{Name: "InterPodAffinity"},
670671
},
671672
"FilterPlugin": {
@@ -773,6 +774,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
773774
{Name: "NodePorts"},
774775
{Name: "NodeResourcesFit"},
775776
{Name: "ServiceAffinity"},
777+
{Name: "VolumeBinding"},
776778
{Name: "InterPodAffinity"},
777779
},
778780
"FilterPlugin": {
@@ -892,6 +894,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
892894
{Name: "NodePorts"},
893895
{Name: "NodeResourcesFit"},
894896
{Name: "ServiceAffinity"},
897+
{Name: "VolumeBinding"},
895898
{Name: "InterPodAffinity"},
896899
},
897900
"FilterPlugin": {
@@ -1013,6 +1016,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
10131016
{Name: "NodePorts"},
10141017
{Name: "NodeResourcesFit"},
10151018
{Name: "ServiceAffinity"},
1019+
{Name: "VolumeBinding"},
10161020
{Name: "InterPodAffinity"},
10171021
},
10181022
"FilterPlugin": {
@@ -1134,6 +1138,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
11341138
{Name: "NodePorts"},
11351139
{Name: "NodeResourcesFit"},
11361140
{Name: "ServiceAffinity"},
1141+
{Name: "VolumeBinding"},
11371142
{Name: "InterPodAffinity"},
11381143
},
11391144
"FilterPlugin": {
@@ -1260,6 +1265,7 @@ func TestCompatibility_v1_Scheduler(t *testing.T) {
12601265
{Name: "NodePorts"},
12611266
{Name: "NodeResourcesFit"},
12621267
{Name: "ServiceAffinity"},
1268+
{Name: "VolumeBinding"},
12631269
{Name: "InterPodAffinity"},
12641270
},
12651271
"FilterPlugin": {
@@ -1389,6 +1395,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) {
13891395
{Name: "NodePorts"},
13901396
{Name: "PodTopologySpread"},
13911397
{Name: "InterPodAffinity"},
1398+
{Name: "VolumeBinding"},
13921399
},
13931400
"FilterPlugin": {
13941401
{Name: "NodeUnschedulable"},
@@ -1457,6 +1464,7 @@ func TestAlgorithmProviderCompatibility(t *testing.T) {
14571464
{Name: "NodePorts"},
14581465
{Name: "PodTopologySpread"},
14591466
{Name: "InterPodAffinity"},
1467+
{Name: "VolumeBinding"},
14601468
},
14611469
"FilterPlugin": {
14621470
{Name: "NodeUnschedulable"},
@@ -1545,6 +1553,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) {
15451553
{Name: "NodePorts"},
15461554
{Name: "PodTopologySpread"},
15471555
{Name: "InterPodAffinity"},
1556+
{Name: "VolumeBinding"},
15481557
},
15491558
"FilterPlugin": {
15501559
{Name: "NodeUnschedulable"},
@@ -1740,6 +1749,7 @@ func TestPluginsConfigurationCompatibility(t *testing.T) {
17401749
{Name: "NodePorts"},
17411750
{Name: "InterPodAffinity"},
17421751
{Name: "PodTopologySpread"},
1752+
{Name: "VolumeBinding"},
17431753
},
17441754
},
17451755
Filter: &config.PluginSet{

pkg/scheduler/framework/plugins/legacy_registry.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ func NewLegacyRegistry() *LegacyRegistry {
269269
})
270270
registry.registerPredicateConfigProducer(CheckVolumeBindingPred,
271271
func(args ConfigProducerArgs) (plugins config.Plugins, pluginConfig []config.PluginConfig) {
272+
plugins.PreFilter = appendToPluginSet(plugins.PreFilter, volumebinding.Name, nil)
272273
plugins.Filter = appendToPluginSet(plugins.Filter, volumebinding.Name, nil)
273274
plugins.Reserve = appendToPluginSet(plugins.Reserve, volumebinding.Name, nil)
274275
plugins.PreBind = appendToPluginSet(plugins.PreBind, volumebinding.Name, nil)

pkg/scheduler/framework/plugins/volumebinding/BUILD

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,15 @@ go_test(
3636
srcs = ["volume_binding_test.go"],
3737
embed = [":go_default_library"],
3838
deps = [
39+
"//pkg/controller/volume/persistentvolume/util:go_default_library",
3940
"//pkg/controller/volume/scheduling:go_default_library",
41+
"//pkg/scheduler/apis/config:go_default_library",
4042
"//pkg/scheduler/framework/v1alpha1:go_default_library",
4143
"//staging/src/k8s.io/api/core/v1:go_default_library",
44+
"//staging/src/k8s.io/api/storage/v1:go_default_library",
45+
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
46+
"//staging/src/k8s.io/client-go/informers:go_default_library",
47+
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
48+
"//vendor/k8s.io/utils/pointer:go_default_library",
4249
],
4350
)

0 commit comments

Comments
 (0)