@@ -78,24 +78,30 @@ type InTreeToCSITranslator interface {
78
78
//
79
79
// This integrates into the existing scheduler workflow as follows:
80
80
// 1. The scheduler takes a Pod off the scheduler queue and processes it serially:
81
- // a. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
82
- // b. Invokes all score plugins. Future/TBD
83
- // c. Selects the best node for the Pod.
84
- // d. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
81
+ // a. Invokes all pre-filter plugins for the pod. GetPodVolumes() is invoked
82
+ // here, and the pod's volume information is saved in the current scheduling cycle state for later use.
83
+ // b. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
84
+ // c. Invokes all score plugins. Future/TBD
85
+ // d. Selects the best node for the Pod.
86
+ // e. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
85
87
// i. If PVC binding is required, cache in-memory only:
86
88
// * For manual binding: update PV objects for prebinding to the corresponding PVCs.
87
89
// * For dynamic provisioning: update PVC object with a selected node from d)
88
90
// * For the pod, which PVCs and PVs need API updates.
89
91
// ii. Afterwards, the main scheduler caches the Pod->Node binding in the scheduler's pod cache.
90
92
// This is handled in the scheduler and not here.
91
- // e . Asynchronously bind volumes and pod in a separate goroutine
93
+ // f. Asynchronously bind volumes and pod in a separate goroutine
92
94
// i. BindPodVolumes() is called first in PreBind phase. It makes all the necessary API updates and waits for
93
95
// PV controller to fully bind and provision the PVCs. If binding fails, the Pod is sent
94
96
// back through the scheduler.
95
97
// ii. After BindPodVolumes() is complete, then the scheduler does the final Pod->Node binding.
96
98
// 2. Once all the assume operations are done in e), the scheduler processes the next Pod in the scheduler queue
97
99
// while the actual binding operation occurs in the background.
98
100
type SchedulerVolumeBinder interface {
101
+ // GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
102
+ // and unbound with immediate binding (including prebound).
103
+ GetPodVolumes (pod * v1.Pod ) (boundClaims , unboundClaimsDelayBinding , unboundClaimsImmediate []* v1.PersistentVolumeClaim , err error )
104
+
99
105
// FindPodVolumes checks if all of a Pod's PVCs can be satisfied by the node.
100
106
//
101
107
// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
@@ -105,7 +111,7 @@ type SchedulerVolumeBinder interface {
105
111
// (currently) not usable for the pod.
106
112
//
107
113
// This function is called by the volume binding scheduler predicate and can be called in parallel
108
- FindPodVolumes (pod * v1.Pod , node * v1.Node ) (reasons ConflictReasons , err error )
114
+ FindPodVolumes (pod * v1.Pod , boundClaims , claimsToBind [] * v1. PersistentVolumeClaim , node * v1.Node ) (reasons ConflictReasons , err error )
109
115
110
116
// AssumePodVolumes will:
111
117
// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
@@ -194,7 +200,7 @@ func (b *volumeBinder) DeletePodBindings(pod *v1.Pod) {
194
200
// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
195
201
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
196
202
// That's necessary because some operations will need to pass fake node objects in to the predicate.
197
- func (b * volumeBinder ) FindPodVolumes (pod * v1.Pod , node * v1.Node ) (reasons ConflictReasons , err error ) {
203
+ func (b * volumeBinder ) FindPodVolumes (pod * v1.Pod , boundClaims , claimsToBind [] * v1. PersistentVolumeClaim , node * v1.Node ) (reasons ConflictReasons , err error ) {
198
204
podName := getPodName (pod )
199
205
200
206
// Warning: Below log needs high verbosity as it can be printed several times (#60933).
@@ -248,18 +254,6 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons Confl
248
254
b .podBindingCache .UpdateBindings (pod , node .Name , matchedBindings , provisionedClaims )
249
255
}()
250
256
251
- // The pod's volumes need to be processed in one call to avoid the race condition where
252
- // volumes can get bound/provisioned in between calls.
253
- boundClaims , claimsToBind , unboundClaimsImmediate , err := b .getPodVolumes (pod )
254
- if err != nil {
255
- return nil , err
256
- }
257
-
258
- // Immediate claims should be bound
259
- if len (unboundClaimsImmediate ) > 0 {
260
- return nil , fmt .Errorf ("pod has unbound immediate PersistentVolumeClaims" )
261
- }
262
-
263
257
// Check PV node affinity on bound volumes
264
258
if len (boundClaims ) > 0 {
265
259
boundVolumesSatisfied , err = b .checkBoundClaims (boundClaims , node , podName )
@@ -684,9 +678,9 @@ func (b *volumeBinder) arePodVolumesBound(pod *v1.Pod) bool {
684
678
return true
685
679
}
686
680
687
- // getPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
681
+ // GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
688
682
// and unbound with immediate binding (including prebound).
689
- func (b * volumeBinder ) getPodVolumes (pod * v1.Pod ) (boundClaims []* v1.PersistentVolumeClaim , unboundClaimsDelayBinding []* v1.PersistentVolumeClaim , unboundClaimsImmediate []* v1.PersistentVolumeClaim , err error ) {
683
+ func (b * volumeBinder ) GetPodVolumes (pod * v1.Pod ) (boundClaims []* v1.PersistentVolumeClaim , unboundClaimsDelayBinding []* v1.PersistentVolumeClaim , unboundClaimsImmediate []* v1.PersistentVolumeClaim , err error ) {
690
684
boundClaims = []* v1.PersistentVolumeClaim {}
691
685
unboundClaimsImmediate = []* v1.PersistentVolumeClaim {}
692
686
unboundClaimsDelayBinding = []* v1.PersistentVolumeClaim {}
0 commit comments