Skip to content

Commit d4ce66f

Browse files
authored
Merge pull request kubernetes#90660 from Huang-Wei/synced-sched-err-call
Move unschedulable Pod to internal schedulingQ synchronously
2 parents b170451 + 133a025 commit d4ce66f

File tree

4 files changed

+142
-182
lines changed

4 files changed

+142
-182
lines changed

pkg/scheduler/factory.go

Lines changed: 13 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ import (
2929
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3030
"k8s.io/apimachinery/pkg/fields"
3131
"k8s.io/apimachinery/pkg/runtime"
32-
"k8s.io/apimachinery/pkg/types"
33-
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
3432
"k8s.io/apimachinery/pkg/util/sets"
3533
utilfeature "k8s.io/apiserver/pkg/util/feature"
3634
"k8s.io/client-go/informers"
@@ -58,11 +56,6 @@ import (
5856
"k8s.io/kubernetes/pkg/scheduler/profile"
5957
)
6058

61-
const (
62-
initialGetBackoff = 100 * time.Millisecond
63-
maximalGetBackoff = time.Minute
64-
)
65-
6659
// Binder knows how to write a binding.
6760
type Binder interface {
6861
Bind(binding *v1.Binding) error
@@ -206,7 +199,7 @@ func (c *Configurator) create() (*Scheduler, error) {
206199
Algorithm: algo,
207200
Profiles: profiles,
208201
NextPod: internalqueue.MakeNextPodFunc(podQueue),
209-
Error: MakeDefaultErrorFunc(c.client, podQueue, c.schedulerCache),
202+
Error: MakeDefaultErrorFunc(c.client, c.informerFactory.Core().V1().Pods().Lister(), podQueue, c.schedulerCache),
210203
StopEverything: c.StopEverything,
211204
SchedulingQueue: podQueue,
212205
}, nil
@@ -476,7 +469,7 @@ func NewPodInformer(client clientset.Interface, resyncPeriod time.Duration) core
476469
}
477470

478471
// MakeDefaultErrorFunc constructs a function that handles pod scheduling errors.
479-
func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.SchedulingQueue, schedulerCache internalcache.Cache) func(*framework.QueuedPodInfo, error) {
472+
func MakeDefaultErrorFunc(client clientset.Interface, podLister corelisters.PodLister, podQueue internalqueue.SchedulingQueue, schedulerCache internalcache.Cache) func(*framework.QueuedPodInfo, error) {
480473
return func(podInfo *framework.QueuedPodInfo, err error) {
481474
pod := podInfo.Pod
482475
if err == core.ErrNoNodesAvailable {
@@ -501,40 +494,17 @@ func MakeDefaultErrorFunc(client clientset.Interface, podQueue internalqueue.Sch
501494
klog.Errorf("Error scheduling %v/%v: %v; retrying", pod.Namespace, pod.Name, err)
502495
}
503496

504-
podSchedulingCycle := podQueue.SchedulingCycle()
505-
// Retry asynchronously.
506-
// Note that this is extremely rudimentary and we need a more real error handling path.
507-
go func() {
508-
defer utilruntime.HandleCrash()
509-
podID := types.NamespacedName{
510-
Namespace: pod.Namespace,
511-
Name: pod.Name,
512-
}
513-
514-
// Get the pod again; it may have changed/been scheduled already.
515-
getBackoff := initialGetBackoff
516-
for {
517-
pod, err := client.CoreV1().Pods(podID.Namespace).Get(context.TODO(), podID.Name, metav1.GetOptions{})
518-
if err == nil {
519-
if len(pod.Spec.NodeName) == 0 {
520-
podInfo.Pod = pod
521-
if err := podQueue.AddUnschedulableIfNotPresent(podInfo, podSchedulingCycle); err != nil {
522-
klog.Error(err)
523-
}
524-
}
525-
break
526-
}
527-
if apierrors.IsNotFound(err) {
528-
klog.Warningf("A pod %v no longer exists", podID)
529-
return
530-
}
531-
klog.Errorf("Error getting pod %v for retry: %v; retrying...", podID, err)
532-
if getBackoff = getBackoff * 2; getBackoff > maximalGetBackoff {
533-
getBackoff = maximalGetBackoff
534-
}
535-
time.Sleep(getBackoff)
536-
}
537-
}()
497+
// Check if the Pod exists in informer cache.
498+
cachedPod, err := podLister.Pods(pod.Namespace).Get(pod.Name)
499+
if err != nil {
500+
klog.Warningf("Pod %v/%v doesn't exist in informer cache: %v", pod.Namespace, pod.Name, err)
501+
return
502+
}
503+
// As <cachedPod> is from SharedInformer, we need to do a DeepCopy() here.
504+
podInfo.Pod = cachedPod.DeepCopy()
505+
if err := podQueue.AddUnschedulableIfNotPresent(podInfo, podQueue.SchedulingCycle()); err != nil {
506+
klog.Error(err)
507+
}
538508
}
539509
}
540510

0 commit comments

Comments
 (0)