Skip to content

Commit 089457e

Browse files
committed
fix: check correctly if the event is scale down
Signed-off-by: Kensei Nakada <[email protected]>
1 parent e395715 commit 089457e

File tree

3 files changed

+123
-2
lines changed

3 files changed

+123
-2
lines changed

pkg/scheduler/backend/queue/scheduling_queue.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -989,10 +989,10 @@ func (p *PriorityQueue) AssignedPodAdded(logger klog.Logger, pod *v1.Pod) {
989989
// may make pending pods with matching affinity terms schedulable.
990990
func (p *PriorityQueue) AssignedPodUpdated(logger klog.Logger, oldPod, newPod *v1.Pod, event framework.ClusterEvent) {
991991
p.lock.Lock()
992-
if event.Resource == framework.Pod && event.ActionType&framework.UpdatePodScaleDown != 0 {
992+
if (framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodScaleDown}.Match(event)) {
993993
// In this case, we don't want to pre-filter Pods by getUnschedulablePodsWithCrossTopologyTerm
994994
// because Pod related events may make Pods that were rejected by NodeResourceFit schedulable.
995-
p.moveAllToActiveOrBackoffQueue(logger, framework.EventAssignedPodUpdate, oldPod, newPod, nil)
995+
p.moveAllToActiveOrBackoffQueue(logger, event, oldPod, newPod, nil)
996996
} else {
997997
// Pre-filter Pods to move by getUnschedulablePodsWithCrossTopologyTerm
998998
// because Pod related events only make Pods rejected by cross topology term schedulable.

pkg/scheduler/backend/queue/scheduling_queue_test.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,6 +2053,121 @@ func TestPriorityQueue_AssignedPodAdded_(t *testing.T) {
20532053
}
20542054
}
20552055

2056+
func TestPriorityQueue_AssignedPodUpdated(t *testing.T) {
2057+
metrics.Register()
2058+
tests := []struct {
2059+
name string
2060+
unschedPod *v1.Pod
2061+
unschedPlugin string
2062+
updatedAssignedPod *v1.Pod
2063+
event framework.ClusterEvent
2064+
wantToRequeue bool
2065+
}{
2066+
{
2067+
name: "Pod rejected by pod affinity is requeued with matching Pod's update",
2068+
unschedPod: st.MakePod().Name("afp").Namespace("ns1").UID("afp").Annotation("annot2", "val2").PodAffinityExists("service", "region", st.PodAffinityWithRequiredReq).Obj(),
2069+
unschedPlugin: names.InterPodAffinity,
2070+
event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel},
2071+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("ns1").Label("service", "securityscan").Node("node1").Obj(),
2072+
wantToRequeue: true,
2073+
},
2074+
{
2075+
name: "Pod rejected by pod affinity isn't requeued with unrelated Pod's update",
2076+
unschedPod: st.MakePod().Name("afp").Namespace("ns1").UID("afp").Annotation("annot2", "val2").PodAffinityExists("service", "region", st.PodAffinityWithRequiredReq).Obj(),
2077+
unschedPlugin: names.InterPodAffinity,
2078+
event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel},
2079+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("unrelated").Label("unrelated", "unrelated").Node("node1").Obj(),
2080+
wantToRequeue: false,
2081+
},
2082+
{
2083+
name: "Pod rejected by pod topology spread is requeued with Pod's update in the same namespace",
2084+
unschedPod: st.MakePod().Name("tsp").Namespace("ns1").UID("tsp").SpreadConstraint(1, "node", v1.DoNotSchedule, nil, nil, nil, nil, nil).Obj(),
2085+
unschedPlugin: names.PodTopologySpread,
2086+
event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel},
2087+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("ns1").Label("service", "securityscan").Node("node1").Obj(),
2088+
wantToRequeue: true,
2089+
},
2090+
{
2091+
name: "Pod rejected by pod topology spread isn't requeued with unrelated Pod's update",
2092+
unschedPod: st.MakePod().Name("afp").Namespace("ns1").UID("afp").Annotation("annot2", "val2").PodAffinityExists("service", "region", st.PodAffinityWithRequiredReq).Obj(),
2093+
unschedPlugin: names.PodTopologySpread,
2094+
event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel},
2095+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("unrelated").Label("unrelated", "unrelated").Node("node1").Obj(),
2096+
wantToRequeue: false,
2097+
},
2098+
{
2099+
name: "Pod rejected by resource fit is requeued with assigned Pod's scale down",
2100+
unschedPod: st.MakePod().Name("rp").Namespace("ns1").UID("afp").Annotation("annot2", "val2").Req(map[v1.ResourceName]string{v1.ResourceCPU: "1"}).Obj(),
2101+
unschedPlugin: names.NodeResourcesFit,
2102+
event: framework.ClusterEvent{Resource: framework.EventResource("AssignedPod"), ActionType: framework.UpdatePodScaleDown},
2103+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("ns2").Node("node1").Obj(),
2104+
wantToRequeue: true,
2105+
},
2106+
{
2107+
name: "Pod rejected by other plugins isn't requeued with any Pod's update",
2108+
unschedPod: st.MakePod().Name("afp").Namespace("ns1").UID("afp").Annotation("annot2", "val2").Obj(),
2109+
unschedPlugin: "fakePlugin",
2110+
event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodLabel},
2111+
updatedAssignedPod: st.MakePod().Name("lbp").Namespace("unrelated").Label("unrelated", "unrelated").Node("node1").Obj(),
2112+
wantToRequeue: false,
2113+
},
2114+
}
2115+
2116+
for _, tt := range tests {
2117+
t.Run(tt.name, func(t *testing.T) {
2118+
logger, ctx := ktesting.NewTestContext(t)
2119+
ctx, cancel := context.WithCancel(ctx)
2120+
defer cancel()
2121+
2122+
c := testingclock.NewFakeClock(time.Now())
2123+
m := makeEmptyQueueingHintMapPerProfile()
2124+
m[""] = map[framework.ClusterEvent][]*QueueingHintFunction{
2125+
{Resource: framework.Pod, ActionType: framework.UpdatePodLabel}: {
2126+
{
2127+
PluginName: "fakePlugin",
2128+
QueueingHintFn: queueHintReturnQueue,
2129+
},
2130+
{
2131+
PluginName: names.InterPodAffinity,
2132+
QueueingHintFn: queueHintReturnQueue,
2133+
},
2134+
{
2135+
PluginName: names.PodTopologySpread,
2136+
QueueingHintFn: queueHintReturnQueue,
2137+
},
2138+
},
2139+
{Resource: framework.Pod, ActionType: framework.UpdatePodScaleDown}: {
2140+
{
2141+
PluginName: names.NodeResourcesFit,
2142+
QueueingHintFn: queueHintReturnQueue,
2143+
},
2144+
},
2145+
}
2146+
q := NewTestQueue(ctx, newDefaultQueueSort(), WithClock(c), WithQueueingHintMapPerProfile(m))
2147+
2148+
// To simulate the pod is failed in scheduling in the real world, Pop() the pod from activeQ before AddUnschedulableIfNotPresent()s below.
2149+
q.Add(logger, tt.unschedPod)
2150+
if p, err := q.Pop(logger); err != nil || p.Pod != tt.unschedPod {
2151+
t.Errorf("Expected: %v after Pop, but got: %v", tt.unschedPod.Name, p.Pod.Name)
2152+
}
2153+
2154+
err := q.AddUnschedulableIfNotPresent(logger, q.newQueuedPodInfo(tt.unschedPod, tt.unschedPlugin), q.SchedulingCycle())
2155+
if err != nil {
2156+
t.Fatalf("unexpected error from AddUnschedulableIfNotPresent: %v", err)
2157+
}
2158+
2159+
// Move clock to make the unschedulable pods complete backoff.
2160+
c.Step(DefaultPodInitialBackoffDuration + time.Second)
2161+
2162+
q.AssignedPodUpdated(logger, nil, tt.updatedAssignedPod, tt.event)
2163+
2164+
if q.activeQ.has(newQueuedPodInfoForLookup(tt.unschedPod)) != tt.wantToRequeue {
2165+
t.Fatalf("unexpected Pod move: Pod should be requeued: %v. Pod is actually requeued: %v", tt.wantToRequeue, !tt.wantToRequeue)
2166+
}
2167+
})
2168+
}
2169+
}
2170+
20562171
func TestPriorityQueue_NominatedPodsForNode(t *testing.T) {
20572172
objs := []runtime.Object{medPriorityPodInfo.Pod, unschedulablePodInfo.Pod, highPriorityPodInfo.Pod}
20582173
logger, ctx := ktesting.NewTestContext(t)

pkg/scheduler/framework/types_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,12 @@ func TestCloudEvent_Match(t *testing.T) {
17091709
comingEvent: ClusterEvent{Resource: Pod, ActionType: UpdateNodeLabel},
17101710
wantResult: true,
17111711
},
1712+
{
1713+
name: "event with resource = 'Pod' matching with coming events carries unschedulablePod",
1714+
event: ClusterEvent{Resource: Pod, ActionType: UpdateNodeLabel | UpdateNodeTaint},
1715+
comingEvent: ClusterEvent{Resource: unschedulablePod, ActionType: UpdateNodeLabel},
1716+
wantResult: true,
1717+
},
17121718
{
17131719
name: "event with resource = '*' matching with coming events carries same actionType",
17141720
event: ClusterEvent{Resource: WildCard, ActionType: UpdateNodeLabel},

0 commit comments

Comments
 (0)