Skip to content

Commit 4959819

Browse files
committed
bug(nodeunschedulable): register missing Pod event for NodeUnschedulable plugin
1 parent 048c853 commit 4959819

File tree

3 files changed

+111
-4
lines changed

3 files changed

+111
-4
lines changed

pkg/scheduler/framework/plugins/nodeunschedulable/node_unschedulable.go

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,56 @@ const (
5151
// EventsToRegister returns the possible events that may make a Pod
5252
// failed by this plugin schedulable.
5353
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]framework.ClusterEventWithHint, error) {
54-
// preCheck is not used when QHint is enabled.
54+
if !pl.enableSchedulingQueueHint {
55+
return []framework.ClusterEventWithHint{
56+
// A note about UpdateNodeLabel event:
57+
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
58+
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
59+
// See: https://github.com/kubernetes/kubernetes/issues/109437
60+
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint | framework.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
61+
}, nil
62+
}
63+
5564
return []framework.ClusterEventWithHint{
65+
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeLabel event.
5666
{Event: framework.ClusterEvent{Resource: framework.Node, ActionType: framework.Add | framework.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
67+
// When the QueueingHint feature is enabled,
68+
// the scheduling queue uses Pod/Update Queueing Hint
69+
// to determine whether a Pod's update makes the Pod schedulable or not.
70+
// https://github.com/kubernetes/kubernetes/pull/122234
71+
{Event: framework.ClusterEvent{Resource: framework.Pod, ActionType: framework.UpdatePodTolerations}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
5772
}, nil
5873
}
5974

75+
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
76+
func (pl *NodeUnschedulable) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (framework.QueueingHint, error) {
77+
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
78+
if err != nil {
79+
return framework.Queue, err
80+
}
81+
82+
if pod.UID == modifiedPod.UID {
83+
// Note: we don't need to check oldPod tolerations the taint because:
84+
// - Taint can be added, but can't be modified nor removed.
85+
// - If the Pod already has the toleration, it shouldn't have rejected by this plugin in the first place.
86+
// Meaning, here this Pod has been rejected by this plugin, and hence it shouldn't have the toleration yet.
87+
if v1helper.TolerationsTolerateTaint(modifiedPod.Spec.Tolerations, &v1.Taint{
88+
Key: v1.TaintNodeUnschedulable,
89+
Effect: v1.TaintEffectNoSchedule,
90+
}) {
91+
// This update makes the pod tolerate the unschedulable taint.
92+
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
93+
return framework.Queue, nil
94+
}
95+
logger.V(5).Info("a new toleration is added for the unschedulable Pod, but it's an unrelated toleration", "pod", klog.KObj(modifiedPod))
96+
return framework.QueueSkip, nil
97+
}
98+
99+
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
100+
101+
return framework.QueueSkip, nil
102+
}
103+
60104
// isSchedulableAfterNodeChange is invoked for all node events reported by
61105
// an informer. It checks whether that change made a previously unschedulable
62106
// pod schedulable.

pkg/scheduler/framework/plugins/nodeunschedulable/node_unschedulable_test.go

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
v1 "k8s.io/api/core/v1"
2424
"k8s.io/kubernetes/pkg/scheduler/framework"
2525
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
26+
st "k8s.io/kubernetes/pkg/scheduler/testing"
2627
"k8s.io/kubernetes/test/utils/ktesting"
2728
)
2829

@@ -96,14 +97,14 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
9697
expectedErr bool
9798
}{
9899
{
99-
name: "backoff-wrong-new-object",
100+
name: "error-wrong-new-object",
100101
pod: &v1.Pod{},
101102
newObj: "not-a-node",
102103
expectedHint: framework.Queue,
103104
expectedErr: true,
104105
},
105106
{
106-
name: "backoff-wrong-old-object",
107+
name: "error-wrong-old-object",
107108
pod: &v1.Pod{},
108109
newObj: &v1.Node{
109110
Spec: v1.NodeSpec{
@@ -186,3 +187,64 @@ func TestIsSchedulableAfterNodeChange(t *testing.T) {
186187
})
187188
}
188189
}
190+
191+
func TestIsSchedulableAfterPodTolerationChange(t *testing.T) {
192+
testCases := []struct {
193+
name string
194+
pod *v1.Pod
195+
oldObj, newObj interface{}
196+
expectedHint framework.QueueingHint
197+
expectedErr bool
198+
}{
199+
{
200+
name: "error-wrong-new-object",
201+
pod: &v1.Pod{},
202+
newObj: "not-a-pod",
203+
expectedHint: framework.Queue,
204+
expectedErr: true,
205+
},
206+
{
207+
name: "error-wrong-old-object",
208+
pod: &v1.Pod{},
209+
newObj: &v1.Pod{},
210+
oldObj: "not-a-pod",
211+
expectedHint: framework.Queue,
212+
expectedErr: true,
213+
},
214+
{
215+
name: "skip-different-pod-update",
216+
pod: st.MakePod().UID("uid").Obj(),
217+
newObj: st.MakePod().UID("different-uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
218+
oldObj: st.MakePod().UID("different-uid").Obj(),
219+
expectedHint: framework.QueueSkip,
220+
},
221+
{
222+
name: "queue-when-the-unsched-pod-gets-toleration",
223+
pod: st.MakePod().UID("uid").Obj(),
224+
newObj: st.MakePod().UID("uid").Toleration(v1.TaintNodeUnschedulable).Obj(),
225+
oldObj: st.MakePod().UID("uid").Obj(),
226+
expectedHint: framework.Queue,
227+
},
228+
{
229+
name: "skip-when-the-unsched-pod-gets-unrelated-toleration",
230+
pod: st.MakePod().UID("uid").Obj(),
231+
newObj: st.MakePod().UID("uid").Toleration("unrelated-key").Obj(),
232+
oldObj: st.MakePod().UID("uid").Obj(),
233+
expectedHint: framework.QueueSkip,
234+
},
235+
}
236+
237+
for _, testCase := range testCases {
238+
t.Run(testCase.name, func(t *testing.T) {
239+
logger, _ := ktesting.NewTestContext(t)
240+
pl := &NodeUnschedulable{}
241+
got, err := pl.isSchedulableAfterPodTolerationChange(logger, testCase.pod, testCase.oldObj, testCase.newObj)
242+
if err != nil && !testCase.expectedErr {
243+
t.Errorf("unexpected error: %v", err)
244+
}
245+
if got != testCase.expectedHint {
246+
t.Errorf("isSchedulableAfterNodeChange() = %v, want %v", got, testCase.expectedHint)
247+
}
248+
})
249+
}
250+
}

pkg/scheduler/framework/types.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ const (
6969
UpdatePodLabel
7070
// UpdatePodScaleDown is an update for pod's scale down (i.e., any resource request is reduced).
7171
UpdatePodScaleDown
72-
// UpdatePodTolerations is an update for pod's tolerations.
72+
// UpdatePodTolerations is an addition for pod's tolerations.
73+
// (Due to API validation, we can add, but cannot modify or remove tolerations.)
7374
UpdatePodTolerations
7475
// UpdatePodSchedulingGatesEliminated is an update for pod's scheduling gates, which eliminates all scheduling gates in the Pod.
7576
UpdatePodSchedulingGatesEliminated

0 commit comments

Comments
 (0)