Skip to content

Commit 9887699

Browse files
authored
Merge pull request kubernetes#128307 from NoicFank/bugfix-scheduler-preemption
bugfix(scheduler): preemption picks wrong victim node with higher priority pod on it
2 parents a12a32c + 68f7a7c commit 9887699

File tree

3 files changed

+105
-0
lines changed

3 files changed

+105
-0
lines changed

pkg/scheduler/framework/plugins/defaultpreemption/default_preemption.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
191191
}
192192
var victims []*v1.Pod
193193
numViolatingVictim := 0
194+
// Sort potentialVictims by pod priority from high to low, which ensures to
195+
// reprieve higher priority pods first.
194196
sort.Slice(potentialVictims, func(i, j int) bool { return util.MoreImportantPod(potentialVictims[i].Pod, potentialVictims[j].Pod) })
195197
// Try to reprieve as many pods as possible. We first try to reprieve the PDB
196198
// violating victims and then other non-violating ones. In both cases, we start
@@ -225,6 +227,11 @@ func (pl *DefaultPreemption) SelectVictimsOnNode(
225227
return nil, 0, framework.AsStatus(err)
226228
}
227229
}
230+
231+
// Sort victims after reprieving pods to keep the pods in the victims sorted in order of priority from high to low.
232+
if len(violatingVictims) != 0 && len(nonViolatingVictims) != 0 {
233+
sort.Slice(victims, func(i, j int) bool { return util.MoreImportantPod(victims[i], victims[j]) })
234+
}
228235
return victims, numViolatingVictim, framework.NewStatus(framework.Success)
229236
}
230237

pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"encoding/json"
2222
"errors"
2323
"fmt"
24+
"math"
2425
"math/rand"
2526
"sort"
2627
"strings"
@@ -144,6 +145,12 @@ func (pl *TestPlugin) Filter(ctx context.Context, state *framework.CycleState, p
144145
return nil
145146
}
146147

148+
const (
149+
LabelKeyIsViolatingPDB = "test.kubernetes.io/is-violating-pdb"
150+
LabelValueViolatingPDB = "violating"
151+
LabelValueNonViolatingPDB = "non-violating"
152+
)
153+
147154
func TestPostFilter(t *testing.T) {
148155
metrics.Register()
149156
onePodRes := map[v1.ResourceName]string{v1.ResourcePods: "1"}
@@ -152,6 +159,7 @@ func TestPostFilter(t *testing.T) {
152159
name string
153160
pod *v1.Pod
154161
pods []*v1.Pod
162+
pdbs []*policy.PodDisruptionBudget
155163
nodes []*v1.Node
156164
filteredNodesStatuses *framework.NodeToStatus
157165
extender framework.Extender
@@ -234,6 +242,29 @@ func TestPostFilter(t *testing.T) {
234242
wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
235243
wantStatus: framework.NewStatus(framework.Success),
236244
},
245+
{
246+
name: "pod can be made schedulable on minHighestPriority node",
247+
pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(veryHighPriority).Obj(),
248+
pods: []*v1.Pod{
249+
st.MakePod().Name("p1").UID("p1").Label(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).Namespace(v1.NamespaceDefault).Priority(highPriority).Node("node1").Obj(),
250+
st.MakePod().Name("p2").UID("p2").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(lowPriority).Node("node1").Obj(),
251+
st.MakePod().Name("p3").UID("p3").Label(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).Namespace(v1.NamespaceDefault).Priority(midPriority).Node("node2").Obj(),
252+
},
253+
pdbs: []*policy.PodDisruptionBudget{
254+
st.MakePDB().Name("violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueViolatingPDB).MinAvailable("100%").Obj(),
255+
st.MakePDB().Name("non-violating-pdb").Namespace(v1.NamespaceDefault).MatchLabel(LabelKeyIsViolatingPDB, LabelValueNonViolatingPDB).MinAvailable("0").DisruptionsAllowed(math.MaxInt32).Obj(),
256+
},
257+
nodes: []*v1.Node{
258+
st.MakeNode().Name("node1").Capacity(onePodRes).Obj(),
259+
st.MakeNode().Name("node2").Capacity(onePodRes).Obj(),
260+
},
261+
filteredNodesStatuses: framework.NewNodeToStatus(map[string]*framework.Status{
262+
"node1": framework.NewStatus(framework.Unschedulable),
263+
"node2": framework.NewStatus(framework.Unschedulable),
264+
}, framework.NewStatus(framework.UnschedulableAndUnresolvable)),
265+
wantResult: framework.NewPostFilterResultWithNominatedNode("node2"),
266+
wantStatus: framework.NewStatus(framework.Success),
267+
},
237268
{
238269
name: "preemption result filtered out by extenders",
239270
pod: st.MakePod().Name("p").UID("p").Namespace(v1.NamespaceDefault).Priority(highPriority).Obj(),
@@ -365,6 +396,13 @@ func TestPostFilter(t *testing.T) {
365396
for i := range tt.pods {
366397
podInformer.GetStore().Add(tt.pods[i])
367398
}
399+
pdbInformer := informerFactory.Policy().V1().PodDisruptionBudgets().Informer()
400+
for i := range tt.pdbs {
401+
if err := pdbInformer.GetStore().Add(tt.pdbs[i]); err != nil {
402+
t.Fatal(err)
403+
}
404+
}
405+
368406
// Register NodeResourceFit as the Filter & PreFilter plugin.
369407
registeredPlugins := []tf.RegisterPluginFunc{
370408
tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New),

pkg/scheduler/testing/wrappers.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ import (
2222
"time"
2323

2424
v1 "k8s.io/api/core/v1"
25+
policy "k8s.io/api/policy/v1"
2526
resourceapi "k8s.io/api/resource/v1alpha3"
2627
storagev1 "k8s.io/api/storage/v1"
2728
"k8s.io/apimachinery/pkg/api/resource"
2829
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2930
"k8s.io/apimachinery/pkg/runtime/schema"
3031
"k8s.io/apimachinery/pkg/types"
32+
"k8s.io/apimachinery/pkg/util/intstr"
3133
imageutils "k8s.io/kubernetes/test/utils/image"
3234
"k8s.io/utils/ptr"
3335
)
@@ -231,6 +233,64 @@ func (c *ContainerWrapper) RestartPolicy(restartPolicy v1.ContainerRestartPolicy
231233
return c
232234
}
233235

236+
// PodDisruptionBudgetWrapper wraps a PodDisruptionBudget inside.
237+
type PodDisruptionBudgetWrapper struct {
238+
policy.PodDisruptionBudget
239+
}
240+
241+
// MakePDB creates a PodDisruptionBudget wrapper.
242+
func MakePDB() *PodDisruptionBudgetWrapper {
243+
return &PodDisruptionBudgetWrapper{policy.PodDisruptionBudget{}}
244+
}
245+
246+
// Obj returns the inner PodDisruptionBudget.
247+
func (p *PodDisruptionBudgetWrapper) Obj() *policy.PodDisruptionBudget {
248+
return &p.PodDisruptionBudget
249+
}
250+
251+
// Name sets `name` as the name of the inner PodDisruptionBudget.
252+
func (p *PodDisruptionBudgetWrapper) Name(name string) *PodDisruptionBudgetWrapper {
253+
p.SetName(name)
254+
return p
255+
}
256+
257+
// Namespace sets `namespace` as the namespace of the inner PodDisruptionBudget.
258+
func (p *PodDisruptionBudgetWrapper) Namespace(namespace string) *PodDisruptionBudgetWrapper {
259+
p.SetNamespace(namespace)
260+
return p
261+
}
262+
263+
// MinAvailable sets `minAvailable` to the inner PodDisruptionBudget.Spec.MinAvailable.
264+
func (p *PodDisruptionBudgetWrapper) MinAvailable(minAvailable string) *PodDisruptionBudgetWrapper {
265+
p.Spec.MinAvailable = &intstr.IntOrString{
266+
Type: intstr.String,
267+
StrVal: minAvailable,
268+
}
269+
return p
270+
}
271+
272+
// MatchLabel adds a {key,value} to the inner PodDisruptionBudget.Spec.Selector.MatchLabels.
273+
func (p *PodDisruptionBudgetWrapper) MatchLabel(key, value string) *PodDisruptionBudgetWrapper {
274+
selector := p.Spec.Selector
275+
if selector == nil {
276+
selector = &metav1.LabelSelector{}
277+
}
278+
matchLabels := selector.MatchLabels
279+
if matchLabels == nil {
280+
matchLabels = map[string]string{}
281+
}
282+
matchLabels[key] = value
283+
selector.MatchLabels = matchLabels
284+
p.Spec.Selector = selector
285+
return p
286+
}
287+
288+
// DisruptionsAllowed sets `disruptionsAllowed` to the inner PodDisruptionBudget.Status.DisruptionsAllowed.
289+
func (p *PodDisruptionBudgetWrapper) DisruptionsAllowed(disruptionsAllowed int32) *PodDisruptionBudgetWrapper {
290+
p.Status.DisruptionsAllowed = disruptionsAllowed
291+
return p
292+
}
293+
234294
// PodWrapper wraps a Pod inside.
235295
type PodWrapper struct{ v1.Pod }
236296

0 commit comments

Comments
 (0)