Skip to content

Commit 9eb097c

Browse files
authored
Merge pull request kubernetes#91168 from ahg-g/ahg-affinity5
First pod with affinity can schedule only on nodes with matching topology keys
2 parents f411271 + 5d2c054 commit 9eb097c

File tree

2 files changed

+105
-78
lines changed

2 files changed

+105
-78
lines changed

pkg/scheduler/framework/plugins/interpodaffinity/filtering.go

Lines changed: 56 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,22 @@ func podMatchesAllAffinityTerms(pod *v1.Pod, terms []framework.AffinityTerm) boo
160160
return true
161161
}
162162

163+
// getMatchingAntiAffinityTopologyPairs calculates the following for "existingPod" on given node:
164+
// (1) Whether it has PodAntiAffinity
165+
// (2) Whether ANY AffinityTerm matches the incoming pod
166+
func getMatchingAntiAffinityTopologyPairsOfPod(newPod *v1.Pod, existingPod *framework.PodInfo, node *v1.Node) topologyToMatchedTermCount {
167+
topologyMap := make(topologyToMatchedTermCount)
168+
for _, term := range existingPod.RequiredAntiAffinityTerms {
169+
if schedutil.PodMatchesTermsNamespaceAndSelector(newPod, term.Namespaces, term.Selector) {
170+
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
171+
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
172+
topologyMap[pair]++
173+
}
174+
}
175+
}
176+
return topologyMap
177+
}
178+
163179
// getTPMapMatchingExistingAntiAffinity calculates the following for each existing pod on each node:
164180
// (1) Whether it has PodAntiAffinity
165181
// (2) Whether any AffinityTerm matches the incoming pod
@@ -314,89 +330,61 @@ func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error
314330

315331
// Checks if scheduling the pod onto this node would break any anti-affinity
316332
// terms indicated by the existing pods.
317-
func (pl *InterPodAffinity) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, state *preFilterState, nodeInfo *framework.NodeInfo) (bool, error) {
318-
node := nodeInfo.Node()
319-
topologyMap := state.topologyToMatchedExistingAntiAffinityTerms
320-
321-
// Iterate over topology pairs to get any of the pods being affected by
322-
// the scheduled pod anti-affinity terms
323-
for topologyKey, topologyValue := range node.Labels {
324-
if topologyMap[topologyPair{key: topologyKey, value: topologyValue}] > 0 {
325-
klog.V(10).Infof("Cannot schedule pod %+v onto node %v", pod.Name, node.Name)
326-
return false, nil
327-
}
328-
}
329-
return true, nil
330-
}
331-
332-
// nodeMatchesAllAffinityTerms checks whether "nodeInfo" matches all affinity terms of the incoming pod.
333-
func nodeMatchesAllAffinityTerms(nodeInfo *framework.NodeInfo, state *preFilterState) bool {
334-
node := nodeInfo.Node()
335-
for _, term := range state.podInfo.RequiredAffinityTerms {
336-
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
337-
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
338-
if state.topologyToMatchedAffinityTerms[pair] <= 0 {
333+
func satisfyExistingPodsAntiAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
334+
if len(state.topologyToMatchedExistingAntiAffinityTerms) > 0 {
335+
// Iterate over topology pairs to get any of the pods being affected by
336+
// the scheduled pod anti-affinity terms
337+
for topologyKey, topologyValue := range nodeInfo.Node().Labels {
338+
tp := topologyPair{key: topologyKey, value: topologyValue}
339+
if state.topologyToMatchedExistingAntiAffinityTerms[tp] > 0 {
339340
return false
340341
}
341-
} else {
342-
return false
343342
}
344343
}
345344
return true
346345
}
347346

348-
// nodeMatchesAnyTopologyTerm checks whether "nodeInfo" matches any of the pod's anti affinity terms.
349-
func nodeMatchesAnyAntiAffinityTerm(nodeInfo *framework.NodeInfo, state *preFilterState) bool {
350-
node := nodeInfo.Node()
347+
// Checks if the node satisifies the incoming pod's anti-affinity rules.
348+
func satisfyPodAntiAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
351349
for _, term := range state.podInfo.RequiredAntiAffinityTerms {
352-
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
353-
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
354-
if state.topologyToMatchedAntiAffinityTerms[pair] > 0 {
355-
return true
350+
if topologyValue, ok := nodeInfo.Node().Labels[term.TopologyKey]; ok {
351+
tp := topologyPair{key: term.TopologyKey, value: topologyValue}
352+
if state.topologyToMatchedAntiAffinityTerms[tp] > 0 {
353+
return false
356354
}
357355
}
358356
}
359-
return false
357+
return true
360358
}
361359

362-
// getMatchingAntiAffinityTopologyPairs calculates the following for "existingPod" on given node:
363-
// (1) Whether it has PodAntiAffinity
364-
// (2) Whether ANY AffinityTerm matches the incoming pod
365-
func getMatchingAntiAffinityTopologyPairsOfPod(newPod *v1.Pod, existingPod *framework.PodInfo, node *v1.Node) topologyToMatchedTermCount {
366-
topologyMap := make(topologyToMatchedTermCount)
367-
for _, term := range existingPod.RequiredAntiAffinityTerms {
368-
if schedutil.PodMatchesTermsNamespaceAndSelector(newPod, term.Namespaces, term.Selector) {
369-
if topologyValue, ok := node.Labels[term.TopologyKey]; ok {
370-
pair := topologyPair{key: term.TopologyKey, value: topologyValue}
371-
topologyMap[pair]++
360+
// Checks if the node satisfies the incoming pod's affinity rules.
361+
func satisfyPodAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) bool {
362+
podsExist := true
363+
for _, term := range state.podInfo.RequiredAffinityTerms {
364+
if topologyValue, ok := nodeInfo.Node().Labels[term.TopologyKey]; ok {
365+
tp := topologyPair{key: term.TopologyKey, value: topologyValue}
366+
if state.topologyToMatchedAffinityTerms[tp] <= 0 {
367+
podsExist = false
372368
}
369+
} else {
370+
// All topology labels must exist on the node.
371+
return false
373372
}
374373
}
375-
return topologyMap
376-
}
377374

378-
// satisfiesPodsAffinityAntiAffinity checks if scheduling the pod onto this node would break any term of this pod.
379-
// This function returns two boolean flags. The first boolean flag indicates whether the pod matches affinity rules
380-
// or not. The second boolean flag indicates if the pod matches anti-affinity rules.
381-
func (pl *InterPodAffinity) satisfiesPodsAffinityAntiAffinity(state *preFilterState, nodeInfo *framework.NodeInfo) (bool, bool, error) {
382-
// Check all affinity terms.
383-
if !nodeMatchesAllAffinityTerms(nodeInfo, state) {
375+
if !podsExist {
384376
// This pod may be the first pod in a series that have affinity to themselves. In order
385377
// to not leave such pods in pending state forever, we check that if no other pod
386-
// in the cluster matches the namespace and selector of this pod and the pod matches
387-
// its own terms, then we allow the pod to pass the affinity check.
378+
// in the cluster matches the namespace and selector of this pod, the pod matches
379+
// its own terms, and the node has all the requested topologies, then we allow the pod
380+
// to pass the affinity check.
388381
podInfo := state.podInfo
389-
if len(state.topologyToMatchedAffinityTerms) != 0 || !podMatchesAllAffinityTerms(podInfo.Pod, podInfo.RequiredAffinityTerms) {
390-
return false, false, nil
382+
if len(state.topologyToMatchedAffinityTerms) == 0 && podMatchesAllAffinityTerms(podInfo.Pod, podInfo.RequiredAffinityTerms) {
383+
return true
391384
}
385+
return false
392386
}
393-
394-
// Check all anti-affinity terms.
395-
if nodeMatchesAnyAntiAffinityTerm(nodeInfo, state) {
396-
return true, false, nil
397-
}
398-
399-
return true, true, nil
387+
return true
400388
}
401389

402390
// Filter invoked at the filter extension point.
@@ -411,25 +399,17 @@ func (pl *InterPodAffinity) Filter(ctx context.Context, cycleState *framework.Cy
411399
return framework.NewStatus(framework.Error, err.Error())
412400
}
413401

414-
if s, err := pl.satisfiesExistingPodsAntiAffinity(pod, state, nodeInfo); !s || err != nil {
415-
if err != nil {
416-
return framework.NewStatus(framework.Error, err.Error())
417-
}
418-
return framework.NewStatus(framework.Unschedulable, ErrReasonAffinityNotMatch, ErrReasonExistingAntiAffinityRulesNotMatch)
402+
if !satisfyPodAffinity(state, nodeInfo) {
403+
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonAffinityNotMatch, ErrReasonAffinityRulesNotMatch)
419404
}
420405

421-
// Now check if <pod> requirements will be satisfied on this node.
422-
if satisfiesAffinity, satisfiesAntiAffinity, err := pl.satisfiesPodsAffinityAntiAffinity(state, nodeInfo); err != nil || !satisfiesAffinity || !satisfiesAntiAffinity {
423-
if err != nil {
424-
return framework.NewStatus(framework.Error, err.Error())
425-
}
426-
427-
if !satisfiesAffinity {
428-
return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonAffinityNotMatch, ErrReasonAffinityRulesNotMatch)
429-
}
430-
406+
if !satisfyPodAntiAffinity(state, nodeInfo) {
431407
return framework.NewStatus(framework.Unschedulable, ErrReasonAffinityNotMatch, ErrReasonAntiAffinityRulesNotMatch)
432408
}
433409

410+
if !satisfyExistingPodsAntiAffinity(state, nodeInfo) {
411+
return framework.NewStatus(framework.Unschedulable, ErrReasonAffinityNotMatch, ErrReasonExistingAntiAffinityRulesNotMatch)
412+
}
413+
434414
return nil
435415
}

pkg/scheduler/framework/plugins/interpodaffinity/filtering_test.go

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -707,7 +707,7 @@ func TestRequiredAffinitySingleNode(t *testing.T) {
707707
wantStatus: framework.NewStatus(
708708
framework.Unschedulable,
709709
ErrReasonAffinityNotMatch,
710-
ErrReasonExistingAntiAffinityRulesNotMatch,
710+
ErrReasonAntiAffinityRulesNotMatch,
711711
),
712712
name: "PodAntiAffinity symmetry check b1: incoming pod and existing pod partially match each other on AffinityTerms",
713713
},
@@ -768,7 +768,7 @@ func TestRequiredAffinitySingleNode(t *testing.T) {
768768
wantStatus: framework.NewStatus(
769769
framework.Unschedulable,
770770
ErrReasonAffinityNotMatch,
771-
ErrReasonExistingAntiAffinityRulesNotMatch,
771+
ErrReasonAntiAffinityRulesNotMatch,
772772
),
773773
name: "PodAntiAffinity symmetry check b2: incoming pod and existing pod partially match each other on AffinityTerms",
774774
},
@@ -888,6 +888,53 @@ func TestRequiredAffinityMultipleNodes(t *testing.T) {
888888
name: "The affinity rule is to schedule all of the pods of this collection to the same zone. The first pod of the collection " +
889889
"should not be blocked from being scheduled onto any node, even there's no existing pod that matches the rule anywhere.",
890890
},
891+
{
892+
pod: createPodWithAffinityTerms(defaultNamespace, "", map[string]string{"foo": "bar", "service": "securityscan"},
893+
[]v1.PodAffinityTerm{
894+
{
895+
LabelSelector: &metav1.LabelSelector{
896+
MatchExpressions: []metav1.LabelSelectorRequirement{
897+
{
898+
Key: "foo",
899+
Operator: metav1.LabelSelectorOpIn,
900+
Values: []string{"bar"},
901+
},
902+
},
903+
},
904+
TopologyKey: "zone",
905+
},
906+
{
907+
LabelSelector: &metav1.LabelSelector{
908+
MatchExpressions: []metav1.LabelSelectorRequirement{
909+
{
910+
Key: "service",
911+
Operator: metav1.LabelSelectorOpIn,
912+
Values: []string{"securityscan"},
913+
},
914+
},
915+
},
916+
TopologyKey: "zone",
917+
},
918+
}, nil),
919+
pods: []*v1.Pod{{Spec: v1.PodSpec{NodeName: "nodeA"}, ObjectMeta: metav1.ObjectMeta{Name: "p1", Labels: map[string]string{"foo": "bar"}}}},
920+
nodes: []*v1.Node{
921+
{ObjectMeta: metav1.ObjectMeta{Name: "nodeA", Labels: map[string]string{"zoneLabel": "az1", "hostname": "h1"}}},
922+
{ObjectMeta: metav1.ObjectMeta{Name: "nodeB", Labels: map[string]string{"zoneLabel": "az2", "hostname": "h2"}}},
923+
},
924+
wantStatuses: []*framework.Status{
925+
framework.NewStatus(
926+
framework.UnschedulableAndUnresolvable,
927+
ErrReasonAffinityNotMatch,
928+
ErrReasonAffinityRulesNotMatch,
929+
),
930+
framework.NewStatus(
931+
framework.UnschedulableAndUnresolvable,
932+
ErrReasonAffinityNotMatch,
933+
ErrReasonAffinityRulesNotMatch,
934+
),
935+
},
936+
name: "The first pod of the collection can only be scheduled on nodes labelled with the requested topology keys",
937+
},
891938
{
892939
pod: createPodWithAffinityTerms(defaultNamespace, "", nil, nil,
893940
[]v1.PodAffinityTerm{

0 commit comments

Comments
 (0)