Commit 7d604c3

Break DS controller on scheduler predicates and predicate errors
1 parent ce2102f commit 7d604c3

8 files changed: 87 additions & 159 deletions

pkg/controller/.import-restrictions

Lines changed: 2 additions & 0 deletions
@@ -241,6 +241,8 @@
     "k8s.io/kubernetes/pkg/registry/core/secret",
     "k8s.io/kubernetes/pkg/scheduler/algorithm",
     "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates",
+    "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper",
+    "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1",
     "k8s.io/kubernetes/pkg/scheduler/nodeinfo",
     "k8s.io/kubernetes/pkg/serviceaccount",
     "k8s.io/kubernetes/pkg/util/goroutinemap",

pkg/controller/daemon/BUILD

Lines changed: 2 additions & 1 deletion
@@ -16,9 +16,10 @@ go_library(
     importpath = "k8s.io/kubernetes/pkg/controller/daemon",
     deps = [
         "//pkg/api/v1/pod:go_default_library",
+        "//pkg/apis/core/v1/helper:go_default_library",
         "//pkg/controller:go_default_library",
         "//pkg/controller/daemon/util:go_default_library",
-        "//pkg/scheduler/algorithm/predicates:go_default_library",
+        "//pkg/scheduler/framework/plugins/helper:go_default_library",
         "//pkg/scheduler/nodeinfo:go_default_library",
         "//pkg/util/labels:go_default_library",
         "//staging/src/k8s.io/api/apps/v1:go_default_library",

pkg/controller/daemon/daemon_controller.go

Lines changed: 39 additions & 136 deletions
@@ -48,9 +48,10 @@ import (
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/component-base/metrics/prometheus/ratelimiter"
 	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
+	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
 	"k8s.io/kubernetes/pkg/controller"
 	"k8s.io/kubernetes/pkg/controller/daemon/util"
-	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
+	pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
 	schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 	"k8s.io/utils/integer"
 )
@@ -622,7 +623,7 @@ func (dsc *DaemonSetsController) addNode(obj interface{}) {
 	}
 	node := obj.(*v1.Node)
 	for _, ds := range dsList {
-		_, shouldSchedule, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
+		shouldSchedule, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
 		if err != nil {
 			continue
 		}
@@ -684,11 +685,11 @@ func (dsc *DaemonSetsController) updateNode(old, cur interface{}) {
 	}
 	// TODO: it'd be nice to pass a hint with these enqueues, so that each ds would only examine the added node (unless it has other work to do, too).
 	for _, ds := range dsList {
-		_, oldShouldSchedule, oldShouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(oldNode, ds)
+		oldShouldSchedule, oldShouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(oldNode, ds)
 		if err != nil {
 			continue
 		}
-		_, currentShouldSchedule, currentShouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(curNode, ds)
+		currentShouldSchedule, currentShouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(curNode, ds)
 		if err != nil {
 			continue
 		}
@@ -788,7 +789,7 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
 	ds *apps.DaemonSet,
 ) (nodesNeedingDaemonPods, podsToDelete []string, err error) {
 
-	_, shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(node, ds)
+	shouldSchedule, shouldContinueRunning, err := dsc.nodeShouldRunDaemonPod(node, ds)
 	if err != nil {
 		return
 	}
@@ -1053,7 +1054,7 @@ func (dsc *DaemonSetsController) updateDaemonSetStatus(ds *apps.DaemonSet, nodeL
 
 	var desiredNumberScheduled, currentNumberScheduled, numberMisscheduled, numberReady, updatedNumberScheduled, numberAvailable int
 	for _, node := range nodeList {
-		wantToRun, _, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
+		wantToRun, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
 		if err != nil {
 			return err
 		}
@@ -1192,102 +1193,53 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error {
 	return dsc.updateDaemonSetStatus(ds, nodeList, hash, true)
 }
 
-func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *apps.DaemonSet) ([]predicates.PredicateFailureReason, *schedulernodeinfo.NodeInfo, error) {
-	objects, err := dsc.podNodeIndex.ByIndex("nodeName", node.Name)
-	if err != nil {
-		return nil, nil, err
-	}
-
-	nodeInfo := schedulernodeinfo.NewNodeInfo()
-	nodeInfo.SetNode(node)
-
-	for _, obj := range objects {
-		// Ignore pods that belong to the daemonset when taking into account whether a daemonset should bind to a node.
-		pod, ok := obj.(*v1.Pod)
-		if !ok {
-			continue
-		}
-		if metav1.IsControlledBy(pod, ds) {
-			continue
-		}
-		nodeInfo.AddPod(pod)
-	}
-
-	_, reasons, err := Predicates(newPod, nodeInfo)
-	return reasons, nodeInfo, err
-}
-
 // nodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a
 // summary. Returned booleans are:
-// * wantToRun:
-//     Returns true when a user would expect a pod to run on this node and ignores conditions
-//     such as DiskPressure or insufficient resource that would cause a daemonset pod not to schedule.
-//     This is primarily used to populate daemonset status.
 // * shouldSchedule:
 //     Returns true when a daemonset should be scheduled to a node if a daemonset pod is not already
 //     running on that node.
 // * shouldContinueRunning:
 //     Returns true when a daemonset should continue running on a node if a daemonset pod is already
 //     running on that node.
-func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.DaemonSet) (wantToRun, shouldSchedule, shouldContinueRunning bool, err error) {
-	newPod := NewPod(ds, node.Name)
+func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *apps.DaemonSet) (bool, bool, error) {
+	pod := NewPod(ds, node.Name)
 
-	// Because these bools require an && of all their required conditions, we start
-	// with all bools set to true and set a bool to false if a condition is not met.
-	// A bool should probably not be set to true after this line.
-	wantToRun, shouldSchedule, shouldContinueRunning = true, true, true
 	// If the daemon set specifies a node name, check that it matches with node.Name.
 	if !(ds.Spec.Template.Spec.NodeName == "" || ds.Spec.Template.Spec.NodeName == node.Name) {
-		return false, false, false, nil
+		return false, false, nil
 	}
 
-	reasons, nodeInfo, err := dsc.simulate(newPod, node, ds)
+	nodeInfo := schedulernodeinfo.NewNodeInfo()
+	nodeInfo.SetNode(node)
+	taints, err := nodeInfo.Taints()
 	if err != nil {
-		klog.Warningf("DaemonSet Predicates failed on node %s for ds '%s/%s' due to unexpected error: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, err)
-		return false, false, false, err
-	}
-
-	// TODO(k82cn): When 'ScheduleDaemonSetPods' upgrade to beta or GA, remove unnecessary check on failure reason,
-	// e.g. InsufficientResourceError; and simplify "wantToRun, shouldSchedule, shouldContinueRunning"
-	// into one result, e.g. selectedNode.
-	for _, r := range reasons {
-		klog.V(4).Infof("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v", node.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Name, r.GetReason())
-		switch reason := r.(type) {
-		case *predicates.PredicateFailureError:
-			// we try to partition predicates into two partitions here: intentional on the part of the operator and not.
-			switch reason {
-			// intentional
-			case
-				predicates.ErrNodeSelectorNotMatch,
-				predicates.ErrPodNotMatchHostName,
-				predicates.ErrNodeLabelPresenceViolated,
-				// this one is probably intentional since it's a workaround for not having
-				// pod hard anti affinity.
-				predicates.ErrPodNotFitsHostPorts:
-				return false, false, false, nil
-			case predicates.ErrTaintsTolerationsNotMatch:
-				// DaemonSet is expected to respect taints and tolerations
-				fitsNoExecute, _, err := predicates.PodToleratesNodeNoExecuteTaints(newPod, nil, nodeInfo)
-				if err != nil {
-					return false, false, false, err
-				}
-				if !fitsNoExecute {
-					return false, false, false, nil
-				}
-				wantToRun, shouldSchedule = false, false
-			// unexpected
-			case
-				predicates.ErrPodAffinityNotMatch,
-				predicates.ErrServiceAffinityViolated:
-				klog.Warningf("unexpected predicate failure reason: %s", reason.GetReason())
-				return false, false, false, fmt.Errorf("unexpected reason: DaemonSet Predicates should not return reason %s", reason.GetReason())
-			default:
-				klog.V(4).Infof("unknown predicate failure reason: %s", reason.GetReason())
-				wantToRun, shouldSchedule, shouldContinueRunning = false, false, false
-				dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedPlacementReason, "failed to place pod on %q: %s", node.ObjectMeta.Name, reason.GetReason())
-			}
-		}
+		klog.Warningf("failed to get node %q taints: %v", node.Name, err)
+		return false, false, err
+	}
+
+	fitsNodeName, fitsNodeAffinity, fitsTaints := PodFitsNode(pod, node, taints)
+	if !fitsNodeName || !fitsNodeAffinity {
+		return false, false, nil
 	}
+
+	if !fitsTaints {
+		fitsNoExecuteTaint := v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, func(t *v1.Taint) bool {
+			return t.Effect == v1.TaintEffectNoExecute
+		})
+
+		return false, fitsNoExecuteTaint, nil
+	}
+
+	return true, true, nil
+}
+
+// PodFitsNode Checks if a DaemonSet's pod can be scheduled on a node.
+func PodFitsNode(pod *v1.Pod, node *v1.Node, taints []v1.Taint) (fitsNodeName, fitsNodeAffinity, fitsTaints bool) {
+	fitsNodeName = len(pod.Spec.NodeName) == 0 || pod.Spec.NodeName == node.Name
+	fitsNodeAffinity = pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node)
+	fitsTaints = v1helper.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taints, func(t *v1.Taint) bool {
+		return t.Effect == v1.TaintEffectNoExecute || t.Effect == v1.TaintEffectNoSchedule
+	})
 	return
 }
 
@@ -1303,55 +1255,6 @@ func NewPod(ds *apps.DaemonSet, nodeName string) *v1.Pod {
 	return newPod
 }
 
-// checkNodeFitness runs a set of predicates that select candidate nodes for the DaemonSet;
-// the predicates include:
-// - PodFitsHost: checks pod's NodeName against node
-// - PodMatchNodeSelector: checks pod's NodeSelector and NodeAffinity against node
-// - PodToleratesNodeTaints: exclude tainted node unless pod has specific toleration
-func checkNodeFitness(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []predicates.PredicateFailureReason, error) {
-	var predicateFails []predicates.PredicateFailureReason
-	fit, reasons, err := predicates.PodFitsHost(pod, nil, nodeInfo)
-	if err != nil {
-		return false, predicateFails, err
-	}
-	if !fit {
-		predicateFails = append(predicateFails, reasons...)
-	}
-
-	fit, reasons, err = predicates.PodMatchNodeSelector(pod, nil, nodeInfo)
-	if err != nil {
-		return false, predicateFails, err
-	}
-	if !fit {
-		predicateFails = append(predicateFails, reasons...)
-	}
-
-	fit, reasons, err = predicates.PodToleratesNodeTaints(pod, nil, nodeInfo)
-	if err != nil {
-		return false, predicateFails, err
-	}
-	if !fit {
-		predicateFails = append(predicateFails, reasons...)
-	}
-	return len(predicateFails) == 0, predicateFails, nil
-}
-
-// Predicates checks if a DaemonSet's pod can be scheduled on a node using GeneralPredicates
-// and PodToleratesNodeTaints predicate
-func Predicates(pod *v1.Pod, nodeInfo *schedulernodeinfo.NodeInfo) (bool, []predicates.PredicateFailureReason, error) {
-	var predicateFails []predicates.PredicateFailureReason
-
-	fit, reasons, err := checkNodeFitness(pod, nodeInfo)
-	if err != nil {
-		return false, predicateFails, err
-	}
-	if !fit {
-		predicateFails = append(predicateFails, reasons...)
-	}
-
-	return len(predicateFails) == 0, predicateFails, nil
-}
-
 type podByCreationTimestampAndPhase []*v1.Pod
 
 func (o podByCreationTimestampAndPhase) Len() int { return len(o) }
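
The rewritten nodeShouldRunDaemonPod above collapses the old predicate simulation into three direct checks: node name, node selector/affinity, and taints, with untolerated NoExecute and NoSchedule taints treated differently. Below is a minimal, self-contained sketch of the taint half of that decision, using only k8s.io/api/core/v1 types; tolerationsTolerate is a local stand-in for the repo-internal v1helper.TolerationsTolerateTaintsWithFilter, the taint key is made up for the example, and the node-name and affinity checks are omitted.

// Illustrative sketch only; not part of this commit.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// tolerationsTolerate reports whether every taint selected by filter is
// tolerated by at least one of the given tolerations (a local stand-in for
// v1helper.TolerationsTolerateTaintsWithFilter).
func tolerationsTolerate(tolerations []v1.Toleration, taints []v1.Taint, filter func(t *v1.Taint) bool) bool {
	for i := range taints {
		if !filter(&taints[i]) {
			continue
		}
		tolerated := false
		for j := range tolerations {
			if tolerations[j].ToleratesTaint(&taints[i]) {
				tolerated = true
				break
			}
		}
		if !tolerated {
			return false
		}
	}
	return true
}

// shouldRunDaemonPod mirrors the taint logic of the new nodeShouldRunDaemonPod:
// an untolerated NoExecute taint blocks both scheduling and continued running,
// while an untolerated NoSchedule taint only blocks new scheduling.
func shouldRunDaemonPod(tolerations []v1.Toleration, taints []v1.Taint) (shouldSchedule, shouldContinueRunning bool) {
	fitsTaints := tolerationsTolerate(tolerations, taints, func(t *v1.Taint) bool {
		return t.Effect == v1.TaintEffectNoExecute || t.Effect == v1.TaintEffectNoSchedule
	})
	if fitsTaints {
		return true, true
	}
	fitsNoExecute := tolerationsTolerate(tolerations, taints, func(t *v1.Taint) bool {
		return t.Effect == v1.TaintEffectNoExecute
	})
	return false, fitsNoExecute
}

func main() {
	// Hypothetical taint key, chosen for the example.
	taints := []v1.Taint{{Key: "node.example.com/maintenance", Effect: v1.TaintEffectNoSchedule}}

	// No toleration for the NoSchedule taint: don't place a new daemon pod,
	// but let an existing one keep running.
	schedule, keepRunning := shouldRunDaemonPod(nil, taints)
	fmt.Println(schedule, keepRunning) // false true
}

With an untolerated NoExecute taint instead, the same call returns false, false, which the controller reads as "do not schedule here and do not keep an existing daemon pod running here".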

pkg/controller/daemon/daemon_controller_test.go

Lines changed: 2 additions & 17 deletions
@@ -1539,9 +1539,8 @@ func setDaemonSetCritical(ds *apps.DaemonSet) {
 }
 
 func TestNodeShouldRunDaemonPod(t *testing.T) {
-	var shouldCreate, wantToRun, shouldContinueRunning bool
+	var shouldCreate, shouldContinueRunning bool
 	shouldCreate = true
-	wantToRun = true
 	shouldContinueRunning = true
 	cases := []struct {
 		predicateName string
@@ -1565,7 +1564,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: true,
 			shouldContinueRunning: true,
 		},
@@ -1582,7 +1580,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: shouldCreate,
 			shouldContinueRunning: true,
 		},
@@ -1599,7 +1596,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: false,
 			shouldCreate: false,
 			shouldContinueRunning: false,
 		},
@@ -1633,7 +1629,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: wantToRun,
 			shouldCreate: shouldCreate,
 			shouldContinueRunning: shouldContinueRunning,
 		},
@@ -1662,7 +1657,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: shouldCreate, // This is because we don't care about the resource constraints any more and let default scheduler handle it.
 			shouldContinueRunning: true,
 		},
@@ -1691,7 +1685,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: true,
 			shouldContinueRunning: true,
 		},
@@ -1710,7 +1703,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: false,
 			shouldCreate: false,
 			shouldContinueRunning: false,
 		},
@@ -1729,7 +1721,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: true,
 			shouldContinueRunning: true,
 		},
@@ -1764,7 +1755,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: false,
 			shouldCreate: false,
 			shouldContinueRunning: false,
 		},
@@ -1799,7 +1789,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 					},
 				},
 			},
-			wantToRun: true,
 			shouldCreate: true,
 			shouldContinueRunning: true,
 		},
@@ -1817,7 +1806,6 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 				},
 			},
 			nodeUnschedulable: true,
-			wantToRun: true,
 			shouldCreate: true,
 			shouldContinueRunning: true,
 		},
@@ -1840,11 +1828,8 @@ func TestNodeShouldRunDaemonPod(t *testing.T) {
 				manager.podNodeIndex.Add(p)
 			}
 			c.ds.Spec.UpdateStrategy = *strategy
-			wantToRun, shouldRun, shouldContinueRunning, err := manager.nodeShouldRunDaemonPod(node, c.ds)
+			shouldRun, shouldContinueRunning, err := manager.nodeShouldRunDaemonPod(node, c.ds)
 
-			if wantToRun != c.wantToRun {
-				t.Errorf("[%v] strategy: %v, predicateName: %v expected wantToRun: %v, got: %v", i, c.ds.Spec.UpdateStrategy.Type, c.predicateName, c.wantToRun, wantToRun)
-			}
 			if shouldRun != c.shouldCreate {
 				t.Errorf("[%v] strategy: %v, predicateName: %v expected shouldRun: %v, got: %v", i, c.ds.Spec.UpdateStrategy.Type, c.predicateName, c.shouldCreate, shouldRun)
 			}

pkg/controller/daemon/update.go

Lines changed: 1 addition & 1 deletion
@@ -389,7 +389,7 @@ func (dsc *DaemonSetsController) getUnavailableNumbers(ds *apps.DaemonSet, nodeL
 	var numUnavailable, desiredNumberScheduled int
 	for i := range nodeList {
 		node := nodeList[i]
-		wantToRun, _, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
+		wantToRun, _, err := dsc.nodeShouldRunDaemonPod(node, ds)
 		if err != nil {
 			return -1, -1, err
 		}
