@@ -48,9 +48,10 @@ import (
48
48
"k8s.io/client-go/util/workqueue"
49
49
"k8s.io/component-base/metrics/prometheus/ratelimiter"
50
50
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
51
+ v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
51
52
"k8s.io/kubernetes/pkg/controller"
52
53
"k8s.io/kubernetes/pkg/controller/daemon/util"
53
- "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates "
54
+ pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper "
54
55
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
55
56
"k8s.io/utils/integer"
56
57
)
@@ -622,11 +623,11 @@ func (dsc *DaemonSetsController) addNode(obj interface{}) {
622
623
}
623
624
node := obj .(* v1.Node )
624
625
for _ , ds := range dsList {
625
- _ , shouldSchedule , _ , err := dsc .nodeShouldRunDaemonPod (node , ds )
626
+ shouldRun , _ , err := dsc .nodeShouldRunDaemonPod (node , ds )
626
627
if err != nil {
627
628
continue
628
629
}
629
- if shouldSchedule {
630
+ if shouldRun {
630
631
dsc .enqueueDaemonSet (ds )
631
632
}
632
633
}
@@ -684,15 +685,15 @@ func (dsc *DaemonSetsController) updateNode(old, cur interface{}) {
684
685
}
685
686
// TODO: it'd be nice to pass a hint with these enqueues, so that each ds would only examine the added node (unless it has other work to do, too).
686
687
for _ , ds := range dsList {
687
- _ , oldShouldSchedule , oldShouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (oldNode , ds )
688
+ oldShouldRun , oldShouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (oldNode , ds )
688
689
if err != nil {
689
690
continue
690
691
}
691
- _ , currentShouldSchedule , currentShouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (curNode , ds )
692
+ currentShouldRun , currentShouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (curNode , ds )
692
693
if err != nil {
693
694
continue
694
695
}
695
- if (oldShouldSchedule != currentShouldSchedule ) || (oldShouldContinueRunning != currentShouldContinueRunning ) {
696
+ if (oldShouldRun != currentShouldRun ) || (oldShouldContinueRunning != currentShouldContinueRunning ) {
696
697
dsc .enqueueDaemonSet (ds )
697
698
}
698
699
}
@@ -788,15 +789,15 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
788
789
ds * apps.DaemonSet ,
789
790
) (nodesNeedingDaemonPods , podsToDelete []string , err error ) {
790
791
791
- _ , shouldSchedule , shouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (node , ds )
792
+ shouldRun , shouldContinueRunning , err := dsc .nodeShouldRunDaemonPod (node , ds )
792
793
if err != nil {
793
794
return
794
795
}
795
796
796
797
daemonPods , exists := nodeToDaemonPods [node .Name ]
797
798
798
799
switch {
799
- case shouldSchedule && ! exists :
800
+ case shouldRun && ! exists :
800
801
// If daemon pod is supposed to be running on node, but isn't, create daemon pod.
801
802
nodesNeedingDaemonPods = append (nodesNeedingDaemonPods , node .Name )
802
803
case shouldContinueRunning :
@@ -1053,14 +1054,14 @@ func (dsc *DaemonSetsController) updateDaemonSetStatus(ds *apps.DaemonSet, nodeL
1053
1054
1054
1055
var desiredNumberScheduled , currentNumberScheduled , numberMisscheduled , numberReady , updatedNumberScheduled , numberAvailable int
1055
1056
for _ , node := range nodeList {
1056
- wantToRun , _ , _ , err := dsc .nodeShouldRunDaemonPod (node , ds )
1057
+ shouldRun , _ , err := dsc .nodeShouldRunDaemonPod (node , ds )
1057
1058
if err != nil {
1058
1059
return err
1059
1060
}
1060
1061
1061
1062
scheduled := len (nodeToDaemonPods [node .Name ]) > 0
1062
1063
1063
- if wantToRun {
1064
+ if shouldRun {
1064
1065
desiredNumberScheduled ++
1065
1066
if scheduled {
1066
1067
currentNumberScheduled ++
@@ -1192,102 +1193,53 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error {
1192
1193
return dsc .updateDaemonSetStatus (ds , nodeList , hash , true )
1193
1194
}
1194
1195
1195
- func (dsc * DaemonSetsController ) simulate (newPod * v1.Pod , node * v1.Node , ds * apps.DaemonSet ) ([]predicates.PredicateFailureReason , * schedulernodeinfo.NodeInfo , error ) {
1196
- objects , err := dsc .podNodeIndex .ByIndex ("nodeName" , node .Name )
1197
- if err != nil {
1198
- return nil , nil , err
1199
- }
1200
-
1201
- nodeInfo := schedulernodeinfo .NewNodeInfo ()
1202
- nodeInfo .SetNode (node )
1203
-
1204
- for _ , obj := range objects {
1205
- // Ignore pods that belong to the daemonset when taking into account whether a daemonset should bind to a node.
1206
- pod , ok := obj .(* v1.Pod )
1207
- if ! ok {
1208
- continue
1209
- }
1210
- if metav1 .IsControlledBy (pod , ds ) {
1211
- continue
1212
- }
1213
- nodeInfo .AddPod (pod )
1214
- }
1215
-
1216
- _ , reasons , err := Predicates (newPod , nodeInfo )
1217
- return reasons , nodeInfo , err
1218
- }
1219
-
1220
1196
// nodeShouldRunDaemonPod checks a set of preconditions against a (node,daemonset) and returns a
1221
1197
// summary. Returned booleans are:
1222
- // * wantToRun:
1223
- // Returns true when a user would expect a pod to run on this node and ignores conditions
1224
- // such as DiskPressure or insufficient resource that would cause a daemonset pod not to schedule.
1225
- // This is primarily used to populate daemonset status.
1226
- // * shouldSchedule:
1227
- // Returns true when a daemonset should be scheduled to a node if a daemonset pod is not already
1198
+ // * shouldRun:
1199
+ // Returns true when a daemonset should run on the node if a daemonset pod is not already
1228
1200
// running on that node.
1229
1201
// * shouldContinueRunning:
1230
1202
// Returns true when a daemonset should continue running on a node if a daemonset pod is already
1231
1203
// running on that node.
1232
- func (dsc * DaemonSetsController ) nodeShouldRunDaemonPod (node * v1.Node , ds * apps.DaemonSet ) (wantToRun , shouldSchedule , shouldContinueRunning bool , err error ) {
1233
- newPod := NewPod (ds , node .Name )
1204
+ func (dsc * DaemonSetsController ) nodeShouldRunDaemonPod (node * v1.Node , ds * apps.DaemonSet ) (bool , bool , error ) {
1205
+ pod := NewPod (ds , node .Name )
1234
1206
1235
- // Because these bools require an && of all their required conditions, we start
1236
- // with all bools set to true and set a bool to false if a condition is not met.
1237
- // A bool should probably not be set to true after this line.
1238
- wantToRun , shouldSchedule , shouldContinueRunning = true , true , true
1239
1207
// If the daemon set specifies a node name, check that it matches with node.Name.
1240
1208
if ! (ds .Spec .Template .Spec .NodeName == "" || ds .Spec .Template .Spec .NodeName == node .Name ) {
1241
- return false , false , false , nil
1209
+ return false , false , nil
1242
1210
}
1243
1211
1244
- reasons , nodeInfo , err := dsc .simulate (newPod , node , ds )
1212
+ nodeInfo := schedulernodeinfo .NewNodeInfo ()
1213
+ nodeInfo .SetNode (node )
1214
+ taints , err := nodeInfo .Taints ()
1245
1215
if err != nil {
1246
- klog .Warningf ("DaemonSet Predicates failed on node %s for ds '%s/%s' due to unexpected error: %v" , node .Name , ds .ObjectMeta .Namespace , ds .ObjectMeta .Name , err )
1247
- return false , false , false , err
1248
- }
1249
-
1250
- // TODO(k82cn): When 'ScheduleDaemonSetPods' upgrade to beta or GA, remove unnecessary check on failure reason,
1251
- // e.g. InsufficientResourceError; and simplify "wantToRun, shouldSchedule, shouldContinueRunning"
1252
- // into one result, e.g. selectedNode.
1253
- for _ , r := range reasons {
1254
- klog .V (4 ).Infof ("DaemonSet Predicates failed on node %s for ds '%s/%s' for reason: %v" , node .Name , ds .ObjectMeta .Namespace , ds .ObjectMeta .Name , r .GetReason ())
1255
- switch reason := r .(type ) {
1256
- case * predicates.PredicateFailureError :
1257
- // we try to partition predicates into two partitions here: intentional on the part of the operator and not.
1258
- switch reason {
1259
- // intentional
1260
- case
1261
- predicates .ErrNodeSelectorNotMatch ,
1262
- predicates .ErrPodNotMatchHostName ,
1263
- predicates .ErrNodeLabelPresenceViolated ,
1264
- // this one is probably intentional since it's a workaround for not having
1265
- // pod hard anti affinity.
1266
- predicates .ErrPodNotFitsHostPorts :
1267
- return false , false , false , nil
1268
- case predicates .ErrTaintsTolerationsNotMatch :
1269
- // DaemonSet is expected to respect taints and tolerations
1270
- fitsNoExecute , _ , err := predicates .PodToleratesNodeNoExecuteTaints (newPod , nil , nodeInfo )
1271
- if err != nil {
1272
- return false , false , false , err
1273
- }
1274
- if ! fitsNoExecute {
1275
- return false , false , false , nil
1276
- }
1277
- wantToRun , shouldSchedule = false , false
1278
- // unexpected
1279
- case
1280
- predicates .ErrPodAffinityNotMatch ,
1281
- predicates .ErrServiceAffinityViolated :
1282
- klog .Warningf ("unexpected predicate failure reason: %s" , reason .GetReason ())
1283
- return false , false , false , fmt .Errorf ("unexpected reason: DaemonSet Predicates should not return reason %s" , reason .GetReason ())
1284
- default :
1285
- klog .V (4 ).Infof ("unknown predicate failure reason: %s" , reason .GetReason ())
1286
- wantToRun , shouldSchedule , shouldContinueRunning = false , false , false
1287
- dsc .eventRecorder .Eventf (ds , v1 .EventTypeWarning , FailedPlacementReason , "failed to place pod on %q: %s" , node .ObjectMeta .Name , reason .GetReason ())
1288
- }
1289
- }
1216
+ klog .Warningf ("failed to get node %q taints: %v" , node .Name , err )
1217
+ return false , false , err
1218
+ }
1219
+
1220
+ fitsNodeName , fitsNodeAffinity , fitsTaints := Predicates (pod , node , taints )
1221
+ if ! fitsNodeName || ! fitsNodeAffinity {
1222
+ return false , false , nil
1223
+ }
1224
+
1225
+ if ! fitsTaints {
1226
+ // Scheduled daemon pods should continue running if they tolerate NoExecute taint.
1227
+ shouldContinueRunning := v1helper .TolerationsTolerateTaintsWithFilter (pod .Spec .Tolerations , taints , func (t * v1.Taint ) bool {
1228
+ return t .Effect == v1 .TaintEffectNoExecute
1229
+ })
1230
+ return false , shouldContinueRunning , nil
1290
1231
}
1232
+
1233
+ return true , true , nil
1234
+ }
1235
+
1236
+ // Predicates checks if a DaemonSet's pod can run on a node.
1237
+ func Predicates (pod * v1.Pod , node * v1.Node , taints []v1.Taint ) (fitsNodeName , fitsNodeAffinity , fitsTaints bool ) {
1238
+ fitsNodeName = len (pod .Spec .NodeName ) == 0 || pod .Spec .NodeName == node .Name
1239
+ fitsNodeAffinity = pluginhelper .PodMatchesNodeSelectorAndAffinityTerms (pod , node )
1240
+ fitsTaints = v1helper .TolerationsTolerateTaintsWithFilter (pod .Spec .Tolerations , taints , func (t * v1.Taint ) bool {
1241
+ return t .Effect == v1 .TaintEffectNoExecute || t .Effect == v1 .TaintEffectNoSchedule
1242
+ })
1291
1243
return
1292
1244
}
1293
1245
@@ -1303,55 +1255,6 @@ func NewPod(ds *apps.DaemonSet, nodeName string) *v1.Pod {
1303
1255
return newPod
1304
1256
}
1305
1257
1306
- // checkNodeFitness runs a set of predicates that select candidate nodes for the DaemonSet;
1307
- // the predicates include:
1308
- // - PodFitsHost: checks pod's NodeName against node
1309
- // - PodMatchNodeSelector: checks pod's NodeSelector and NodeAffinity against node
1310
- // - PodToleratesNodeTaints: exclude tainted node unless pod has specific toleration
1311
- func checkNodeFitness (pod * v1.Pod , nodeInfo * schedulernodeinfo.NodeInfo ) (bool , []predicates.PredicateFailureReason , error ) {
1312
- var predicateFails []predicates.PredicateFailureReason
1313
- fit , reasons , err := predicates .PodFitsHost (pod , nil , nodeInfo )
1314
- if err != nil {
1315
- return false , predicateFails , err
1316
- }
1317
- if ! fit {
1318
- predicateFails = append (predicateFails , reasons ... )
1319
- }
1320
-
1321
- fit , reasons , err = predicates .PodMatchNodeSelector (pod , nil , nodeInfo )
1322
- if err != nil {
1323
- return false , predicateFails , err
1324
- }
1325
- if ! fit {
1326
- predicateFails = append (predicateFails , reasons ... )
1327
- }
1328
-
1329
- fit , reasons , err = predicates .PodToleratesNodeTaints (pod , nil , nodeInfo )
1330
- if err != nil {
1331
- return false , predicateFails , err
1332
- }
1333
- if ! fit {
1334
- predicateFails = append (predicateFails , reasons ... )
1335
- }
1336
- return len (predicateFails ) == 0 , predicateFails , nil
1337
- }
1338
-
1339
- // Predicates checks if a DaemonSet's pod can be scheduled on a node using GeneralPredicates
1340
- // and PodToleratesNodeTaints predicate
1341
- func Predicates (pod * v1.Pod , nodeInfo * schedulernodeinfo.NodeInfo ) (bool , []predicates.PredicateFailureReason , error ) {
1342
- var predicateFails []predicates.PredicateFailureReason
1343
-
1344
- fit , reasons , err := checkNodeFitness (pod , nodeInfo )
1345
- if err != nil {
1346
- return false , predicateFails , err
1347
- }
1348
- if ! fit {
1349
- predicateFails = append (predicateFails , reasons ... )
1350
- }
1351
-
1352
- return len (predicateFails ) == 0 , predicateFails , nil
1353
- }
1354
-
1355
1258
type podByCreationTimestampAndPhase []* v1.Pod
1356
1259
1357
1260
func (o podByCreationTimestampAndPhase ) Len () int { return len (o ) }
0 commit comments