@@ -380,45 +380,83 @@ func makePodTemplateSpec(coreClient coreset.CoreV1Interface, proxyLister configl
380
380
resources = * cr .Spec .Resources
381
381
}
382
382
383
+ nodes , err := coreClient .Nodes ().List (context .TODO (), metav1.ListOptions {LabelSelector : "topology.kubernetes.io/zone" })
384
+ if err != nil {
385
+ return corev1.PodTemplateSpec {}, deps , fmt .Errorf ("could not check nodes for zone failure domain: %s" , err )
386
+ }
387
+ hasZoneFailureDomain := len (nodes .Items ) >= 1
388
+
389
+ // defaults topology spread constraints to zone, node and workers.
390
+ // on SNO environments, these constraints will always work, since the
391
+ // skew will always be 0.
392
+ // some bare metal cluster nodes will not include the zone labels, in
393
+ // which case we just omit the related constraint.
394
+ // we constrain scheduling to workers because we want to reduce the
395
+ // scope of scheduling to workers only. we need this constraint
396
+ // because tainted nodes (such as control plane nodes) are not excluded
397
+ // from skew calculations, so if we don't limit scheduling to workers
398
+ // the maxSkew won't allow more than one pod to be scheduled per node.
399
+ // see https://k8s.io/docs/concepts/workloads/pods/pod-topology-spread-constraints
400
+ // and https://github.com/kubernetes/kubernetes/issues/80921 for details.
401
+ topologySpreadConstraints := []corev1.TopologySpreadConstraint {
402
+ {
403
+ MaxSkew : 1 ,
404
+ TopologyKey : "kubernetes.io/hostname" ,
405
+ WhenUnsatisfiable : corev1 .DoNotSchedule ,
406
+ LabelSelector : & metav1.LabelSelector {
407
+ MatchLabels : defaults .DeploymentLabels ,
408
+ },
409
+ },
410
+ {
411
+ MaxSkew : 1 ,
412
+ TopologyKey : "node-role.kubernetes.io/worker" ,
413
+ WhenUnsatisfiable : corev1 .DoNotSchedule ,
414
+ LabelSelector : & metav1.LabelSelector {
415
+ MatchLabels : defaults .DeploymentLabels ,
416
+ },
417
+ },
418
+ }
419
+ if hasZoneFailureDomain {
420
+ zoneConstraint := corev1.TopologySpreadConstraint {
421
+ MaxSkew : 1 ,
422
+ TopologyKey : "topology.kubernetes.io/zone" ,
423
+ WhenUnsatisfiable : corev1 .DoNotSchedule ,
424
+ LabelSelector : & metav1.LabelSelector {
425
+ MatchLabels : defaults .DeploymentLabels ,
426
+ },
427
+ }
428
+ topologySpreadConstraints = append (topologySpreadConstraints , zoneConstraint )
429
+ }
430
+
431
+ // topology spread constraints might conflict with node selectors, so we
432
+ // do not set defaults when they're specified.
433
+ if cr .Spec .NodeSelector != nil {
434
+ topologySpreadConstraints = nil
435
+ }
436
+
437
+ if cr .Spec .TopologySpreadConstraints != nil {
438
+ topologySpreadConstraints = cr .Spec .TopologySpreadConstraints
439
+ }
440
+
383
441
// if user has provided an affinity through config spec we use it here, if not
384
442
// then we fallback to a preferred affinity configuration. we only require a
385
443
// certain affinity during schedule if the number of replicas is defined to two.
386
444
affinity := cr .Spec .Affinity
387
- if affinity == nil {
445
+ if affinity == nil && cr . Spec . Replicas == 2 {
388
446
affinity = & corev1.Affinity {
389
447
PodAntiAffinity : & corev1.PodAntiAffinity {
390
- PreferredDuringSchedulingIgnoredDuringExecution : []corev1.WeightedPodAffinityTerm {
448
+ RequiredDuringSchedulingIgnoredDuringExecution : []corev1.PodAffinityTerm {
391
449
{
392
- Weight : 100 ,
393
- PodAffinityTerm : corev1.PodAffinityTerm {
394
- TopologyKey : "kubernetes.io/hostname" ,
395
- Namespaces : []string {
396
- defaults .ImageRegistryOperatorNamespace ,
397
- },
398
- LabelSelector : & metav1.LabelSelector {
399
- MatchLabels : defaults .DeploymentLabels ,
400
- },
450
+ TopologyKey : "kubernetes.io/hostname" ,
451
+ Namespaces : []string {
452
+ defaults .ImageRegistryOperatorNamespace ,
401
453
},
402
- },
403
- },
404
- },
405
- }
406
- if cr .Spec .Replicas == 2 {
407
- affinity = & corev1.Affinity {
408
- PodAntiAffinity : & corev1.PodAntiAffinity {
409
- RequiredDuringSchedulingIgnoredDuringExecution : []corev1.PodAffinityTerm {
410
- {
411
- TopologyKey : "kubernetes.io/hostname" ,
412
- Namespaces : []string {
413
- defaults .ImageRegistryOperatorNamespace ,
414
- },
415
- LabelSelector : & metav1.LabelSelector {
416
- MatchLabels : defaults .DeploymentLabels ,
417
- },
454
+ LabelSelector : & metav1.LabelSelector {
455
+ MatchLabels : defaults .DeploymentLabels ,
418
456
},
419
457
},
420
458
},
421
- }
459
+ },
422
460
}
423
461
}
424
462
@@ -478,6 +516,7 @@ func makePodTemplateSpec(coreClient coreset.CoreV1Interface, proxyLister configl
478
516
ServiceAccountName : defaults .ServiceAccountName ,
479
517
SecurityContext : securityContext ,
480
518
Affinity : affinity ,
519
+ TopologySpreadConstraints : topologySpreadConstraints ,
481
520
TerminationGracePeriodSeconds : & gracePeriod ,
482
521
},
483
522
}
0 commit comments