@@ -10,6 +10,7 @@ import (
1010 "os"
1111 "path"
1212 "regexp"
13+ "sort"
1314 "strconv"
1415 "strings"
1516 "sync"
@@ -39,6 +40,7 @@ import (
3940 "k8s.io/kubectl/pkg/util/templates"
4041 admissionapi "k8s.io/pod-security-admission/api"
4142 "k8s.io/utils/exec"
43+ utilptr "k8s.io/utils/ptr"
4244
4345 configclient "github.com/openshift/client-go/config/clientset/versioned"
4446 imagev1client "github.com/openshift/client-go/image/clientset/versioned/typed/image/v1"
@@ -51,7 +53,10 @@ import (
5153)
5254
const (
	// gatherContainerName is the name of the "gather" container created by newPod
	// (the init container that runs the gather command).
	gatherContainerName = "gather"
	// unreachableTaintKey and notReadyTaintKey identify taints used here to filter
	// out nodes that are not reachable or not ready.
	unreachableTaintKey = "node.kubernetes.io/unreachable"
	notReadyTaintKey    = "node.kubernetes.io/not-ready"
	// controlPlaneNodeRoleLabel is the label key used to recognize control plane nodes.
	controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
)
5661
5762var (
@@ -99,6 +104,30 @@ sleep 5
99104done`
100105)
101106
107+ // buildNodeAffinity builds a node affinity from the provided node hostnames.
108+ func buildNodeAffinity (nodeHostnames []string ) * corev1.Affinity {
109+ if len (nodeHostnames ) == 0 {
110+ return nil
111+ }
112+ return & corev1.Affinity {
113+ NodeAffinity : & corev1.NodeAffinity {
114+ RequiredDuringSchedulingIgnoredDuringExecution : & corev1.NodeSelector {
115+ NodeSelectorTerms : []corev1.NodeSelectorTerm {
116+ {
117+ MatchExpressions : []corev1.NodeSelectorRequirement {
118+ {
119+ Key : "kubernetes.io/hostname" ,
120+ Operator : corev1 .NodeSelectorOpIn ,
121+ Values : nodeHostnames ,
122+ },
123+ },
124+ },
125+ },
126+ },
127+ },
128+ }
129+ }
130+
102131const (
103132 // number of concurrent must-gather Pods to run if --all-images or multiple --image are provided
104133 concurrentMG = 4
@@ -404,6 +433,125 @@ func (o *MustGatherOptions) Validate() error {
404433 return nil
405434}
406435
436+ func getNodeLastHeartbeatTime (node corev1.Node ) * metav1.Time {
437+ for _ , cond := range node .Status .Conditions {
438+ if cond .Type == corev1 .NodeReady {
439+ if ! cond .LastHeartbeatTime .IsZero () {
440+ return utilptr.To [metav1.Time ](cond .LastHeartbeatTime )
441+ }
442+ return nil
443+ }
444+ }
445+ return nil
446+ }
447+
448+ func isNodeReadyByCondition (node corev1.Node ) bool {
449+ for _ , cond := range node .Status .Conditions {
450+ if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue {
451+ return true
452+ }
453+ }
454+ return false
455+ }
456+
457+ func isNodeReadyAndReachableByTaint (node corev1.Node ) bool {
458+ for _ , taint := range node .Spec .Taints {
459+ if taint .Key == unreachableTaintKey || taint .Key == notReadyTaintKey {
460+ return false
461+ }
462+ }
463+ return true
464+ }
465+
466+ // getCandidateNodeNames identifies suitable nodes for constructing a list of
467+ // node names for a node affinity expression.
468+ func getCandidateNodeNames (nodes * corev1.NodeList , hasMaster bool ) []string {
469+ // Identify ready and reacheable nodes
470+ var controlPlaneNodes , allControlPlaneNodes , workerNodes , unschedulableNodes , remainingNodes , selectedNodes []corev1.Node
471+ for _ , node := range nodes .Items {
472+ if _ , ok := node .Labels [controlPlaneNodeRoleLabel ]; ok {
473+ allControlPlaneNodes = append (allControlPlaneNodes , node )
474+ }
475+ if ! isNodeReadyByCondition (node ) || ! isNodeReadyAndReachableByTaint (node ) {
476+ remainingNodes = append (remainingNodes , node )
477+ continue
478+ }
479+ if node .Spec .Unschedulable {
480+ unschedulableNodes = append (unschedulableNodes , node )
481+ continue
482+ }
483+ if _ , ok := node .Labels [controlPlaneNodeRoleLabel ]; ok {
484+ controlPlaneNodes = append (controlPlaneNodes , node )
485+ } else {
486+ workerNodes = append (workerNodes , node )
487+ }
488+ }
489+
490+ // INFO(ingvagabund): a single target node should be enough. Yet,
491+ // it might help to provide more to allow the scheduler choose a more
492+ // suitable node based on the cluster scheduling capabilities.
493+
494+ // hasMaster cause the must-gather pod to set a node selector targeting all
495+ // control plane node. So there's no point of processing other than control
496+ // plane nodes
497+ if hasMaster {
498+ if len (controlPlaneNodes ) > 0 {
499+ selectedNodes = controlPlaneNodes
500+ } else {
501+ selectedNodes = allControlPlaneNodes
502+ }
503+ } else {
504+ // For hypershift case
505+
506+ // Order of preference:
507+ // - ready and reachable control plane nodes first
508+ // - then ready and reachable worker nodes
509+ // - then ready and reachable unschedulable nodes
510+ // - then any other node
511+ selectedNodes = controlPlaneNodes // this will be very likely empty for hypershift
512+ if len (selectedNodes ) == 0 {
513+ selectedNodes = workerNodes
514+ }
515+ if len (selectedNodes ) == 0 {
516+ // unschedulable nodes might still be better candidates than the remaining
517+ // not ready and not reacheable nodes
518+ selectedNodes = unschedulableNodes
519+ }
520+ if len (selectedNodes ) == 0 {
521+ // whatever is left for the last resort
522+ selectedNodes = remainingNodes
523+ }
524+ }
525+
526+ // Sort nodes based on the cond.LastHeartbeatTime.Time to prefer nodes that
527+ // reported their status most recently
528+ sort .SliceStable (selectedNodes , func (i , j int ) bool {
529+ iTime := getNodeLastHeartbeatTime (selectedNodes [i ])
530+ jTime := getNodeLastHeartbeatTime (selectedNodes [j ])
531+ // iTime has no effect since all is sorted right
532+ if jTime == nil {
533+ return true
534+ }
535+ if iTime == nil {
536+ return false
537+ }
538+ return jTime .Before (iTime )
539+ })
540+
541+ // Limit the number of nodes to 10 at most to provide a sane list of nodes
542+ nodeNames := []string {}
543+ var nodeNamesSize = 0
544+ for _ , n := range selectedNodes {
545+ nodeNames = append (nodeNames , n .Name )
546+ nodeNamesSize ++
547+ if nodeNamesSize >= 10 {
548+ break
549+ }
550+ }
551+
552+ return nodeNames
553+ }
554+
407555// Run creates and runs a must-gather pod
408556func (o * MustGatherOptions ) Run () error {
409557 var errs []error
@@ -476,6 +624,8 @@ func (o *MustGatherOptions) Run() error {
476624 }
477625 }
478626
627+ affinity := buildNodeAffinity (getCandidateNodeNames (nodes , hasMaster ))
628+
479629 // ... and create must-gather pod(s)
480630 var pods []* corev1.Pod
481631 for _ , image := range o .Images {
@@ -497,7 +647,7 @@ func (o *MustGatherOptions) Run() error {
497647 return err
498648 }
499649 for _ , node := range nodes .Items {
500- pods = append (pods , o .newPod (node .Name , image , hasMaster ))
650+ pods = append (pods , o .newPod (node .Name , image , hasMaster , affinity ))
501651 }
502652 } else {
503653 if o .NodeName != "" {
@@ -507,7 +657,7 @@ func (o *MustGatherOptions) Run() error {
507657 return err
508658 }
509659 }
510- pods = append (pods , o .newPod (o .NodeName , image , hasMaster ))
660+ pods = append (pods , o .newPod (o .NodeName , image , hasMaster , affinity ))
511661 }
512662 }
513663
@@ -925,7 +1075,8 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
9251075// newPod creates a pod with 2 containers with a shared volume mount:
9261076// - gather: init containers that run gather command
9271077// - copy: no-op container we can exec into
928- func (o * MustGatherOptions ) newPod (node , image string , hasMaster bool ) * corev1.Pod {
1078+ func (o * MustGatherOptions ) newPod (node , image string , hasMaster bool , affinity * corev1.Affinity ) * corev1.Pod {
1079+
9291080 zero := int64 (0 )
9301081
9311082 nodeSelector := map [string ]string {
@@ -1025,6 +1176,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
10251176 Operator : "Exists" ,
10261177 },
10271178 },
1179+ Affinity : affinity ,
10281180 },
10291181 }
10301182 if o .HostNetwork {
0 commit comments