@@ -52,6 +52,8 @@ import (
5252
5353const (
5454 gatherContainerName = "gather"
55+ unreachableTaintKey = "node.kubernetes.io/unreachable"
56+ notReadyTaintKey = "node.kubernetes.io/not-ready"
5557)
5658
5759var (
@@ -99,6 +101,44 @@ sleep 5
99101done`
100102)
101103
104+ var (
105+ tolerationNotReady = corev1.Toleration {
106+ Key : "node.kubernetes.io/not-ready" ,
107+ Operator : corev1 .TolerationOpExists ,
108+ Effect : corev1 .TaintEffectNoSchedule ,
109+ }
110+
111+ tolerationMasterNoSchedule = corev1.Toleration {
112+ Key : "node-role.kubernetes.io/master" ,
113+ Operator : corev1 .TolerationOpExists ,
114+ Effect : corev1 .TaintEffectNoSchedule ,
115+ }
116+ )
117+
118+ // buildNodeSelector builds a node selector from the provided node names.
119+ func buildNodeAffinity (nodeNames []string ) * corev1.Affinity {
120+ if len (nodeNames ) == 0 {
121+ return nil
122+ }
123+ return & corev1.Affinity {
124+ NodeAffinity : & corev1.NodeAffinity {
125+ RequiredDuringSchedulingIgnoredDuringExecution : & corev1.NodeSelector {
126+ NodeSelectorTerms : []corev1.NodeSelectorTerm {
127+ {
128+ MatchExpressions : []corev1.NodeSelectorRequirement {
129+ {
130+ Key : "kubernetes.io/hostname" ,
131+ Operator : corev1 .NodeSelectorOpIn ,
132+ Values : nodeNames ,
133+ },
134+ },
135+ },
136+ },
137+ },
138+ },
139+ }
140+ }
141+
102142const (
103143 // number of concurrent must-gather Pods to run if --all-images or multiple --image are provided
104144 concurrentMG = 4
@@ -404,6 +444,42 @@ func (o *MustGatherOptions) Validate() error {
404444 return nil
405445}
406446
447+ // prioritizeHealthyNodes returns a preferred node to run the must-gather pod on, and a fallback node if no preferred node is found.
448+ func prioritizeHealthyNodes (nodes * corev1.NodeList ) (preferred * corev1.Node , fallback * corev1.Node ) {
449+ var fallbackNode * corev1.Node
450+ var latestHeartbeat time.Time
451+
452+ for _ , node := range nodes .Items {
453+ var hasUnhealthyTaint bool
454+ for _ , taint := range node .Spec .Taints {
455+ if taint .Key == unreachableTaintKey || taint .Key == notReadyTaintKey {
456+ hasUnhealthyTaint = true
457+ break
458+ }
459+ }
460+
461+ // Look at heartbeat time for readiness hint
462+ for _ , cond := range node .Status .Conditions {
463+ if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue {
464+ // Check if heartbeat is recent (less than 2m old)
465+ if time .Since (cond .LastHeartbeatTime .Time ) < 2 * time .Minute {
466+ if ! hasUnhealthyTaint {
467+ return & node , nil // return immediately on good candidate
468+ }
469+ }
470+ break
471+ }
472+ }
473+
474+ if fallbackNode == nil || node .CreationTimestamp .Time .After (latestHeartbeat ) {
475+ fallbackNode = & node
476+ latestHeartbeat = node .CreationTimestamp .Time
477+ }
478+ }
479+
480+ return nil , fallbackNode
481+ }
482+
407483// Run creates and runs a must-gather pod
408484func (o * MustGatherOptions ) Run () error {
409485 var errs []error
@@ -460,15 +536,52 @@ func (o *MustGatherOptions) Run() error {
460536 defer cleanupNamespace ()
461537 }
462538
463- // Prefer to run in master if there's any but don't be explicit otherwise.
464- // This enables the command to run by default in hypershift where there's no masters.
465- nodes , err := o .Client .CoreV1 ().Nodes ().List (context .TODO (), metav1.ListOptions {
466- LabelSelector : o .NodeSelector ,
467- })
539+ nodes , err := o .Client .CoreV1 ().Nodes ().List (context .TODO (), metav1.ListOptions {})
468540 if err != nil {
469541 return err
470542 }
471- var hasMaster bool
543+
544+ var cpNodes , workerNodes , unschedulableNodes []corev1.Node
545+ for _ , node := range nodes .Items {
546+ isReady := false
547+ for _ , cond := range node .Status .Conditions {
548+ if cond .Type == corev1 .NodeReady && cond .Status == corev1 .ConditionTrue {
549+ isReady = true
550+ break
551+ }
552+ }
553+ if ! isReady {
554+ continue
555+ }
556+ if node .Spec .Unschedulable {
557+ unschedulableNodes = append (unschedulableNodes , node )
558+ continue
559+ }
560+ if _ , ok := node .Labels ["node-role.kubernetes.io/control-plane" ]; ok {
561+ cpNodes = append (cpNodes , node )
562+ } else {
563+ workerNodes = append (workerNodes , node )
564+ }
565+ }
566+
567+ selectedNodes := cpNodes
568+ if len (selectedNodes ) == 0 {
569+ selectedNodes = workerNodes
570+ }
571+ if len (selectedNodes ) == 0 {
572+ selectedNodes = unschedulableNodes
573+ }
574+
575+ var nodeNames []string
576+ for _ , n := range selectedNodes {
577+ nodeNames = append (nodeNames , n .Name )
578+ }
579+
580+ affinity := buildNodeAffinity (nodeNames )
581+ // If we have no nodes, we cannot run the must-gather pod.
582+
583+ // Determine if there is a master node
584+ hasMaster := false
472585 for _ , node := range nodes .Items {
473586 if _ , ok := node .Labels ["node-role.kubernetes.io/master" ]; ok {
474587 hasMaster = true
@@ -497,7 +610,7 @@ func (o *MustGatherOptions) Run() error {
497610 return err
498611 }
499612 for _ , node := range nodes .Items {
500- pods = append (pods , o .newPod (node .Name , image , hasMaster ))
613+ pods = append (pods , o .newPod (node .Name , image , hasMaster , affinity ))
501614 }
502615 } else {
503616 if o .NodeName != "" {
@@ -507,7 +620,7 @@ func (o *MustGatherOptions) Run() error {
507620 return err
508621 }
509622 }
510- pods = append (pods , o .newPod (o .NodeName , image , hasMaster ))
623+ pods = append (pods , o .newPod (o .NodeName , image , hasMaster , affinity ))
511624 }
512625 }
513626
@@ -925,7 +1038,8 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
9251038// newPod creates a pod with 2 containers with a shared volume mount:
9261039// - gather: init containers that run gather command
9271040// - copy: no-op container we can exec into
928- func (o * MustGatherOptions ) newPod (node , image string , hasMaster bool ) * corev1.Pod {
1041+ func (o * MustGatherOptions ) newPod (node , image string , hasMaster bool , affinity * corev1.Affinity ) * corev1.Pod {
1042+
9291043 zero := int64 (0 )
9301044
9311045 nodeSelector := map [string ]string {
@@ -943,6 +1057,31 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
9431057 cleanedSourceDir := path .Clean (o .SourceDir )
9441058 volumeUsageChecker := fmt .Sprintf (volumeUsageCheckerScript , cleanedSourceDir , cleanedSourceDir , o .VolumePercentage , o .VolumePercentage , executedCommand )
9451059
1060+ // Define taints we do not want to tolerate (can be extended with user input in the future)
1061+ excludedTaints := []corev1.Taint {
1062+ {Key : unreachableTaintKey , Effect : corev1 .TaintEffectNoExecute },
1063+ {Key : notReadyTaintKey , Effect : corev1 .TaintEffectNoSchedule },
1064+ }
1065+
1066+ // Define candidate tolerations we may want to apply
1067+ candidateTolerations := []corev1.Toleration {tolerationNotReady }
1068+ if node == "" && hasMaster {
1069+ candidateTolerations = append (candidateTolerations , tolerationMasterNoSchedule )
1070+ }
1071+
1072+ // Build the toleration list by only adding tolerations that do NOT tolerate excluded taints
1073+ filteredTolerations := make ([]corev1.Toleration , 0 , len (candidateTolerations ))
1074+ TolerationLoop:
1075+ for _ , tol := range candidateTolerations {
1076+ for _ , excluded := range excludedTaints {
1077+ if tol .ToleratesTaint (& excluded ) {
1078+ // Skip this toleration if it tolerates an excluded taint
1079+ continue TolerationLoop
1080+ }
1081+ }
1082+ filteredTolerations = append (filteredTolerations , tol )
1083+ }
1084+
9461085 ret := & corev1.Pod {
9471086 ObjectMeta : metav1.ObjectMeta {
9481087 GenerateName : "must-gather-" ,
@@ -1025,6 +1164,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
10251164 Operator : "Exists" ,
10261165 },
10271166 },
1167+ Affinity : affinity ,
10281168 },
10291169 }
10301170 if o .HostNetwork {
0 commit comments