Skip to content

Commit 0c09391

Browse files
Merge pull request #2135 from ingvagabund/must-gather-no-affinity-if-nodename-set
OCPBUGS-64743: fix(must-gather): do not set node affinity if nodename is set
2 parents 4d35590 + 49b0706 commit 0c09391

File tree

2 files changed

+207
-28
lines changed

2 files changed

+207
-28
lines changed

pkg/cli/admin/mustgather/mustgather.go

Lines changed: 57 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ const (
5757
unreachableTaintKey = "node.kubernetes.io/unreachable"
5858
notReadyTaintKey = "node.kubernetes.io/not-ready"
5959
controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
60+
defaultMustGatherCommand = "/usr/bin/gather"
61+
defaultVolumePercentage = 70
62+
defaultSourceDir = "/must-gather/"
6063
)
6164

6265
var (
@@ -172,10 +175,10 @@ func NewMustGatherCommand(f kcmdutil.Factory, streams genericiooptions.IOStreams
172175

173176
func NewMustGatherOptions(streams genericiooptions.IOStreams) *MustGatherOptions {
174177
opts := &MustGatherOptions{
175-
SourceDir: "/must-gather/",
178+
SourceDir: defaultSourceDir,
176179
IOStreams: streams,
177180
Timeout: 10 * time.Minute,
178-
VolumePercentage: 70,
181+
VolumePercentage: defaultVolumePercentage,
179182
}
180183
opts.LogOut = opts.newPrefixWriter(streams.Out, "[must-gather ] OUT", false, true)
181184
opts.RawOut = opts.newPrefixWriter(streams.Out, "", false, false)
@@ -636,7 +639,7 @@ func (o *MustGatherOptions) Run() error {
636639
}
637640
var hasMaster bool
638641
for _, node := range nodes.Items {
639-
if _, ok := node.Labels["node-role.kubernetes.io/control-plane"]; ok {
642+
if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
640643
hasMaster = true
641644
break
642645
}
@@ -1099,38 +1102,21 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
10991102
}
11001103
}
11011104

1102-
// newPod creates a pod with 2 containers with a shared volume mount:
1103-
// - gather: init containers that run gather command
1104-
// - copy: no-op container we can exec into
1105-
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {
1106-
1105+
func defaultMustGatherPod(image string) *corev1.Pod {
11071106
zero := int64(0)
11081107

1109-
nodeSelector := map[string]string{
1110-
corev1.LabelOSStable: "linux",
1111-
}
1112-
if node == "" && hasMaster {
1113-
nodeSelector["node-role.kubernetes.io/control-plane"] = ""
1114-
}
1108+
cleanedSourceDir := path.Clean(defaultSourceDir)
1109+
volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, defaultVolumePercentage)
1110+
podCmd := buildPodCommand(volumeUsageChecker, defaultMustGatherCommand)
11151111

1116-
executedCommand := "/usr/bin/gather"
1117-
if len(o.Command) > 0 {
1118-
executedCommand = strings.Join(o.Command, " ")
1119-
}
1120-
1121-
cleanedSourceDir := path.Clean(o.SourceDir)
1122-
volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, o.VolumePercentage)
1123-
podCmd := buildPodCommand(volumeUsageChecker, executedCommand)
1124-
1125-
ret := &corev1.Pod{
1112+
return &corev1.Pod{
11261113
ObjectMeta: metav1.ObjectMeta{
11271114
GenerateName: "must-gather-",
11281115
Labels: map[string]string{
11291116
"app": "must-gather",
11301117
},
11311118
},
11321119
Spec: corev1.PodSpec{
1133-
NodeName: node,
11341120
// This pod is ok to be OOMKilled but not preempted. Following the conventions mentioned at:
11351121
// https://github.com/openshift/enhancements/blob/master/CONVENTIONS.md#priority-classes
11361122
// so setting priority class to system-cluster-critical
@@ -1192,8 +1178,9 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
11921178
},
11931179
},
11941180
},
1195-
HostNetwork: o.HostNetwork,
1196-
NodeSelector: nodeSelector,
1181+
NodeSelector: map[string]string{
1182+
corev1.LabelOSStable: "linux",
1183+
},
11971184
TerminationGracePeriodSeconds: &zero,
11981185
Tolerations: []corev1.Toleration{
11991186
{
@@ -1203,9 +1190,51 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity
12031190
Operator: "Exists",
12041191
},
12051192
},
1206-
Affinity: affinity,
12071193
},
12081194
}
1195+
}
1196+
1197+
// newPod creates a pod with 2 containers with a shared volume mount:
1198+
// - gather: init containers that run gather command
1199+
// - copy: no-op container we can exec into
1200+
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {
1201+
executedCommand := defaultMustGatherCommand
1202+
if len(o.Command) > 0 {
1203+
executedCommand = strings.Join(o.Command, " ")
1204+
}
1205+
1206+
cleanedSourceDir := path.Clean(o.SourceDir)
1207+
volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanedSourceDir, o.VolumePercentage)
1208+
podCmd := buildPodCommand(volumeUsageChecker, executedCommand)
1209+
1210+
ret := defaultMustGatherPod(image)
1211+
ret.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd}
1212+
ret.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{
1213+
Name: "must-gather-output",
1214+
MountPath: cleanedSourceDir,
1215+
ReadOnly: false,
1216+
}}
1217+
ret.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{
1218+
Name: "must-gather-output",
1219+
MountPath: cleanedSourceDir,
1220+
ReadOnly: false,
1221+
}}
1222+
ret.Spec.HostNetwork = o.HostNetwork
1223+
if node == "" && hasMaster {
1224+
ret.Spec.NodeSelector[controlPlaneNodeRoleLabel] = ""
1225+
}
1226+
1227+
if node != "" {
1228+
ret.Spec.NodeName = node
1229+
} else {
1230+
// Set affinity towards the most suitable nodes. E.g. to exclude
1231+
// nodes that if unreachable could cause the must-gather pod to stay
1232+
// in Pending state indefinitely. Please check getCandidateNodeNames
1233+
// function for more details about how the selection of the most
1234+
// suitable nodes is performed.
1235+
ret.Spec.Affinity = affinity
1236+
}
1237+
12091238
if o.HostNetwork {
12101239
// If a user specified hostNetwork he might have intended to perform
12111240
// packet captures on the host, for that we need to set the correct

pkg/cli/admin/mustgather/mustgather_test.go

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package mustgather
33
import (
44
"context"
55
"fmt"
6+
"path"
67
"reflect"
78
"testing"
89
"time"
@@ -478,3 +479,152 @@ sync && echo 'Caches written to disk'`,
478479
})
479480
}
480481
}
482+
483+
func TestNewPod(t *testing.T) {
484+
generateMustGatherPod := func(image string, update func(pod *corev1.Pod)) *corev1.Pod {
485+
pod := defaultMustGatherPod(image)
486+
update(pod)
487+
return pod
488+
}
489+
490+
tests := []struct {
491+
name string
492+
options *MustGatherOptions
493+
hasMasters bool
494+
node string
495+
affinity *corev1.Affinity
496+
expectedPod *corev1.Pod
497+
}{
498+
{
499+
name: "node set, affinity provided, affinity not set",
500+
options: &MustGatherOptions{
501+
VolumePercentage: defaultVolumePercentage,
502+
SourceDir: defaultSourceDir,
503+
},
504+
node: "node1",
505+
affinity: buildNodeAffinity([]string{"node2"}),
506+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
507+
pod.Spec.NodeName = "node1"
508+
}),
509+
},
510+
{
511+
name: "node not set, affinity provided, affinity set",
512+
options: &MustGatherOptions{
513+
VolumePercentage: defaultVolumePercentage,
514+
SourceDir: defaultSourceDir,
515+
},
516+
affinity: buildNodeAffinity([]string{"node2"}),
517+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
518+
pod.Spec.Affinity = buildNodeAffinity([]string{"node2"})
519+
}),
520+
},
521+
{
522+
name: "custom source dir",
523+
options: &MustGatherOptions{
524+
VolumePercentage: defaultVolumePercentage,
525+
SourceDir: "custom-source-dir",
526+
},
527+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
528+
volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, "custom-source-dir", defaultVolumePercentage)
529+
podCmd := buildPodCommand(volumeUsageChecker, defaultMustGatherCommand)
530+
pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd}
531+
pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{
532+
Name: "must-gather-output",
533+
MountPath: "custom-source-dir",
534+
}}
535+
pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{
536+
Name: "must-gather-output",
537+
MountPath: "custom-source-dir",
538+
}}
539+
}),
540+
},
541+
{
542+
name: "host network",
543+
options: &MustGatherOptions{
544+
VolumePercentage: defaultVolumePercentage,
545+
SourceDir: defaultSourceDir,
546+
HostNetwork: true,
547+
},
548+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
549+
pod.Spec.HostNetwork = true
550+
pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
551+
Capabilities: &corev1.Capabilities{
552+
Add: []corev1.Capability{
553+
corev1.Capability("CAP_NET_RAW"),
554+
},
555+
},
556+
}
557+
}),
558+
},
559+
{
560+
name: "with control plane nodes",
561+
options: &MustGatherOptions{
562+
VolumePercentage: defaultVolumePercentage,
563+
SourceDir: defaultSourceDir,
564+
},
565+
hasMasters: true,
566+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
567+
pod.Spec.NodeSelector[controlPlaneNodeRoleLabel] = ""
568+
}),
569+
},
570+
{
571+
name: "with custom command",
572+
options: &MustGatherOptions{
573+
VolumePercentage: defaultVolumePercentage,
574+
SourceDir: defaultSourceDir,
575+
Command: []string{"custom_command", "with_params"},
576+
},
577+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
578+
cleanSourceDir := path.Clean(defaultSourceDir)
579+
volumeUsageChecker := fmt.Sprintf(volumeUsageCheckerScript, cleanSourceDir, defaultVolumePercentage)
580+
podCmd := buildPodCommand(volumeUsageChecker, "custom_command with_params")
581+
pod.Spec.Containers[0].Command = []string{"/bin/bash", "-c", podCmd}
582+
pod.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{{
583+
Name: "must-gather-output",
584+
MountPath: cleanSourceDir,
585+
}}
586+
pod.Spec.Containers[1].VolumeMounts = []corev1.VolumeMount{{
587+
Name: "must-gather-output",
588+
MountPath: cleanSourceDir,
589+
}}
590+
}),
591+
},
592+
{
593+
name: "with since",
594+
options: &MustGatherOptions{
595+
VolumePercentage: defaultVolumePercentage,
596+
SourceDir: defaultSourceDir,
597+
Since: 2 * time.Minute,
598+
},
599+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
600+
pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{
601+
Name: "MUST_GATHER_SINCE",
602+
Value: "2m0s",
603+
})
604+
}),
605+
},
606+
{
607+
name: "with sincetime",
608+
options: &MustGatherOptions{
609+
VolumePercentage: defaultVolumePercentage,
610+
SourceDir: defaultSourceDir,
611+
SinceTime: "2023-09-24T15:30:00Z",
612+
},
613+
expectedPod: generateMustGatherPod("image", func(pod *corev1.Pod) {
614+
pod.Spec.Containers[0].Env = append(pod.Spec.Containers[0].Env, corev1.EnvVar{
615+
Name: "MUST_GATHER_SINCE_TIME",
616+
Value: "2023-09-24T15:30:00Z",
617+
})
618+
}),
619+
},
620+
}
621+
622+
for _, test := range tests {
623+
t.Run(test.name, func(t *testing.T) {
624+
tPod := test.options.newPod(test.node, "image", test.hasMasters, test.affinity)
625+
if !cmp.Equal(test.expectedPod, tPod) {
626+
t.Errorf("Unexpected pod command was generated: \n%s\n", cmp.Diff(test.expectedPod, tPod))
627+
}
628+
})
629+
}
630+
}

0 commit comments

Comments
 (0)