Skip to content

Commit 009ba86

Browse files
committed
Capture process information in support export archive
This update adds the capture of running processes for each Pod of the selected PostgresCluster. The 'ps' command is run on any possible container in the cluster, but only successful results are stored in the archive. Issue: [sc-18017]
1 parent 5ab23e1 commit 009ba86

File tree

5 files changed

+132
-2
lines changed

5 files changed

+132
-2
lines changed

internal/cmd/exec.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,13 @@ func (exec Executor) patronictl(cmd string) (string, string, error) {
6969

7070
return stdout.String(), stderr.String(), err
7171
}
72+
73+
// processes returns the output of a ps command
74+
func (exec Executor) processes() (string, string, error) {
75+
var stdout, stderr bytes.Buffer
76+
77+
command := "ps aux --width 500"
78+
err := exec(nil, &stdout, &stderr, "bash", "-ceu", "--", command)
79+
80+
return stdout.String(), stderr.String(), err
81+
}

internal/cmd/exec_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,22 @@ func TestPatronictl(t *testing.T) {
126126
})
127127

128128
}
129+
130+
func TestProcesses(t *testing.T) {
131+
132+
t.Run("default", func(t *testing.T) {
133+
expected := errors.New("pass-through")
134+
exec := func(
135+
stdin io.Reader, stdout, stderr io.Writer, command ...string,
136+
) error {
137+
assert.DeepEqual(t, command, []string{"bash", "-ceu", "--", "ps aux --width 500"})
138+
assert.Assert(t, stdout != nil, "should capture stdout")
139+
assert.Assert(t, stderr != nil, "should capture stderr")
140+
return expected
141+
}
142+
_, _, err := Executor(exec).processes()
143+
assert.ErrorContains(t, err, "pass-through")
144+
145+
})
146+
147+
}

internal/cmd/export.go

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ kubectl pgo support export daisy --monitoring-namespace another-namespace --outp
360360
err = gatherPatroniInfo(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
361361
}
362362

363+
// Exec to get Container processes
364+
if err == nil {
365+
err = gatherProcessInfo(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
366+
}
367+
363368
// Print cli output
364369
path := clusterName + "/logs/cli"
365370
if logErr := writeTar(tw, cliOutput.Bytes(), path, cmd); logErr != nil {
@@ -805,7 +810,6 @@ func gatherPodLogs(ctx context.Context,
805810
tw *tar.Writer,
806811
cmd *cobra.Command,
807812
) error {
808-
// TODO: update to use specific client after SSA change
809813
// Get all Pods that match the given Label
810814
pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
811815
LabelSelector: labelSelector,
@@ -863,7 +867,6 @@ func gatherPatroniInfo(ctx context.Context,
863867
tw *tar.Writer,
864868
cmd *cobra.Command,
865869
) error {
866-
// TODO: update to use specific client after SSA change
867870
// Get the primary instance Pod by its labels
868871
pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
869872
LabelSelector: util.PrimaryInstanceLabels(clusterName),
@@ -931,6 +934,79 @@ func gatherPatroniInfo(ctx context.Context,
931934
return nil
932935
}
933936

937+
// gatherProcessInfo takes a client and a tar writer, then execs into relevant Pods to grab
938+
// running process information for each Pod.
939+
func gatherProcessInfo(ctx context.Context,
940+
clientset *kubernetes.Clientset,
941+
config *rest.Config,
942+
namespace string,
943+
clusterName string,
944+
tw *tar.Writer,
945+
cmd *cobra.Command,
946+
) error {
947+
// Get the cluster Pods by label
948+
pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
949+
LabelSelector: "postgres-operator.crunchydata.com/cluster=" + clusterName,
950+
})
951+
if err != nil {
952+
if apierrors.IsForbidden(err) {
953+
cmd.Println(err.Error())
954+
return nil
955+
}
956+
return err
957+
}
958+
959+
if len(pods.Items) == 0 {
960+
// If we didn't find any resources, skip
961+
cmd.Println("PostgresCluster Pods not found when gathering process information, skipping")
962+
return nil
963+
}
964+
965+
podExec, err := util.NewPodExecutor(config)
966+
if err != nil {
967+
return err
968+
}
969+
970+
for _, pod := range pods.Items {
971+
for _, container := range pod.Spec.Containers {
972+
// Attempt to exec in and run 'ps' command in all available containers,
973+
// regardless of state, etc. Many of the resulting process lists will
974+
// be nearly identical because certain Pods use a shared process
975+
// namespace, but this function aims to gather as much detail as possible.
976+
// - https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/
977+
exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string,
978+
) error {
979+
return podExec(namespace, pod.GetName(), container.Name,
980+
stdin, stdout, stderr, command...)
981+
}
982+
983+
stdout, stderr, err := Executor(exec).processes()
984+
if err != nil {
985+
// If we get an RBAC error, let the user know. Otherwise, just
986+
// try the next container.
987+
if apierrors.IsForbidden(err) {
988+
cmd.Printf("Failed to get processes for Container \"%s\" in Pod \"%s\". Error: \"%s\"\n",
989+
container.Name, pod.GetName(), err.Error())
990+
}
991+
continue
992+
}
993+
994+
var buf bytes.Buffer
995+
buf.Write([]byte(stdout))
996+
if stderr != "" {
997+
buf.Write([]byte(stderr))
998+
}
999+
1000+
path := clusterName + "/" + "processes" + "/" + pod.GetName() + "/" + container.Name
1001+
if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil {
1002+
return err
1003+
}
1004+
}
1005+
}
1006+
1007+
return nil
1008+
}
1009+
9341010
// translateTimestampSince returns the elapsed time since timestamp in
9351011
// human-readable approximation.
9361012
func translateTimestampSince(timestamp metav1.Time) string {

testing/kuttl/e2e/support-export/01--support_export.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,28 @@ commands:
6969
exit 1
7070
fi
7171
72+
PROCESSES_DIR="./kuttl-support-cluster/processes/"
73+
74+
# Check for the files that contain an expected pgBackRest server process.
75+
# Expected to be found in the Postgres instance Pod's 'database',
76+
# 'replication-cert-copy', 'pgbackrest', and 'pgbackrest-config' containers
77+
# and the pgBackRest repo Pod's 'pgbackrest' and 'pgbackrest-config'
78+
# containers, i.e. 6 files total.
79+
found=$(grep -lR "pgbackrest server" ${PROCESSES_DIR} | wc -l)
80+
if [ "${found}" -ne 6 ]; then
81+
echo "Expected to find 6 pgBackRest processes, got ${found}"
82+
eval "$CLEANUP"
83+
exit 1
84+
fi
85+
86+
# Check for the files that contain an expected Postgres process. Expected
87+
# to be found in the Postgres instance Pod's 'database', 'replication-cert-copy',
88+
# 'pgbackrest', and 'pgbackrest-config' containers, i.e. 4 files total.
89+
found=$(grep -lR "postgres -D /pgdata/pg" ${PROCESSES_DIR} | wc -l)
90+
if [ "${found}" -ne 4 ]; then
91+
echo "Expected to find 4 Postgres processes, got ${found}"
92+
eval "$CLEANUP"
93+
exit 1
94+
fi
95+
7296
- script: rm -r ./kuttl-support-cluster ./crunchy_k8s_support_export_*.tar.gz

testing/kuttl/e2e/support-export/README

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ in this test. Over time, we should build on these tests to improve coverage.
2222
- compression level
2323
- Node "list" and YAML files
2424
- Event file
25+
- Pod Processes
2526
The support export archive is deleted.
2627

2728
#### Invalid Cluster

0 commit comments

Comments
 (0)