From 346f719decb1d45de60141fb489b09bb10e55b49 Mon Sep 17 00:00:00 2001 From: Saylor Berman Date: Tue, 15 Jul 2025 14:47:50 -0600 Subject: [PATCH] Graceful recovery test improvements Updated the node debugger Job so it would no longer error when exiting. Also added some context to a few error messages, and allowed for test retries. Due to the nature of this test restarting the node (which is a kind container), I have a feeling that we get intermittent connection issues as things start back up, due to node issues and not necessarily NGF/nginx issues. --- tests/framework/request.go | 1 - tests/suite/graceful_recovery_test.go | 15 ++++++--------- .../graceful-recovery/node-debugger-job.yaml | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tests/framework/request.go b/tests/framework/request.go index 494cbfcf76..7cc1b0384c 100644 --- a/tests/framework/request.go +++ b/tests/framework/request.go @@ -25,7 +25,6 @@ func Get( if err != nil { return 0, "", err } - defer resp.Body.Close() body := new(bytes.Buffer) diff --git a/tests/suite/graceful_recovery_test.go b/tests/suite/graceful_recovery_test.go index 60b2bb6b45..71c37d9e68 100644 --- a/tests/suite/graceful_recovery_test.go +++ b/tests/suite/graceful_recovery_test.go @@ -30,7 +30,7 @@ const ( // Since this test involves restarting of the test node, it is recommended to be run separate from other tests // such that any issues in this test do not interfere with other tests. -var _ = Describe("Graceful Recovery test", Ordered, Label("graceful-recovery"), func() { +var _ = Describe("Graceful Recovery test", Ordered, FlakeAttempts(2), Label("graceful-recovery"), func() { var ( files = []string{ "graceful-recovery/cafe.yaml", @@ -120,13 +120,13 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("graceful-recovery"), return names, nil } - runNodeDebuggerJob := func(nginxPodName, jobScript string) (*v1.Job, error) { + runNodeDebuggerJob := func(nginxPodName string) (*v1.Job, error) { ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout) defer cancel() var nginxPod core.Pod if err := k8sClient.Get(ctx, types.NamespacedName{Namespace: ns.Name, Name: nginxPodName}, &nginxPod); err != nil { - return nil, fmt.Errorf("error retrieving NGF Pod: %w", err) + return nil, fmt.Errorf("error retrieving nginx Pod: %w", err) } b, err := resourceManager.GetFileContents("graceful-recovery/node-debugger-job.yaml") @@ -146,7 +146,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("graceful-recovery"), len(job.Spec.Template.Spec.Containers), ) } - job.Spec.Template.Spec.Containers[0].Args = []string{jobScript} job.Namespace = ns.Name if err = resourceManager.Apply([]client.Object{job}); err != nil { @@ -157,13 +156,11 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("graceful-recovery"), } restartNginxContainer := func(nginxPodName, namespace, containerName string) { - jobScript := "PID=$(pgrep -f \"nginx-agent\") && kill -9 $PID" - restartCount, err := getContainerRestartCount(nginxPodName, namespace, containerName) Expect(err).ToNot(HaveOccurred()) cleanUpPortForward() - job, err := runNodeDebuggerJob(nginxPodName, jobScript) + job, err := runNodeDebuggerJob(nginxPodName) Expect(err).ToNot(HaveOccurred()) Eventually( @@ -524,11 +521,11 @@ func expectRequestToSucceed(appURL, address string, responseBodyMessage string) status, body, err := framework.Get(appURL, address, timeoutConfig.RequestTimeout, nil, nil) if status != http.StatusOK { - return errors.New("http status was not 200") + return fmt.Errorf("http status was not 200, got %d: %w", status, err) } if !strings.Contains(body, responseBodyMessage) { - return errors.New("expected response body to contain correct body message") + return fmt.Errorf("expected response body to contain correct body message, got: %s", body) } return err diff --git a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml index 5fc7aa5e6c..9b9d03c52c 100644 --- a/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml +++ b/tests/suite/manifests/graceful-recovery/node-debugger-job.yaml @@ -13,7 +13,7 @@ spec: - name: node-debugger-container image: ubuntu:24.04 command: ["/bin/bash", "-c"] - args: ["to be replaced by the test"] + args: ["pkill -x \"nginx-agent\""] securityContext: privileged: true volumeMounts: