Skip to content

Commit 935e8a7

Browse files
Merge pull request #30219 from machine424/autht
OCPBUGS-61193: chore(extended/prometheus): make 'targets auth' test more lenient and more resilient.
2 parents 130265e + e4f1166 commit 935e8a7

File tree

1 file changed

+45
-28
lines changed

1 file changed

+45
-28
lines changed

test/extended/prometheus/prometheus.go

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ import (
88
"net/http"
99
"os"
1010
"regexp"
11+
"runtime"
1112
"strings"
1213
"time"
1314

1415
"github.com/openshift/origin/pkg/monitortestlibrary/allowedalerts"
1516
"github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
17+
"golang.org/x/sync/errgroup"
1618

1719
g "github.com/onsi/ginkgo/v2"
1820
o "github.com/onsi/gomega"
@@ -52,7 +54,7 @@ type TelemeterClientConfig struct {
5254
}
5355

5456
// Set $MONITORING_AUTH_TEST_NAMESPACE to focus on the targets from a single namespace
55-
var monitoringAuthTestNamespace = os.Getenv("MONITORING_AUTH_TEST_NAMESPACE")
57+
var namespaceUnderTest = os.Getenv("MONITORING_AUTH_TEST_NAMESPACE")
5658

5759
var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", func() {
5860
defer g.GinkgoRecover()
@@ -87,14 +89,14 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
8789
bearerToken, err = helper.RequestPrometheusServiceAccountAPIToken(ctx, oc)
8890
o.Expect(err).NotTo(o.HaveOccurred(), "Request prometheus service account API token")
8991

90-
if namespacesToSkip.Has(monitoringAuthTestNamespace) {
91-
e2e.Logf("The namespace %s is not skipped because $MONITORING_AUTH_TEST_NAMESPACE is set to it", monitoringAuthTestNamespace)
92-
namespacesToSkip.Delete(monitoringAuthTestNamespace)
92+
if namespacesToSkip.Has(namespaceUnderTest) {
93+
e2e.Logf("The namespace %s is not skipped because $MONITORING_AUTH_TEST_NAMESPACE is set to it", namespaceUnderTest)
94+
namespacesToSkip.Delete(namespaceUnderTest)
9395
}
9496
})
9597

9698
g.It("should not be accessible without auth [Serial]", func() {
97-
var errs []error
99+
expectedStatusCodes := sets.New(http.StatusUnauthorized, http.StatusForbidden)
98100

99101
g.By("checking that targets reject the requests with 401 or 403")
100102
execPod := exutil.CreateExecPodOrFail(oc.AdminKubeClient(), oc.Namespace(), "execpod-targets-authorization")
@@ -111,35 +113,50 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
111113
o.Expect(err).NotTo(o.HaveOccurred())
112114
o.Expect(len(targets.Data.ActiveTargets)).Should(o.BeNumerically(">=", 5))
113115

114-
expected := sets.New[int](http.StatusUnauthorized, http.StatusForbidden)
116+
eg := errgroup.Group{}
117+
eg.SetLimit(runtime.GOMAXPROCS(0))
118+
errChan := make(chan error, len(targets.Data.ActiveTargets))
115119
for _, target := range targets.Data.ActiveTargets {
116-
ns := target.Labels["namespace"]
117-
o.Expect(ns).NotTo(o.BeEmpty())
118-
if monitoringAuthTestNamespace != "" && ns != monitoringAuthTestNamespace {
119-
continue
120-
}
121-
pod := target.Labels["pod"]
122-
e2e.Logf("Checking via pod exec status code from the scrape url %s for pod %s/%s without authorization (skip=%t)", target.ScrapeUrl, ns, pod, namespacesToSkip.Has(ns))
123-
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, time.Minute, true, func(context.Context) (bool, error) {
124-
statusCode, execError := helper.URLStatusCodeExecViaPod(execPod.Namespace, execPod.Name, target.ScrapeUrl)
125-
e2e.Logf("The scrape url %s for pod %s/%s without authorization returned %d, %v (skip=%t)", target.ScrapeUrl, ns, pod, statusCode, execError, namespacesToSkip.Has(ns))
126-
if expected.Has(statusCode) {
127-
return true, nil
120+
eg.Go(func() error {
121+
ns := target.Labels["namespace"]
122+
o.Expect(ns).NotTo(o.BeEmpty())
123+
if namespaceUnderTest != "" && ns != namespaceUnderTest {
124+
return nil
128125
}
129-
// retry on those cases
130-
if execError != nil ||
131-
statusCode/100 == 5 ||
132-
statusCode == http.StatusRequestTimeout ||
133-
statusCode == http.StatusTooManyRequests {
134-
return false, nil
126+
job, pod := target.Labels["job"], target.Labels["pod"]
127+
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 5*time.Minute, true, func(context.Context) (bool, error) {
128+
statusCode, execError := helper.URLStatusCodeExecViaPod(execPod.Namespace, execPod.Name, target.ScrapeUrl)
129+
e2e.Logf("Scraping target %s of pod %s/%s/%s without auth returned %d, err: %v (skip=%t)", target.ScrapeUrl, ns, job, pod, statusCode, execError, namespacesToSkip.Has(ns))
130+
if expectedStatusCodes.Has(statusCode) {
131+
return true, nil
132+
}
133+
// retry on those cases
134+
if execError != nil ||
135+
statusCode/100 == 5 ||
136+
statusCode == http.StatusRequestTimeout ||
137+
statusCode == http.StatusTooManyRequests {
138+
return false, nil
139+
}
140+
return false, fmt.Errorf("expecting status code %v but returned %d", expectedStatusCodes.UnsortedList(), statusCode)
141+
})
142+
143+
// Decided to ignore targets that Prometheus itself failed to scrape; may be leftovers from earlier tests.
144+
// See: https://issues.redhat.com/browse/OCPBUGS-61193
145+
if err != nil && target.Health == "up" && !namespacesToSkip.Has(ns) {
146+
errChan <- fmt.Errorf("Scraping target %s of pod %s/%s/%s is probably possible without auth: %w", target.ScrapeUrl, ns, job, pod, err)
135147
}
136-
return false, fmt.Errorf("expecting status code %v but returned %d", expected.UnsortedList(), statusCode)
148+
149+
return nil
137150
})
138-
if err != nil && !namespacesToSkip.Has(ns) {
139-
errs = append(errs, fmt.Errorf("the scrape url %s for pod %s/%s is accessible without authorization: %w", target.ScrapeUrl, ns, pod, err))
140-
}
141151
}
152+
err = eg.Wait()
153+
o.Expect(err).NotTo(o.HaveOccurred())
154+
close(errChan)
142155

156+
var errs []error
157+
for err := range errChan {
158+
errs = append(errs, err)
159+
}
143160
o.Expect(errs).To(o.BeEmpty())
144161
})
145162

0 commit comments

Comments
 (0)