@@ -8,11 +8,13 @@ import (
88 "net/http"
99 "os"
1010 "regexp"
11+ "runtime"
1112 "strings"
1213 "time"
1314
1415 "github.com/openshift/origin/pkg/monitortestlibrary/allowedalerts"
1516 "github.com/openshift/origin/pkg/monitortestlibrary/platformidentification"
17+ "golang.org/x/sync/errgroup"
1618
1719 g "github.com/onsi/ginkgo/v2"
1820 o "github.com/onsi/gomega"
@@ -52,7 +54,7 @@ type TelemeterClientConfig struct {
5254}
5355
5456// Set $MONITORING_AUTH_TEST_NAMESPACE to focus on the targets from a single namespace
55- var monitoringAuthTestNamespace = os .Getenv ("MONITORING_AUTH_TEST_NAMESPACE" )
57+ var namespaceUnderTest = os .Getenv ("MONITORING_AUTH_TEST_NAMESPACE" )
5658
5759var _ = g .Describe ("[sig-instrumentation][Late] Platform Prometheus targets" , func () {
5860 defer g .GinkgoRecover ()
@@ -87,14 +89,14 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
8789 bearerToken , err = helper .RequestPrometheusServiceAccountAPIToken (ctx , oc )
8890 o .Expect (err ).NotTo (o .HaveOccurred (), "Request prometheus service account API token" )
8991
90- if namespacesToSkip .Has (monitoringAuthTestNamespace ) {
91- e2e .Logf ("The namespace %s is not skipped because $MONITORING_AUTH_TEST_NAMESPACE is set to it" , monitoringAuthTestNamespace )
92- namespacesToSkip .Delete (monitoringAuthTestNamespace )
92+ if namespacesToSkip .Has (namespaceUnderTest ) {
93+ e2e .Logf ("The namespace %s is not skipped because $MONITORING_AUTH_TEST_NAMESPACE is set to it" , namespaceUnderTest )
94+ namespacesToSkip .Delete (namespaceUnderTest )
9395 }
9496 })
9597
9698 g .It ("should not be accessible without auth [Serial]" , func () {
97- var errs [] error
99+ expectedStatusCodes := sets . New ( http . StatusUnauthorized , http . StatusForbidden )
98100
99101 g .By ("checking that targets reject the requests with 401 or 403" )
100102 execPod := exutil .CreateExecPodOrFail (oc .AdminKubeClient (), oc .Namespace (), "execpod-targets-authorization" )
@@ -111,35 +113,50 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
111113 o .Expect (err ).NotTo (o .HaveOccurred ())
112114 o .Expect (len (targets .Data .ActiveTargets )).Should (o .BeNumerically (">=" , 5 ))
113115
114- expected := sets .New [int ](http .StatusUnauthorized , http .StatusForbidden )
116+ eg := errgroup.Group {}
117+ eg .SetLimit (runtime .GOMAXPROCS (0 ))
118+ errChan := make (chan error , len (targets .Data .ActiveTargets ))
115119 for _ , target := range targets .Data .ActiveTargets {
116- ns := target .Labels ["namespace" ]
117- o .Expect (ns ).NotTo (o .BeEmpty ())
118- if monitoringAuthTestNamespace != "" && ns != monitoringAuthTestNamespace {
119- continue
120- }
121- pod := target .Labels ["pod" ]
122- e2e .Logf ("Checking via pod exec status code from the scrape url %s for pod %s/%s without authorization (skip=%t)" , target .ScrapeUrl , ns , pod , namespacesToSkip .Has (ns ))
123- err := wait .PollUntilContextTimeout (context .Background (), 10 * time .Second , time .Minute , true , func (context.Context ) (bool , error ) {
124- statusCode , execError := helper .URLStatusCodeExecViaPod (execPod .Namespace , execPod .Name , target .ScrapeUrl )
125- e2e .Logf ("The scrape url %s for pod %s/%s without authorization returned %d, %v (skip=%t)" , target .ScrapeUrl , ns , pod , statusCode , execError , namespacesToSkip .Has (ns ))
126- if expected .Has (statusCode ) {
127- return true , nil
120+ eg .Go (func () error {
121+ ns := target .Labels ["namespace" ]
122+ o .Expect (ns ).NotTo (o .BeEmpty ())
123+ if namespaceUnderTest != "" && ns != namespaceUnderTest {
124+ return nil
128125 }
129- // retry on those cases
130- if execError != nil ||
131- statusCode / 100 == 5 ||
132- statusCode == http .StatusRequestTimeout ||
133- statusCode == http .StatusTooManyRequests {
134- return false , nil
126+ job , pod := target .Labels ["job" ], target .Labels ["pod" ]
127+ err := wait .PollUntilContextTimeout (context .Background (), 10 * time .Second , 5 * time .Minute , true , func (context.Context ) (bool , error ) {
128+ statusCode , execError := helper .URLStatusCodeExecViaPod (execPod .Namespace , execPod .Name , target .ScrapeUrl )
129+ e2e .Logf ("Scraping target %s of pod %s/%s/%s without auth returned %d, err: %v (skip=%t)" , target .ScrapeUrl , ns , job , pod , statusCode , execError , namespacesToSkip .Has (ns ))
130+ if expectedStatusCodes .Has (statusCode ) {
131+ return true , nil
132+ }
133+ // retry on those cases
134+ if execError != nil ||
135+ statusCode / 100 == 5 ||
136+ statusCode == http .StatusRequestTimeout ||
137+ statusCode == http .StatusTooManyRequests {
138+ return false , nil
139+ }
140+ return false , fmt .Errorf ("expecting status code %v but returned %d" , expectedStatusCodes .UnsortedList (), statusCode )
141+ })
142+
143+ // Decided to ignore targets that Prometheus itself failed to scrape; may be leftovers from earlier tests.
144+ // See: https://issues.redhat.com/browse/OCPBUGS-61193
145+ if err != nil && target .Health == "up" && ! namespacesToSkip .Has (ns ) {
146+ errChan <- fmt .Errorf ("Scraping target %s of pod %s/%s/%s is probably possible without auth: %w" , target .ScrapeUrl , ns , job , pod , err )
135147 }
136- return false , fmt .Errorf ("expecting status code %v but returned %d" , expected .UnsortedList (), statusCode )
148+
149+ return nil
137150 })
138- if err != nil && ! namespacesToSkip .Has (ns ) {
139- errs = append (errs , fmt .Errorf ("the scrape url %s for pod %s/%s is accessible without authorization: %w" , target .ScrapeUrl , ns , pod , err ))
140- }
141151 }
152+ err = eg .Wait ()
153+ o .Expect (err ).NotTo (o .HaveOccurred ())
154+ close (errChan )
142155
156+ var errs []error
157+ for err := range errChan {
158+ errs = append (errs , err )
159+ }
143160 o .Expect (errs ).To (o .BeEmpty ())
144161 })
145162
0 commit comments