Skip to content

Commit 1fc70a9

Browse files
Merge pull request #30256 from machine424/rtt
OCPBUGS-61193: chore(extended/prometheus): 2/2: make 'targets auth' test more lenient and more resilient
2 parents: da0ca22 + 882d6ca; commit: 1fc70a9

File tree

1 file changed

+54
-27
lines changed

1 file changed

+54
-27
lines changed

test/extended/prometheus/prometheus.go

Lines changed: 54 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ import (
99
"os"
1010
"regexp"
1111
"runtime"
12+
"slices"
1213
"strings"
1314
"time"
1415

@@ -105,33 +106,44 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
105106
o.Expect(err).NotTo(o.HaveOccurred(), fmt.Sprintf("Delete pod %s/%s", execPod.Namespace, execPod.Name))
106107
}()
107108

108-
contents, err := helper.GetURLWithToken(helper.MustJoinUrlPath(prometheusURL, "api/v1/targets"), bearerToken)
109-
o.Expect(err).NotTo(o.HaveOccurred())
109+
promTargets := func() (*prometheusTargets, error) {
110+
contents, err := helper.GetURLWithToken(helper.MustJoinUrlPath(prometheusURL, "api/v1/targets"), bearerToken)
111+
if err != nil {
112+
return nil, err
113+
}
114+
targets := &prometheusTargets{}
115+
err = json.Unmarshal([]byte(contents), targets)
116+
if err != nil {
117+
return nil, err
118+
}
119+
// sanity check.
120+
if len(targets.Data.ActiveTargets) < 5 {
121+
return nil, fmt.Errorf("only got %d targets, something is wrong", len(targets.Data.ActiveTargets))
122+
}
123+
return targets, nil
124+
}
110125

111-
targets := &prometheusTargets{}
112-
err = json.Unmarshal([]byte(contents), targets)
126+
initialPromTargets, err := promTargets()
113127
o.Expect(err).NotTo(o.HaveOccurred())
114-
o.Expect(len(targets.Data.ActiveTargets)).Should(o.BeNumerically(">=", 5))
115-
116128
eg := errgroup.Group{}
117129
eg.SetLimit(runtime.GOMAXPROCS(0))
118-
errChan := make(chan error, len(targets.Data.ActiveTargets))
119-
for _, target := range targets.Data.ActiveTargets {
130+
errChan := make(chan error, len(initialPromTargets.Data.ActiveTargets))
131+
for _, target := range initialPromTargets.Data.ActiveTargets {
120132
eg.Go(func() error {
121-
ns := target.Labels["namespace"]
122-
o.Expect(ns).NotTo(o.BeEmpty())
123-
if namespaceUnderTest != "" && ns != namespaceUnderTest {
133+
targetNs, targetJob, targetPod, targetScrapeURL := target.Labels["namespace"], target.Labels["job"], target.Labels["pod"], target.ScrapeUrl
134+
o.Expect(targetNs).NotTo(o.BeEmpty())
135+
if namespaceUnderTest != "" && targetNs != namespaceUnderTest {
124136
return nil
125137
}
126-
job, pod := target.Labels["job"], target.Labels["pod"]
127-
err := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 5*time.Minute, true, func(context.Context) (bool, error) {
128-
statusCode, execError := helper.URLStatusCodeExecViaPod(execPod.Namespace, execPod.Name, target.ScrapeUrl)
129-
e2e.Logf("Scraping target %s of pod %s/%s/%s without auth returned %d, err: %v (skip=%t)", target.ScrapeUrl, ns, job, pod, statusCode, execError, namespacesToSkip.Has(ns))
138+
scrapeErr := wait.PollUntilContextTimeout(context.Background(), 10*time.Second, 5*time.Minute, true, func(context.Context) (bool, error) {
139+
statusCode, err := helper.URLStatusCodeExecViaPod(execPod.Namespace, execPod.Name, targetScrapeURL)
140+
e2e.Logf("scraping target %s of pod %s/%s/%s without auth returned %d, err: %v (skip=%t)", targetScrapeURL, targetNs, targetJob, targetPod, statusCode, err, namespacesToSkip.Has(targetNs))
130141
if expectedStatusCodes.Has(statusCode) {
131142
return true, nil
132143
}
133-
// retry on those cases
134-
if execError != nil ||
144+
145+
// retry
146+
if err != nil ||
135147
statusCode/100 == 5 ||
136148
statusCode == http.StatusRequestTimeout ||
137149
statusCode == http.StatusTooManyRequests {
@@ -140,12 +152,25 @@ var _ = g.Describe("[sig-instrumentation][Late] Platform Prometheus targets", fu
140152
return false, fmt.Errorf("expecting status code %v but returned %d", expectedStatusCodes.UnsortedList(), statusCode)
141153
})
142154

143-
// Decided to ignore targets that Prometheus itself failed to scrape; may be leftovers from earlier tests.
144-
// See: https://issues.redhat.com/browse/OCPBUGS-61193
145-
if err != nil && target.Health == "up" && !namespacesToSkip.Has(ns) {
146-
errChan <- fmt.Errorf("Scraping target %s of pod %s/%s/%s is probably possible without auth: %w", target.ScrapeUrl, ns, job, pod, err)
155+
// Ignoring targets that Prometheus no longer scrapes or fails to scrape.
156+
// These may be leftovers from earlier tests.
157+
// Reference: https://issues.redhat.com/browse/OCPBUGS-61193
158+
if scrapeErr != nil && !namespacesToSkip.Has(targetNs) {
159+
targets, err := promTargets()
160+
if err != nil {
161+
e2e.Logf("refreshing state of target %s of pod %s/%s/%s failed, err: %v (skip=%t)", targetScrapeURL, targetNs, targetJob, targetPod, err, namespacesToSkip.Has(targetNs))
162+
targets = initialPromTargets
163+
}
164+
idx := slices.IndexFunc(targets.Data.ActiveTargets, func(t prometheusTarget) bool {
165+
return t.Labels["namespace"] == targetNs &&
166+
t.Labels["job"] == targetJob &&
167+
t.Labels["pod"] == targetPod &&
168+
t.ScrapeUrl == targetScrapeURL
169+
})
170+
if idx >= 0 && targets.Data.ActiveTargets[idx].Health == "up" {
171+
errChan <- fmt.Errorf("failed to ensure scraping target %s of pod %s/%s/%s requires auth: %w", targetScrapeURL, targetNs, targetJob, targetPod, scrapeErr)
172+
}
147173
}
148-
149174
return nil
150175
})
151176
}
@@ -929,13 +954,15 @@ func all(errs ...error) []error {
929954
return result
930955
}
931956

957+
type prometheusTarget struct {
958+
Labels map[string]string
959+
Health string
960+
ScrapeUrl string
961+
}
962+
932963
type prometheusTargets struct {
933964
Data struct {
934-
ActiveTargets []struct {
935-
Labels map[string]string
936-
Health string
937-
ScrapeUrl string
938-
}
965+
ActiveTargets []prometheusTarget
939966
}
940967
Status string
941968
}

0 commit comments

Comments
 (0)