
Commit 8864da1

OCPBUGS-61508: IngressOperator not exposing some metrics for degraded IngressController
Signed-off-by: Davide Salerno <[email protected]>
1 parent a35f1da commit 8864da1

11 files changed: +3966 −2 lines changed

pkg/operator/controller/ingress/status.go

Lines changed: 2 additions & 2 deletions
@@ -104,10 +104,10 @@ func (r *reconciler) syncIngressControllerStatus(ic *operatorv1.IngressControlle
 			errs = append(errs, fmt.Errorf("failed to update ingresscontroller status: %v", err))
 		} else {
 			updatedIc = true
-			SetIngressControllerConditionsMetric(updated)
 		}
 	}
-
+	// OCPBUGS-61508: set the ingress_controller_conditions metrics at every reconcile.
+	SetIngressControllerConditionsMetric(updated)
 	return retryableerror.NewMaybeRetryableAggregate(errs), updatedIc
 }
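
SetIngressControllerConditionsMetric itself is not part of this diff. For reference, below is a minimal sketch of what such a conditions gauge setter can look like, assuming prometheus/client_golang and the openshift/api operator types; the label names, help text, and registration details are illustrative and may differ from the operator's actual implementation.

package metrics

import (
	operatorv1 "github.com/openshift/api/operator/v1"
	"github.com/prometheus/client_golang/prometheus"
)

var ingressControllerConditions = prometheus.NewGaugeVec(prometheus.GaugeOpts{
	Name: "ingress_controller_conditions",
	Help: "Report the conditions for ingress controllers. 0 is False and 1 is True.",
}, []string{"name", "condition"})

func init() {
	// Registration is illustrative; the operator wires its collectors into the
	// registry backing the metrics endpoint it serves on localhost:60000.
	prometheus.MustRegister(ingressControllerConditions)
}

// SetIngressControllerConditionsMetric publishes the status conditions of the
// given IngressController as gauge samples. Calling it on every reconcile, as
// the change above does, keeps the series populated even when the status
// update fails or the IngressController is degraded.
func SetIngressControllerConditionsMetric(ic *operatorv1.IngressController) {
	for _, cond := range ic.Status.Conditions {
		value := 0.0
		if cond.Status == operatorv1.ConditionTrue {
			value = 1.0
		}
		ingressControllerConditions.WithLabelValues(ic.Name, cond.Type).Set(value)
	}
}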

test/e2e/all_test.go

Lines changed: 1 addition & 0 deletions
@@ -136,5 +136,6 @@ func TestAll(t *testing.T) {
 		// Serializing the test ensures it runs in isolation with other tests,
 		// preventing any impact of the mutating webhook on pod creation in the cluster
 		t.Run("TestGatewayAPI", TestGatewayAPI)
+		t.Run("TestIngressControllerConditionsMetricAfterRestart", TestIngressControllerConditionsMetricAfterRestart)
 	})
 }
Lines changed: 79 additions & 0 deletions

@@ -0,0 +1,79 @@
//go:build e2e
// +build e2e

package e2e

import (
	"fmt"
	"os/exec"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

func queryPrometheusForMetric(t *testing.T, metric string) string {
	// You might need to adjust the namespace and the metrics endpoint.
	t.Log("Retrieving Prometheus metrics from the ingress operator pod...")
	cmd := exec.Command("oc", "exec", "-n", "openshift-ingress-operator", "deploy/ingress-operator", "--",
		"curl", "-s", "localhost:60000/metrics")
	out, err := cmd.Output()
	require.NoError(t, err, "Failed to query the operator metrics endpoint")
	return string(out)
}

func restartOperatorPod(t *testing.T) {
	// Find the operator pod.
	t.Logf("Restarting the ingress operator pod...")
	getPodCmd := exec.Command("oc", "get", "pod", "-n", "openshift-ingress-operator", "-l", "name=ingress-operator", "-o", "jsonpath={.items[0].metadata.name}")
	podNameBytes, err := getPodCmd.Output()
	require.NoError(t, err)
	podName := strings.TrimSpace(string(podNameBytes))

	// Delete the pod.
	delPodCmd := exec.Command("oc", "delete", "pod", podName, "-n", "openshift-ingress-operator")
	err = delPodCmd.Run()
	require.NoError(t, err)

	// Wait for the new pod to be ready.
	for i := 0; i < 30; i++ {
		getPodCmd := exec.Command("oc", "get", "pod", "-n", "openshift-ingress-operator", "-l", "name=ingress-operator", "-o", "jsonpath={.items[0].status.phase}")
		phaseBytes, _ := getPodCmd.Output()
		if strings.TrimSpace(string(phaseBytes)) == "Running" {
			time.Sleep(10 * time.Second) // wait for full readiness
			return
		}
		time.Sleep(5 * time.Second)
	}
	t.Fatalf("Operator pod did not restart in time")
}

// TestIngressControllerConditionsMetricAfterRestart verifies that the
// ingress_controller_conditions(router,status) metric is still available
// after an operator pod restart.
//
// This test:
//
// 1. Verifies that the metric is available while the operator pod is up and running (i.e. before the restart).
//
// 2. Restarts the operator pod and waits for it to become available again.
//
// 3. Repeats step 1, expecting the metric to be present again.
//
// NB: because this test restarts the operator pod, it cannot run in parallel with other tests.
func TestIngressControllerConditionsMetricAfterRestart(t *testing.T) {
	metricName := "ingress_controller_conditions"

	// Check the metric before the restart.
	outBefore := queryPrometheusForMetric(t, metricName)
	t.Logf("Verifying that the %s metric is exposed before the restart", metricName)
	require.Contains(t, outBefore, metricName, fmt.Sprintf("Metric %s not found before the operator restart", metricName))

	// Restart the operator pod.
	restartOperatorPod(t)

	// Check the metric after the restart.
	outAfter := queryPrometheusForMetric(t, metricName)
	t.Logf("Verifying that the %s metric is exposed after the pod restart as well", metricName)
	require.Contains(t, outAfter, metricName, fmt.Sprintf("Metric %s not found after the operator restart", metricName))
}
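
The new test is guarded by the e2e build tag and shells out to oc, so it needs a logged-in oc client and a KUBECONFIG pointing at a live cluster. Assuming the repository's standard test/e2e layout, an invocation along these lines should run it on its own (the project's Makefile targets may wrap this differently):

    go test -v -tags e2e -timeout 30m -run TestIngressControllerConditionsMetricAfterRestart ./test/e2e/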

vendor/github.com/stretchr/testify/require/doc.go

Lines changed: 29 additions & 0 deletions
Generated file; diff not rendered.

vendor/github.com/stretchr/testify/require/forward_requirements.go

Lines changed: 16 additions & 0 deletions
Generated file; diff not rendered.
