From 62d048d2158fc55413a3682ee7f9d3f04837fbe8 Mon Sep 17 00:00:00 2001 From: Mikhail Nacharov <1246847+mnacharov@users.noreply.github.com> Date: Tue, 30 Sep 2025 18:35:14 +0200 Subject: [PATCH] Added metrics to count the number of rollout restarts Fixes #1129 --- helpers/rollout_restart.go | 3 +++ internal/metrics/metrics.go | 20 ++++++++++++++++++++ main.go | 1 + 3 files changed, 24 insertions(+) diff --git a/helpers/rollout_restart.go b/helpers/rollout_restart.go index 45cf4e183..f33cdc3a9 100644 --- a/helpers/rollout_restart.go +++ b/helpers/rollout_restart.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/vault-secrets-operator/api/v1beta1" "github.com/hashicorp/vault-secrets-operator/consts" + "github.com/hashicorp/vault-secrets-operator/internal/metrics" ) // AnnotationRestartedAt is updated to trigger a rollout-restart @@ -62,9 +63,11 @@ func HandleRolloutRestarts(ctx context.Context, client ctrlclient.Client, obj ct errs = errors.Join(err) recorder.Eventf(obj, corev1.EventTypeWarning, consts.ReasonRolloutRestartFailed, "Rollout restart failed for target %#v: err=%s", target, err) + metrics.RolloutRestartsErrors.WithLabelValues(obj.GetNamespace(), obj.GetName(), target.Name, target.Kind).Inc() } else { recorder.Eventf(obj, corev1.EventTypeNormal, consts.ReasonRolloutRestartTriggered, "Rollout restart triggered for %v", target) + metrics.RolloutRestartsTotal.WithLabelValues(obj.GetNamespace(), obj.GetName(), target.Name, target.Kind).Inc() } } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 683fadd57..e8e7b1458 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -39,6 +39,7 @@ const ( NameRequestsTotal = "requests_total" NameRequestsErrorsTotal = "requests_errors_total" NameTaintedClients = "tainted_clients" + subsystemRolloutRestarts = "rollout_restarts" ) var ResourceStatus = prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -56,6 +57,25 @@ func MustRegisterResourceStatus() { ) } +var 
RolloutRestartsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: prometheus.BuildFQName( + Namespace, subsystemRolloutRestarts, "total"), + Help: "Number of total rollout restarts.", +}, []string{"namespace", "name", "target_name", "target_kind"}) + +var RolloutRestartsErrors = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: prometheus.BuildFQName( + Namespace, subsystemRolloutRestarts, "errors_total"), + Help: "Number of total rollout restart errors.", +}, []string{"namespace", "name", "target_name", "target_kind"}) + +func MustRegisterRolloutRestarts() { + metrics.Registry.MustRegister( + RolloutRestartsTotal, + RolloutRestartsErrors, + ) +} + // SetResourceStatus for the given client.Object. If valid is true, then the // ResourceStatus gauge will be set 1, else 0. func SetResourceStatus(controller string, o client.Object, valid bool) { diff --git a/main.go b/main.go index df1d28a64..27c0e01c6 100644 --- a/main.go +++ b/main.go @@ -403,6 +403,7 @@ func main() { ) vclient.MustRegisterClientMetrics(cfc.MetricsRegistry) metrics.MustRegisterResourceStatus() + metrics.MustRegisterRolloutRestarts() metric := prometheus.NewGauge( prometheus.GaugeOpts{