Add a per-container image failure counter (version_checker_image_failures_total)
and increment it whenever the controller fails to check a container image.

NOTE(review): line breaks and indentation in this patch were reconstructed from
a whitespace-collapsed copy, assuming gofmt-formatted sources; blank context
lines were placed from the hunk header counts. If context does not match, try
`git apply --ignore-whitespace`. The index hashes below come from the original
patch and do not describe the edited post-images.

diff --git a/pkg/controller/sync.go b/pkg/controller/sync.go
index 27b8f258..8778f092 100644
--- a/pkg/controller/sync.go
+++ b/pkg/controller/sync.go
@@ -93,6 +93,8 @@ func (c *Controller) checkContainer(ctx context.Context, log *logrus.Entry,
 
 	result, err := c.checker.Container(ctx, log, pod, container, opts)
 	if err != nil {
+		// Count the failed check in the image failure metric before returning.
+		c.metrics.ErrorsReporting(pod.Namespace, pod.Name, container.Name, container.Image)
 		return err
 	}
 
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index 413d8f60..c4265713 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -25,6 +25,7 @@ type Metrics struct {
 	registry               *prometheus.Registry
 	containerImageVersion  *prometheus.GaugeVec
 	containerImageDuration *prometheus.GaugeVec
+	containerImageErrors   *prometheus.CounterVec
 
 	// Contains all metrics for the roundtripper
 	roundTripper *RoundTripper
@@ -62,12 +63,23 @@ func NewServer(log *logrus.Entry) *Metrics {
 		},
 		[]string{"namespace", "pod", "container", "image"},
 	)
+	containerImageErrors := promauto.With(reg).NewCounterVec(
+		prometheus.CounterOpts{
+			Namespace: "version_checker",
+			Name:      "image_failures_total",
+			Help:      "Total number of errors where the version-checker was unable to get the latest upstream registry version",
+		},
+		[]string{
+			"namespace", "pod", "container", "image",
+		},
+	)
 
 	return &Metrics{
 		log:                    log.WithField("module", "metrics"),
 		registry:               reg,
 		containerImageVersion:  containerImageVersion,
 		containerImageDuration: containerImageDuration,
+		containerImageErrors:   containerImageErrors,
 		containerCache:         make(map[string]cacheItem),
 		roundTripper:           NewRoundTripper(reg),
 	}
@@ -160,6 +172,12 @@ func (m *Metrics) latestImageIndex(namespace, pod, container, containerType stri
 	return strings.Join([]string{namespace, pod, container, containerType}, "")
 }
 
+// ErrorsReporting increments the image failure counter for the series
+// identified by namespace, pod, container and imageURL.
+func (m *Metrics) ErrorsReporting(namespace, pod, container, imageURL string) {
+	m.containerImageErrors.WithLabelValues(namespace, pod, container, imageURL).Inc()
+}
+
 func (m *Metrics) buildLabels(namespace, pod, container, containerType, imageURL, currentVersion, latestVersion string) prometheus.Labels {
 	return prometheus.Labels{
 		"namespace": namespace,
diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go
index 2ad67b59..a4fd6648 100644
--- a/pkg/metrics/metrics_test.go
+++ b/pkg/metrics/metrics_test.go
@@ -4,8 +4,10 @@ import (
 	"fmt"
 	"testing"
 
+	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/testutil"
 	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
 )
 
 func TestCache(t *testing.T) {
@@ -37,3 +39,46 @@ func TestCache(t *testing.T) {
 		}
 	}
 }
+
+// TestErrorsReporting verifies that the error metric increments correctly
+func TestErrorsReporting(t *testing.T) {
+	m := NewServer(logrus.NewEntry(logrus.New()))
+
+	// Reset the metrics before testing
+	m.containerImageErrors.Reset()
+
+	testCases := []struct {
+		namespace string
+		pod       string
+		container string
+		image     string
+		expected  int // cumulative count for this label set; cases run in order on one Metrics
+	}{
+		{"namespace", "pod", "container", "url", 1},
+		{"namespace", "pod", "container", "url", 2},
+		{"namespace2", "pod2", "container2", "url2", 1},
+	}
+
+	for i, tc := range testCases {
+		t.Run(fmt.Sprintf("Case %d", i+1), func(t *testing.T) {
+			// Report an error
+			m.ErrorsReporting(tc.namespace, tc.pod, tc.container, tc.image)
+
+			// Retrieve metric
+			metric, err := m.containerImageErrors.GetMetricWith(prometheus.Labels{
+				"namespace": tc.namespace,
+				"pod":       tc.pod,
+				"container": tc.container,
+				"image":     tc.image,
+			})
+			if !assert.NoError(t, err, "Failed to get metric with labels") {
+				// Skip the value check: the metric is not usable when the lookup fails.
+				return
+			}
+
+			// Validate metric count
+			fetchErrorCount := testutil.ToFloat64(metric)
+			assert.Equal(t, float64(tc.expected), fetchErrorCount, "Expected error count to increment correctly")
+		})
+	}
+}