From 0c5d45631fb515b5c2cc50ff205aa0618d481c63 Mon Sep 17 00:00:00 2001 From: Anton Semenov Date: Tue, 18 Nov 2025 22:20:09 +0300 Subject: [PATCH 1/3] fix: separate nginx_up and scrape success metrics --- collector/helper.go | 40 ++++++++++++++++++++++++++++ collector/nginx.go | 64 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 95 insertions(+), 9 deletions(-) diff --git a/collector/helper.go b/collector/helper.go index 9eb6315ae..652b2ab2c 100644 --- a/collector/helper.go +++ b/collector/helper.go @@ -1,6 +1,8 @@ package collector import ( + "strings" + "github.com/prometheus/client_golang/prometheus" ) @@ -22,6 +24,33 @@ func newUpMetric(namespace string, constLabels map[string]string) prometheus.Gau }) } +func newScrapeSuccessMetric(namespace string, constLabels map[string]string) prometheus.Gauge { + return prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "scrape_success", + Help: "Whether the last scrape of NGINX metrics was successful", + ConstLabels: constLabels, + }) +} + +func newScrapeDurationMetric(namespace string, constLabels map[string]string) prometheus.Gauge { + return prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "scrape_duration_seconds", + Help: "Duration of the last scrape in seconds", + ConstLabels: constLabels, + }) +} + +func newScrapeErrorsTotalMetric(namespace string, constLabels map[string]string) *prometheus.CounterVec { + return prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: namespace, + Name: "scrape_errors_total", + Help: "Total number of scrape errors by type", + ConstLabels: constLabels, + }, []string{"type"}) +} + // MergeLabels merges two maps of labels. func MergeLabels(a map[string]string, b map[string]string) map[string]string { c := make(map[string]string) @@ -35,3 +64,14 @@ func MergeLabels(a map[string]string, b map[string]string) map[string]string { return c } + +func isNetworkError(errorMsg string) bool { + return strings.Contains(errorMsg, "failed to get") || + strings.Contains(errorMsg, "connection") || + strings.Contains(errorMsg, "timeout") || + strings.Contains(errorMsg, "refused") +} + +func isHTTPError(errorMsg string) bool { + return strings.Contains(errorMsg, "expected 200 response") +} diff --git a/collector/nginx.go b/collector/nginx.go index e7cc1b1bc..2788fed6e 100644 --- a/collector/nginx.go +++ b/collector/nginx.go @@ -3,6 +3,7 @@ package collector import ( "log/slog" "sync" + "time" "github.com/nginx/nginx-prometheus-exporter/client" "github.com/prometheus/client_golang/prometheus" @@ -10,11 +11,14 @@ import ( // NginxCollector collects NGINX metrics. It implements prometheus.Collector interface. type NginxCollector struct { - upMetric prometheus.Gauge - logger *slog.Logger - nginxClient *client.NginxClient - metrics map[string]*prometheus.Desc - mutex sync.Mutex + upMetric prometheus.Gauge + scrapeSuccessMetric prometheus.Gauge + scrapeDurationMetric prometheus.Gauge + scrapeErrorsTotal *prometheus.CounterVec + logger *slog.Logger + nginxClient *client.NginxClient + metrics map[string]*prometheus.Desc + mutex sync.Mutex } // NewNginxCollector creates an NginxCollector. @@ -31,7 +35,10 @@ func NewNginxCollector(nginxClient *client.NginxClient, namespace string, constL "connections_waiting": newGlobalMetric(namespace, "connections_waiting", "Idle client connections", constLabels), "http_requests_total": newGlobalMetric(namespace, "http_requests_total", "Total http requests", constLabels), }, - upMetric: newUpMetric(namespace, constLabels), + upMetric: newUpMetric(namespace, constLabels), + scrapeSuccessMetric: newScrapeSuccessMetric(namespace, constLabels), + scrapeDurationMetric: newScrapeDurationMetric(namespace, constLabels), + scrapeErrorsTotal: newScrapeErrorsTotalMetric(namespace, constLabels), } } @@ -39,6 +46,9 @@ func NewNginxCollector(nginxClient *client.NginxClient, namespace string, constL // to the provided channel. func (c *NginxCollector) Describe(ch chan<- *prometheus.Desc) { ch <- c.upMetric.Desc() + ch <- c.scrapeSuccessMetric.Desc() + ch <- c.scrapeDurationMetric.Desc() + c.scrapeErrorsTotal.Describe(ch) for _, m := range c.metrics { ch <- m @@ -50,16 +60,52 @@ func (c *NginxCollector) Collect(ch chan<- prometheus.Metric) { c.mutex.Lock() // To protect metrics from concurrent collects defer c.mutex.Unlock() + start := time.Now() stats, err := c.nginxClient.GetStubStats() + duration := time.Since(start).Seconds() + c.scrapeDurationMetric.Set(duration) + ch <- c.scrapeDurationMetric + if err != nil { - c.upMetric.Set(nginxDown) - ch <- c.upMetric - c.logger.Error("error getting stats", "error", err.Error()) + c.handleScrapeError(ch, err) return } + c.handleScrapeSuccess(ch, stats) +} + +func (c *NginxCollector) handleScrapeError(ch chan<- prometheus.Metric, err error) { + errorMsg := err.Error() + var errorType string + + if isNetworkError(errorMsg) { + c.upMetric.Set(nginxDown) + errorType = "network" + } else if isHTTPError(errorMsg) { + c.upMetric.Set(nginxUp) + errorType = "http" + } else { + c.upMetric.Set(nginxUp) + errorType = "parse" + } + + c.scrapeErrorsTotal.WithLabelValues(errorType).Inc() + c.scrapeSuccessMetric.Set(0) + + ch <- c.upMetric + ch <- c.scrapeSuccessMetric + c.scrapeErrorsTotal.Collect(ch) + + c.logger.Error("error getting stats", "error", err.Error(), "type", errorType) +} + +func (c *NginxCollector) handleScrapeSuccess(ch chan<- prometheus.Metric, stats *client.StubStats) { c.upMetric.Set(nginxUp) + c.scrapeSuccessMetric.Set(1) + ch <- c.upMetric + ch <- c.scrapeSuccessMetric + c.scrapeErrorsTotal.Collect(ch) ch <- prometheus.MustNewConstMetric(c.metrics["connections_active"], prometheus.GaugeValue, float64(stats.Connections.Active)) From 8429679d41f357228736ffb766aa9836fdceecb7 Mon Sep 17 00:00:00 2001 From: Anton Semenov Date: Tue, 18 Nov 2025 22:35:03 +0300 Subject: [PATCH 2/3] fix: add unit tests for error classification --- collector/nginx_test.go | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 collector/nginx_test.go diff --git a/collector/nginx_test.go b/collector/nginx_test.go new file mode 100644 index 000000000..5f1389ffb --- /dev/null +++ b/collector/nginx_test.go @@ -0,0 +1,129 @@ +package collector + +import ( + "strings" + "testing" + + "github.com/prometheus/client_golang/prometheus" +) + +func TestIsNetworkError(t *testing.T) { + tests := []struct { + name string + errorMsg string + want bool + }{ + { + name: "network connection error", + errorMsg: "failed to get http://localhost:8080/stub_status: connection refused", + want: true, + }, + { + name: "network timeout error", + errorMsg: "failed to get http://localhost:8080/stub_status: timeout", + want: true, + }, + { + name: "HTTP error", + errorMsg: "expected 200 response, got 404", + want: false, + }, + { + name: "parse error", + errorMsg: "failed to parse response body", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isNetworkError(tt.errorMsg); got != tt.want { + t.Errorf("isNetworkError() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestIsHTTPError(t *testing.T) { + tests := []struct { + name string + errorMsg string + want bool + }{ + { + name: "HTTP 404 error", + errorMsg: "expected 200 response, got 404", + want: true, + }, + { + name: "HTTP 500 error", + errorMsg: "expected 200 response, got 500", + want: true, + }, + { + name: "network error", + errorMsg: "failed to get http://localhost:8080/stub_status: connection refused", + want: false, + }, + { + name: "parse error", + errorMsg: "failed to parse response body", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isHTTPError(tt.errorMsg); got != tt.want { + t.Errorf("isHTTPError() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNewScrapeSuccessMetric(t *testing.T) { + metric := newScrapeSuccessMetric("nginx", map[string]string{"job": "nginx"}) + + if metric == nil { + t.Error("newScrapeSuccessMetric() returned nil") + } + + desc := metric.Desc().String() + if !strings.Contains(desc, "nginx_scrape_success") { + t.Errorf("metric description should contain 'nginx_scrape_success', got: %s", desc) + } +} + +func TestNewScrapeDurationMetric(t *testing.T) { + metric := newScrapeDurationMetric("nginx", map[string]string{"job": "nginx"}) + + if metric == nil { + t.Error("newScrapeDurationMetric() returned nil") + } + + desc := metric.Desc().String() + if !strings.Contains(desc, "nginx_scrape_duration_seconds") { + t.Errorf("metric description should contain 'nginx_scrape_duration_seconds', got: %s", desc) + } +} + +func TestNewScrapeErrorsTotalMetric(t *testing.T) { + metric := newScrapeErrorsTotalMetric("nginx", map[string]string{"job": "nginx"}) + + if metric == nil { + t.Error("newScrapeErrorsTotalMetric() returned nil") + } + + ch := make(chan *prometheus.Desc, 10) + metric.Describe(ch) + close(ch) + + count := 0 + for range ch { + count++ + } + + if count == 0 { + t.Error("metric should have descriptions") + } +} From d7ef3c12020a670469c81137b4c26a122281df23 Mon Sep 17 00:00:00 2001 From: Anton Semenov Date: Tue, 18 Nov 2025 23:11:56 +0300 Subject: [PATCH 3/3] fix: linter issues --- collector/nginx.go | 7 ++++--- collector/nginx_test.go | 13 +++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/collector/nginx.go b/collector/nginx.go index 2788fed6e..2c6470052 100644 --- a/collector/nginx.go +++ b/collector/nginx.go @@ -78,13 +78,14 @@ func (c *NginxCollector) handleScrapeError(ch chan<- prometheus.Metric, err erro errorMsg := err.Error() var errorType string - if isNetworkError(errorMsg) { + switch { + case isNetworkError(errorMsg): c.upMetric.Set(nginxDown) errorType = "network" - } else if isHTTPError(errorMsg) { + case isHTTPError(errorMsg): c.upMetric.Set(nginxUp) errorType = "http" - } else { + default: c.upMetric.Set(nginxUp) errorType = "parse" } diff --git a/collector/nginx_test.go b/collector/nginx_test.go index 5f1389ffb..5d5ca2ed5 100644 --- a/collector/nginx_test.go +++ b/collector/nginx_test.go @@ -8,6 +8,8 @@ import ( ) func TestIsNetworkError(t *testing.T) { + t.Parallel() + tests := []struct { name string errorMsg string @@ -37,6 +39,7 @@ func TestIsNetworkError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + t.Parallel() if got := isNetworkError(tt.errorMsg); got != tt.want { t.Errorf("isNetworkError() = %v, want %v", got, tt.want) } @@ -45,6 +48,8 @@ func TestIsNetworkError(t *testing.T) { } func TestIsHTTPError(t *testing.T) { + t.Parallel() + tests := []struct { name string errorMsg string @@ -74,6 +79,7 @@ func TestIsHTTPError(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + t.Parallel() if got := isHTTPError(tt.errorMsg); got != tt.want { t.Errorf("isHTTPError() = %v, want %v", got, tt.want) } @@ -82,6 +88,8 @@ func TestIsHTTPError(t *testing.T) { } func TestNewScrapeSuccessMetric(t *testing.T) { + t.Parallel() + metric := newScrapeSuccessMetric("nginx", map[string]string{"job": "nginx"}) if metric == nil { @@ -95,6 +103,8 @@ func TestNewScrapeSuccessMetric(t *testing.T) { } func TestNewScrapeDurationMetric(t *testing.T) { + t.Parallel() + metric := newScrapeDurationMetric("nginx", map[string]string{"job": "nginx"}) if metric == nil { @@ -108,10 +118,13 @@ func TestNewScrapeDurationMetric(t *testing.T) { } func TestNewScrapeErrorsTotalMetric(t *testing.T) { + t.Parallel() + metric := newScrapeErrorsTotalMetric("nginx", map[string]string{"job": "nginx"}) if metric == nil { t.Error("newScrapeErrorsTotalMetric() returned nil") + return } ch := make(chan *prometheus.Desc, 10)