Skip to content

Commit b499ca7

Browse files
authored
Retry connection if nginx unavailable
Closes: #36 * Add nginx.retries cli flag * Add nginx.retry-interval cli flag * Add handling of SIGTERM signals * Add started log message * Add createClientWithRetries test * Add time.Duration env variables: timeout, nginxRetryInterval * Fix error context: On OSS Failed to create client would be appended twice from nginx client and exporter * Add environment variables to --help flag and readme.md
1 parent 455e79d commit b499ca7

File tree

4 files changed

+178
-26
lines changed

4 files changed

+178
-26
lines changed

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,17 @@ To start the exporter we use the [docker run](https://docs.docker.com/engine/ref
6464
Usage of ./nginx-prometheus-exporter:
6565
-nginx.plus
6666
Start the exporter for NGINX Plus. By default, the exporter is started for NGINX. The default value can be overwritten by NGINX_PLUS environment variable.
67+
-nginx.retries int
68+
A number of retries the exporter will make on start to connect to the NGINX stub_status page/NGINX Plus API before exiting with an error. The default value can be overwritten by NGINX_RETRIES environment variable.
69+
-nginx.retry-interval duration
70+
An interval between retries to connect to the NGINX stub_status page/NGINX Plus API on start. The default value can be overwritten by NGINX_RETRY_INTERVAL environment variable. (default 5s)
6771
-nginx.scrape-uri string
6872
A URI for scraping NGINX or NGINX Plus metrics.
6973
For NGINX, the stub_status page must be available through the URI. For NGINX Plus -- the API. The default value can be overwritten by SCRAPE_URI environment variable. (default "http://127.0.0.1:8080/stub_status")
7074
-nginx.ssl-verify
7175
Perform SSL certificate verification. The default value can be overwritten by SSL_VERIFY environment variable. (default true)
7276
-nginx.timeout duration
73-
A timeout for scraping metrics from NGINX or NGINX Plus. (default 5s)
77+
A timeout for scraping metrics from NGINX or NGINX Plus. The default value can be overwritten by TIMEOUT environment variable. (default 5s)
7478
-web.listen-address string
7579
An address to listen on for web interface and telemetry. The default value can be overwritten by LISTEN_ADDRESS environment variable. (default ":9113")
7680
-web.telemetry-path string
@@ -84,7 +88,7 @@ Usage of ./nginx-prometheus-exporter:
8488
* For NGINX, the following metrics are exported:
8589
* All [stub_status](http://nginx.org/en/docs/http/ngx_http_stub_status_module.html) metrics.
8690
* `nginx_up` -- shows the status of the last metric scrape: `1` for a successful scrape and `0` for a failed one.
87-
91+
8892
Connect to the `/metrics` page of the running exporter to see the complete list of metrics along with their descriptions.
8993
* For NGINX Plus, the following metrics are exported:
9094
* [Connections](http://nginx.org/en/docs/http/ngx_http_api_module.html#def_nginx_connections).

client/nginx.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,8 @@ func NewNginxClient(httpClient *http.Client, apiEndpoint string) (*NginxClient,
3737
httpClient: httpClient,
3838
}
3939

40-
if _, err := client.GetStubStats(); err != nil {
41-
return nil, fmt.Errorf("Failed to create NginxClient: %v", err)
42-
}
43-
44-
return client, nil
40+
_, err := client.GetStubStats()
41+
return client, err
4542
}
4643

4744
// GetStubStats fetches the stub_status metrics.

exporter.go

Lines changed: 87 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import (
66
"log"
77
"net/http"
88
"os"
9+
"os/signal"
910
"strconv"
11+
"syscall"
1012
"time"
1113

1214
plusclient "github.com/nginxinc/nginx-plus-go-sdk/client"
@@ -24,43 +26,100 @@ func getEnv(key, defaultValue string) string {
2426
return value
2527
}
2628

29+
// getEnvInt returns the integer value of the environment variable key, or
// defaultValue when the variable is not set.
//
// The value must be a base-10 integer that fits in the platform's int. A
// value that cannot be parsed (including an empty or out-of-range one)
// terminates the process through log.Fatalf.
func getEnvInt(key string, defaultValue int) int {
	value, ok := os.LookupEnv(key)
	if !ok {
		return defaultValue
	}
	// bitSize 0 makes ParseInt range-check against int itself. Parsing as a
	// 64-bit value and converting afterwards would silently wrap on 32-bit
	// platforms instead of reporting the bad configuration.
	i, err := strconv.ParseInt(value, 10, 0)
	if err != nil {
		log.Fatalf("Environment variable value for %s must be an int: %v", key, err)
	}
	return int(i)
}
40+
2741
// getEnvBool returns the boolean value of the environment variable key, or
// defaultValue when the variable is not set.
//
// The value is parsed with strconv.ParseBool. A value it rejects terminates
// the process through log.Fatalf, and the parse error is included in the
// message so the offending value is visible to the operator.
func getEnvBool(key string, defaultValue bool) bool {
	value, ok := os.LookupEnv(key)
	if !ok {
		return defaultValue
	}
	b, err := strconv.ParseBool(value)
	if err != nil {
		log.Fatalf("Environment variable value for %s must be a boolean: %v", key, err)
	}
	return b
}
3852

53+
// getEnvDuration reads the environment variable key as a time.Duration.
//
// When the variable is not set, defaultValue is returned. When it is set, the
// value is parsed with time.ParseDuration; a value that does not parse
// terminates the process through log.Fatalf.
func getEnvDuration(key string, defaultValue time.Duration) time.Duration {
	if raw, found := os.LookupEnv(key); found {
		parsed, parseErr := time.ParseDuration(raw)
		if parseErr != nil {
			log.Fatalf("Environment variable value for %s must be a duration: %v", key, parseErr)
		}
		return parsed
	}
	return defaultValue
}
64+
65+
// createClientWithRetries calls getClient until it succeeds or the retry
// budget is exhausted.
//
// getClient is always invoked at least once. After a failed attempt the
// function logs a message, sleeps for retryInterval and tries again, up to
// retries additional times. A negative retries value is treated as zero.
//
// It returns the client from the first successful attempt, or nil and the
// error from the last attempt when every attempt failed.
func createClientWithRetries(getClient func() (interface{}, error), retries int, retryInterval time.Duration) (interface{}, error) {
	// Without this clamp a negative count would skip the loop entirely and
	// return (nil, nil), which callers would take for a usable client.
	if retries < 0 {
		retries = 0
	}

	var err error
	for attempt := 0; attempt <= retries; attempt++ {
		var nginxClient interface{}
		nginxClient, err = getClient()
		if err == nil {
			return nginxClient, nil
		}
		// Only wait when another attempt will follow; the final failure
		// is reported to the caller immediately.
		if attempt < retries {
			log.Printf("Could not create Nginx Client. Retrying in %v...", retryInterval)
			time.Sleep(retryInterval)
		}
	}
	return nil, err
}
81+
3982
var (
4083
// Set during go build
4184
version string
4285
gitCommit string
4386

4487
// Defaults values
45-
defaultListenAddress = getEnv("LISTEN_ADDRESS", ":9113")
46-
defaultMetricsPath = getEnv("TELEMETRY_PATH", "/metrics")
47-
defaultNginxPlus = getEnvBool("NGINX_PLUS", false)
48-
defaultScrapeURI = getEnv("SCRAPE_URI", "http://127.0.0.1:8080/stub_status")
49-
defaultSslVerify = getEnvBool("SSL_VERIFY", true)
88+
defaultListenAddress = getEnv("LISTEN_ADDRESS", ":9113")
89+
defaultMetricsPath = getEnv("TELEMETRY_PATH", "/metrics")
90+
defaultNginxPlus = getEnvBool("NGINX_PLUS", false)
91+
defaultScrapeURI = getEnv("SCRAPE_URI", "http://127.0.0.1:8080/stub_status")
92+
defaultSslVerify = getEnvBool("SSL_VERIFY", true)
93+
defaultTimeout = getEnvDuration("TIMEOUT", time.Second*5)
94+
defaultNginxRetries = getEnvInt("NGINX_RETRIES", 0)
95+
defaultNginxRetryInterval = getEnvDuration("NGINX_RETRY_INTERVAL", time.Second*5)
5096

5197
// Command-line flags
52-
listenAddr = flag.String("web.listen-address", defaultListenAddress,
98+
listenAddr = flag.String("web.listen-address",
99+
defaultListenAddress,
53100
"An address to listen on for web interface and telemetry. The default value can be overwritten by LISTEN_ADDRESS environment variable.")
54-
metricsPath = flag.String("web.telemetry-path", defaultMetricsPath,
101+
metricsPath = flag.String("web.telemetry-path",
102+
defaultMetricsPath,
55103
"A path under which to expose metrics. The default value can be overwritten by TELEMETRY_PATH environment variable.")
56-
nginxPlus = flag.Bool("nginx.plus", defaultNginxPlus,
104+
nginxPlus = flag.Bool("nginx.plus",
105+
defaultNginxPlus,
57106
"Start the exporter for NGINX Plus. By default, the exporter is started for NGINX. The default value can be overwritten by NGINX_PLUS environment variable.")
58-
scrapeURI = flag.String("nginx.scrape-uri", defaultScrapeURI,
107+
scrapeURI = flag.String("nginx.scrape-uri",
108+
defaultScrapeURI,
59109
`A URI for scraping NGINX or NGINX Plus metrics.
60110
For NGINX, the stub_status page must be available through the URI. For NGINX Plus -- the API. The default value can be overwritten by SCRAPE_URI environment variable.`)
61-
sslVerify = flag.Bool("nginx.ssl-verify", defaultSslVerify,
111+
sslVerify = flag.Bool("nginx.ssl-verify",
112+
defaultSslVerify,
62113
"Perform SSL certificate verification. The default value can be overwritten by SSL_VERIFY environment variable.")
63-
timeout = flag.Duration("nginx.timeout", 5*time.Second, "A timeout for scraping metrics from NGINX or NGINX Plus.")
114+
timeout = flag.Duration("nginx.timeout",
115+
defaultTimeout,
116+
"A timeout for scraping metrics from NGINX or NGINX Plus. The default value can be overwritten by TIMEOUT environment variable.")
117+
nginxRetries = flag.Int("nginx.retries",
118+
defaultNginxRetries,
119+
"A number of retries the exporter will make on start to connect to the NGINX stub_status page/NGINX Plus API before exiting with an error. The default value can be overwritten by NGINX_RETRIES environment variable.")
120+
nginxRetryInterval = flag.Duration("nginx.retry-interval",
121+
defaultNginxRetryInterval,
122+
"An interval between retries to connect to the NGINX stub_status page/NGINX Plus API on start. The default value can be overwritten by NGINX_RETRY_INTERVAL environment variable.")
64123
)
65124

66125
func main() {
@@ -91,22 +150,30 @@ func main() {
91150
},
92151
}
93152

153+
signalChan := make(chan os.Signal, 1)
154+
signal.Notify(signalChan, syscall.SIGTERM)
155+
go func() {
156+
log.Printf("SIGTERM received: %v. Exiting...", <-signalChan)
157+
os.Exit(0)
158+
}()
159+
94160
if *nginxPlus {
95-
client, err := plusclient.NewNginxClient(httpClient, *scrapeURI)
161+
plusClient, err := createClientWithRetries(func() (interface{}, error) {
162+
return plusclient.NewNginxClient(httpClient, *scrapeURI)
163+
}, *nginxRetries, *nginxRetryInterval)
96164
if err != nil {
97165
log.Fatalf("Could not create Nginx Plus Client: %v", err)
98166
}
99-
100-
registry.MustRegister(collector.NewNginxPlusCollector(client, "nginxplus"))
167+
registry.MustRegister(collector.NewNginxPlusCollector(plusClient.(*plusclient.NginxClient), "nginxplus"))
101168
} else {
102-
client, err := client.NewNginxClient(httpClient, *scrapeURI)
169+
ossClient, err := createClientWithRetries(func() (interface{}, error) {
170+
return client.NewNginxClient(httpClient, *scrapeURI)
171+
}, *nginxRetries, *nginxRetryInterval)
103172
if err != nil {
104173
log.Fatalf("Could not create Nginx Client: %v", err)
105174
}
106-
107-
registry.MustRegister(collector.NewNginxCollector(client, "nginx"))
175+
registry.MustRegister(collector.NewNginxCollector(ossClient.(*client.NginxClient), "nginx"))
108176
}
109-
110177
http.Handle(*metricsPath, promhttp.HandlerFor(registry, promhttp.HandlerOpts{}))
111178
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
112179
w.Write([]byte(`<html>
@@ -117,5 +184,6 @@ func main() {
117184
</body>
118185
</html>`))
119186
})
187+
log.Printf("NGINX Prometheus Exporter has successfully started")
120188
log.Fatal(http.ListenAndServe(*listenAddr, nil))
121189
}

exporter_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
package main
2+
3+
import (
4+
"errors"
5+
"reflect"
6+
"testing"
7+
"time"
8+
)
9+
10+
func TestCreateClientWithRetries(t *testing.T) {
11+
type args struct {
12+
client interface{}
13+
err error
14+
retries int
15+
retryInterval time.Duration
16+
}
17+
18+
tests := []struct {
19+
name string
20+
args args
21+
expectedRetries int
22+
want interface{}
23+
wantErr bool
24+
}{
25+
{
26+
name: "getClient returns a valid client",
27+
args: args{
28+
client: "client",
29+
err: nil,
30+
},
31+
expectedRetries: 0,
32+
want: "client",
33+
wantErr: false,
34+
},
35+
{
36+
name: "getClient returns an error after no retries",
37+
args: args{
38+
client: nil,
39+
err: errors.New("error"),
40+
},
41+
expectedRetries: 0,
42+
want: nil,
43+
wantErr: true,
44+
},
45+
{
46+
name: "getClient returns an error after retries",
47+
args: args{
48+
client: nil,
49+
err: errors.New("error"),
50+
retries: 3,
51+
retryInterval: time.Millisecond * 1,
52+
},
53+
expectedRetries: 3,
54+
want: nil,
55+
wantErr: true,
56+
},
57+
}
58+
for _, tt := range tests {
59+
t.Run(tt.name, func(t *testing.T) {
60+
invocations := 0
61+
getClient := func() (interface{}, error) {
62+
invocations++
63+
return tt.args.client, tt.args.err
64+
}
65+
66+
got, err := createClientWithRetries(getClient, tt.args.retries, tt.args.retryInterval)
67+
68+
actualRetries := invocations - 1
69+
70+
if actualRetries != tt.expectedRetries {
71+
t.Errorf("createClientWithRetries() got %v retries, expected %v", actualRetries, tt.expectedRetries)
72+
return
73+
} else if (err != nil) != tt.wantErr {
74+
t.Errorf("createClientWithRetries() error = %v, wantErr %v", err, tt.wantErr)
75+
return
76+
} else if err != nil && tt.wantErr {
77+
return
78+
} else if !reflect.DeepEqual(got, tt.want) {
79+
t.Errorf("createClientWithRetries() = %v, want %v", got, tt.want)
80+
}
81+
})
82+
}
83+
}

0 commit comments

Comments
 (0)