Skip to content

Commit b192be7

Browse files
authored
feat: add scrape duration and error metrics (#48)
1 parent 3631b97 commit b192be7

6 files changed

Lines changed: 321 additions & 48 deletions

File tree

README.md

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,36 +49,38 @@ scrape_configs:
4949
5050
### Config
5151
52-
| Flag | Description | Default Value | Environment Variable |
53-
| ---------------- | -------------------------------------------------------------------------------------------------- | ------------------------ | ------------------------------ |
54-
| `--port` | The port of docker exporter server. | `8080` | `DOCKER_EXPORTER_PORT` |
55-
| `--host` | The host of docker exporter server. | | `DOCKER_EXPORTER_HOST` |
56-
| `--auth-token` | Optional auth token for the docker exporter server. If no token is set authentication is disabled. | | `DOCKER_EXPORTER_AUTH_TOKEN` |
57-
| `--log-level` | Log level for the exporter. | `info` | `DOCKER_EXPORTER_LOG_LEVEL` |
58-
| `--ignore-label` | Set the label name for ignoring docker containers. (See [Ignoring Containers](#ignoring-containers)) | `docker-exporter.ignore` | `DOCKER_EXPORTER_IGNORE_LABEL` |
52+
| Flag | Description | Default Value | Environment Variable |
53+
| ---------------- | ---------------------------------------------------------------------------------------------------- | ------------------------ | ------------------------------ |
54+
| `--port` | The port of docker exporter server. | `8080` | `DOCKER_EXPORTER_PORT` |
55+
| `--host` | The host of docker exporter server. | | `DOCKER_EXPORTER_HOST` |
56+
| `--auth-token` | Optional auth token for the docker exporter server. If no token is set authentication is disabled. | | `DOCKER_EXPORTER_AUTH_TOKEN` |
57+
| `--log-level` | Log level for the exporter. | `info` | `DOCKER_EXPORTER_LOG_LEVEL` |
58+
| `--ignore-label` | Set the label name for ignoring docker containers. (See [Ignoring Containers](#ignoring-containers)) | `docker-exporter.ignore` | `DOCKER_EXPORTER_IGNORE_LABEL` |
5959

6060
### Exported Metrics
6161

62-
| Metric Name | Description | Labels |
63-
| ------------------------------------------- | ------------------------------ | ----------------------- |
64-
| docker_container_block_io_read_bytes | Block I/O read bytes total | name |
65-
| docker_container_block_io_write_bytes | Block I/O write bytes total | name |
66-
| docker_container_cpu_usage_percentage | CPU usage in percentage | name |
67-
| docker_container_info | Infos about the container | name, image_name, image |
68-
| docker_container_memory_total_bytes | Total memory in bytes | name |
69-
| docker_container_memory_usage_bytes | Memory usage in bytes | name |
70-
| docker_container_memory_usage_percentage | Memory usage in percentage | name |
71-
| docker_container_network_rx_bytes | Network received bytes total | name, network |
72-
| docker_container_network_rx_dropped_packets | Network dropped packets total | name, network |
73-
| docker_container_network_rx_errors | Network received errors | name, network |
74-
| docker_container_network_rx_packets | Network received packets total | name, network |
75-
| docker_container_network_tx_bytes | Network sent bytes total | name, network |
76-
| docker_container_network_tx_dropped_packets | Network dropped packets total | name, network |
77-
| docker_container_network_tx_errors | Network sent errors | name, network |
78-
| docker_container_network_tx_packets | Network sent packets total | name, network |
79-
| docker_container_pids_current | Current number of pids | name |
80-
| docker_container_state | State of the container | name, state |
81-
| docker_container_uptime | Uptime of the container | name |
62+
| Metric Name | Description | Labels |
63+
| ------------------------------------------- | ---------------------------------- | ----------------------- |
64+
| docker_container_block_io_read_bytes | Block I/O read bytes total | name |
65+
| docker_container_block_io_write_bytes | Block I/O write bytes total | name |
66+
| docker_container_cpu_usage_percentage | CPU usage in percentage | name |
67+
| docker_container_info | Infos about the container | name, image_name, image |
68+
| docker_container_memory_total_bytes | Total memory in bytes | name |
69+
| docker_container_memory_usage_bytes | Memory usage in bytes | name |
70+
| docker_container_memory_usage_percentage | Memory usage in percentage | name |
71+
| docker_container_network_rx_bytes | Network received bytes total | name, network |
72+
| docker_container_network_rx_dropped_packets | Network dropped packets total | name, network |
73+
| docker_container_network_rx_errors | Network received errors | name, network |
74+
| docker_container_network_rx_packets | Network received packets total | name, network |
75+
| docker_container_network_tx_bytes | Network sent bytes total | name, network |
76+
| docker_container_network_tx_dropped_packets | Network dropped packets total | name, network |
77+
| docker_container_network_tx_errors | Network sent errors | name, network |
78+
| docker_container_network_tx_packets | Network sent packets total | name, network |
79+
| docker_container_pids_current | Current number of pids | name |
80+
| docker_container_state | State of the container | name, state |
81+
| docker_container_uptime | Uptime of the container in seconds | name |
82+
| docker_exporter_scrape_duration | Duration of the scrape in seconds | |
83+
| docker_exporter_scrape_errors | Number of scrape errors | |
8284

8385
### Ignoring Containers
8486

internal/clock/clock.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import "time"
77
type Clock interface {
88
Parse(layout, value string) (time.Time, error)
99
Since(t time.Time) time.Duration
10+
Now() time.Time
1011
}
1112

1213
type realClock struct{}
@@ -22,3 +23,7 @@ func (realClock) Parse(layout, value string) (time.Time, error) {
2223
func (realClock) Since(t time.Time) time.Duration {
2324
return time.Since(t)
2425
}
26+
27+
func (realClock) Now() time.Time {
28+
return time.Now()
29+
}

internal/collector/collector.go

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ func NewWithClient(client *client.Client, clk clock.Clock, ignoreLabel string) *
4242
func (c *DockerCollector) Describe(_ chan<- *prometheus.Desc) {}
4343

4444
func (c *DockerCollector) Collect(ch chan<- prometheus.Metric) {
45+
now := c.clock.Now()
46+
4547
ctx := context.Background()
4648

4749
containers, err := c.client.ContainerList(
@@ -54,17 +56,23 @@ func (c *DockerCollector) Collect(ch chan<- prometheus.Metric) {
5456
if err != nil {
5557
log.WithError(err).
5658
Error("failed to fetch container list")
57-
return
58-
}
59+
c.collectScrapeError(ch)
5960

60-
var wg sync.WaitGroup
61+
} else {
62+
var wg sync.WaitGroup
63+
64+
for _, container := range containers {
65+
wg.Add(1)
66+
go c.collectContainerMetrics(ctx, container, ch, &wg)
67+
}
6168

62-
for _, container := range containers {
63-
wg.Add(1)
64-
go c.collectContainerMetrics(ctx, container, ch, &wg)
69+
wg.Wait()
6570
}
6671

67-
wg.Wait()
72+
ch <- prometheus.MustNewConstMetric(scrapeDuration,
73+
prometheus.GaugeValue,
74+
c.clock.Since(now).Seconds(),
75+
)
6876
}
6977

7078
func (c *DockerCollector) collectContainerMetrics(ctx context.Context, container types.Container, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
@@ -79,6 +87,7 @@ func (c *DockerCollector) collectContainerMetrics(ctx context.Context, container
7987
if err != nil {
8088
log.WithError(err).WithField("id", container.ID).
8189
Error("error inspecting container")
90+
c.collectScrapeError(ch)
8291
return
8392
}
8493

@@ -108,6 +117,7 @@ func (c *DockerCollector) collectContainerMetrics(ctx context.Context, container
108117
if err != nil {
109118
log.WithError(err).WithField("id", container.ID).
110119
Error("error getting stats for container")
120+
c.collectScrapeError(ch)
111121
return
112122
}
113123

@@ -296,6 +306,10 @@ func (c *DockerCollector) isContainerIgnored(container types.Container) bool {
296306
return b
297307
}
298308

309+
func (c *DockerCollector) collectScrapeError(ch chan<- prometheus.Metric) {
310+
ch <- prometheus.MustNewConstMetric(scrapeErrors, prometheus.CounterValue, 1)
311+
}
312+
299313
func calculateMemUsageUnixNoCache(mem types.MemoryStats) float64 {
300314
if v, isCgroup1 := mem.Stats["total_inactive_file"]; isCgroup1 && v < mem.Usage {
301315
return float64(mem.Usage - v)

internal/collector/collector_test.go

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,30 @@ func TestCollectMetrics(t *testing.T) {
3636
}
3737

3838
mockClock := mock.NewMockClock(ctrl)
39+
mockClock.EXPECT().
40+
Now().
41+
Return(time.Now()).
42+
Times(1)
43+
3944
mockClock.EXPECT().
4045
Parse(gomock.Any(), gomock.Any()).
4146
DoAndReturn(func(s1, s2 string) (time.Time, error) {
4247
return time.Parse(s1, s2)
4348
}).
4449
Times(1)
4550

51+
// The first call to Since is for the uptime of the container
4652
mockClock.EXPECT().
4753
Since(gomock.Any()).
4854
Return(1 * time.Second).
4955
Times(1)
5056

57+
// The second call to Since is for the scrape duration
58+
mockClock.EXPECT().
59+
Since(gomock.Any()).
60+
Return(2 * time.Second).
61+
Times(1)
62+
5163
dc := collector.NewWithClient(cli, mockClock, ignoreLabel)
5264

5365
const expected = `
@@ -108,6 +120,161 @@ func TestCollectMetrics(t *testing.T) {
108120
# HELP docker_container_uptime Uptime of the container in seconds
109121
# TYPE docker_container_uptime gauge
110122
docker_container_uptime{name="testName"} 1.0
123+
# HELP docker_exporter_scrape_duration Duration of the scrape in seconds
124+
# TYPE docker_exporter_scrape_duration gauge
125+
docker_exporter_scrape_duration 2
126+
`
127+
128+
if err := testutil.CollectAndCompare(dc, strings.NewReader(expected)); err != nil {
129+
t.Errorf("unexpected collecting result:\n%s", err)
130+
}
131+
}
132+
133+
func TestCollectMetricsShouldCollectErrorWhenContainerListFails(t *testing.T) {
134+
ctrl := gomock.NewController(t)
135+
defer ctrl.Finish()
136+
137+
srv := httptest.NewServer(http.HandlerFunc(mockErrorDockerApi))
138+
defer srv.Close()
139+
140+
cli, err := client.NewClientWithOpts(
141+
client.WithHost(srv.URL),
142+
client.WithHTTPClient(&http.Client{}),
143+
)
144+
145+
if err != nil {
146+
panic(err)
147+
}
148+
149+
mockClock := mock.NewMockClock(ctrl)
150+
mockClock.EXPECT().
151+
Now().
152+
Return(time.Now()).
153+
Times(1)
154+
155+
mockClock.EXPECT().
156+
Since(gomock.Any()).
157+
Return(2 * time.Second).
158+
Times(1)
159+
160+
dc := collector.NewWithClient(cli, mockClock, ignoreLabel)
161+
162+
const expected = `
163+
# HELP docker_exporter_scrape_errors Number of scrape errors
164+
# TYPE docker_exporter_scrape_errors counter
165+
docker_exporter_scrape_errors 1
166+
# HELP docker_exporter_scrape_duration Duration of the scrape in seconds
167+
# TYPE docker_exporter_scrape_duration gauge
168+
docker_exporter_scrape_duration 2
169+
`
170+
171+
if err := testutil.CollectAndCompare(dc, strings.NewReader(expected)); err != nil {
172+
t.Errorf("unexpected collecting result:\n%s", err)
173+
}
174+
}
175+
176+
func TestCollectMetricsShouldCollectErrorWhenContainerInspectFails(t *testing.T) {
177+
ctrl := gomock.NewController(t)
178+
defer ctrl.Finish()
179+
180+
srv := httptest.NewServer(http.HandlerFunc(mockContainerInspectErrorDockerApi))
181+
defer srv.Close()
182+
183+
cli, err := client.NewClientWithOpts(
184+
client.WithHost(srv.URL),
185+
client.WithHTTPClient(&http.Client{}),
186+
)
187+
188+
if err != nil {
189+
panic(err)
190+
}
191+
192+
mockClock := mock.NewMockClock(ctrl)
193+
mockClock.EXPECT().
194+
Now().
195+
Return(time.Now()).
196+
Times(1)
197+
198+
mockClock.EXPECT().
199+
Since(gomock.Any()).
200+
Return(2 * time.Second).
201+
Times(1)
202+
203+
dc := collector.NewWithClient(cli, mockClock, ignoreLabel)
204+
205+
const expected = `
206+
# HELP docker_exporter_scrape_errors Number of scrape errors
207+
# TYPE docker_exporter_scrape_errors counter
208+
docker_exporter_scrape_errors 1
209+
# HELP docker_exporter_scrape_duration Duration of the scrape in seconds
210+
# TYPE docker_exporter_scrape_duration gauge
211+
docker_exporter_scrape_duration 2
212+
`
213+
214+
if err := testutil.CollectAndCompare(dc, strings.NewReader(expected)); err != nil {
215+
t.Errorf("unexpected collecting result:\n%s", err)
216+
}
217+
}
218+
219+
func TestCollectMetricsShouldCollectErrorWhenContainerStatsFails(t *testing.T) {
220+
ctrl := gomock.NewController(t)
221+
defer ctrl.Finish()
222+
223+
srv := httptest.NewServer(http.HandlerFunc(mockContainerStatsErrorDockerApi))
224+
defer srv.Close()
225+
226+
cli, err := client.NewClientWithOpts(
227+
client.WithHost(srv.URL),
228+
client.WithHTTPClient(&http.Client{}),
229+
)
230+
231+
if err != nil {
232+
panic(err)
233+
}
234+
235+
mockClock := mock.NewMockClock(ctrl)
236+
mockClock.EXPECT().
237+
Now().
238+
Return(time.Now()).
239+
Times(1)
240+
241+
mockClock.EXPECT().
242+
Parse(gomock.Any(), gomock.Any()).
243+
DoAndReturn(func(s1, s2 string) (time.Time, error) {
244+
return time.Parse(s1, s2)
245+
}).
246+
Times(1)
247+
248+
// The first call to Since is for the uptime of the container
249+
mockClock.EXPECT().
250+
Since(gomock.Any()).
251+
Return(1 * time.Second).
252+
Times(1)
253+
254+
// The second call to Since is for the scrape duration
255+
mockClock.EXPECT().
256+
Since(gomock.Any()).
257+
Return(2 * time.Second).
258+
Times(1)
259+
260+
dc := collector.NewWithClient(cli, mockClock, ignoreLabel)
261+
262+
const expected = `
263+
# HELP docker_container_info Infos about the container
264+
# TYPE docker_container_info gauge
265+
docker_container_info{image="sha256:d3751d33f9cd5049c4af2b462735457e4d3baf130bcbb87f389e349fbaeb20b9",image_name="myImage",name="testName"} 1
266+
# HELP docker_container_state State of the container
267+
# TYPE docker_container_state gauge
268+
docker_container_state{name="testName",state="running"} 1
269+
# HELP docker_container_uptime Uptime of the container in seconds
270+
# TYPE docker_container_uptime gauge
271+
docker_container_uptime{name="testName"} 1
272+
# HELP docker_exporter_scrape_duration Duration of the scrape in seconds
273+
# TYPE docker_exporter_scrape_duration gauge
274+
docker_exporter_scrape_duration 2
275+
# HELP docker_exporter_scrape_errors Number of scrape errors
276+
# TYPE docker_exporter_scrape_errors counter
277+
docker_exporter_scrape_errors 1
111278
`
112279

113280
if err := testutil.CollectAndCompare(dc, strings.NewReader(expected)); err != nil {
@@ -226,3 +393,35 @@ func mockDockerApi(w http.ResponseWriter, r *http.Request) {
226393

227394
mockJsonResponse(w, r, buildContainerListResponse())
228395
}
396+
397+
func mockErrorDockerApi(w http.ResponseWriter, r *http.Request) {
398+
w.WriteHeader(http.StatusInternalServerError)
399+
}
400+
401+
func mockContainerInspectErrorDockerApi(w http.ResponseWriter, r *http.Request) {
402+
if strings.Contains(r.URL.Path, "stats") {
403+
mockJsonResponse(w, r, buildStatsResponse())
404+
return
405+
}
406+
407+
if strings.Contains(r.URL.Path, "testID") {
408+
w.WriteHeader(http.StatusInternalServerError)
409+
return
410+
}
411+
412+
mockJsonResponse(w, r, buildContainerListResponse())
413+
}
414+
415+
func mockContainerStatsErrorDockerApi(w http.ResponseWriter, r *http.Request) {
416+
if strings.Contains(r.URL.Path, "stats") {
417+
w.WriteHeader(http.StatusInternalServerError)
418+
return
419+
}
420+
421+
if strings.Contains(r.URL.Path, "testID") {
422+
mockJsonResponse(w, r, buildInspectResponse())
423+
return
424+
}
425+
426+
mockJsonResponse(w, r, buildContainerListResponse())
427+
}

internal/collector/metrics.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,20 @@ var (
2424
nil,
2525
)
2626

27+
scrapeDuration = prometheus.NewDesc(
28+
"docker_exporter_scrape_duration",
29+
"Duration of the scrape in seconds",
30+
nil,
31+
nil,
32+
)
33+
34+
scrapeErrors = prometheus.NewDesc(
35+
"docker_exporter_scrape_errors",
36+
"Number of scrape errors",
37+
nil,
38+
nil,
39+
)
40+
2741
/*
2842
CPU Metrics
2943
*/

0 commit comments

Comments
 (0)