Skip to content

Commit 5b90a21

Browse files
authored
Merge pull request #3077 from DataDog/parkr-docker-daemon-track-containers-volumes
[docker] container & volume metrics (wraps #2740)
2 parents 6e3514c + 6923524 commit 5b90a21

File tree

4 files changed

+71
-29
lines changed

4 files changed

+71
-29
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ env:
3535
- TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.0.13
3636
- TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.1.3
3737
- TRAVIS_FLAVOR=couchdb
38+
# FIXME: cannot enable docker on Travis
39+
# because it needs docker and we run tests in a container
40+
# - TRAVIS_FLAVOR=docker_daemon
3841
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=0.90.13
3942
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.0.3
4043
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.1.2

checks.d/docker_daemon.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,24 +133,6 @@ def compile_filter_rules(rules):
133133

134134
return patterns, tag_names
135135

136-
def get_filters(include, exclude):
137-
# The reasoning is to check exclude first, so we can skip if there is no exclude
138-
if not exclude:
139-
return
140-
141-
filtered_tag_names = []
142-
exclude_patterns = []
143-
include_patterns = []
144-
145-
# Compile regex
146-
exclude_patterns, tag_names = compile_filter_rules(exclude)
147-
filtered_tag_names.extend(tag_names)
148-
149-
include_patterns, tag_names = compile_filter_rules(include)
150-
filtered_tag_names.extend(tag_names)
151-
152-
return set(exclude_patterns), set(include_patterns), set(filtered_tag_names)
153-
154136

155137
class DockerDaemon(AgentCheck):
156138
"""Collect metrics and events from Docker API and cgroups."""
@@ -216,6 +198,8 @@ def init(self):
216198
# Other options
217199
self.collect_image_stats = _is_affirmative(instance.get('collect_images_stats', False))
218200
self.collect_container_size = _is_affirmative(instance.get('collect_container_size', False))
201+
self.collect_container_count = _is_affirmative(instance.get('collect_container_count', False))
202+
self.collect_volume_count = _is_affirmative(instance.get('collect_volume_count', False))
219203
self.collect_events = _is_affirmative(instance.get('collect_events', True))
220204
self.collect_image_size = _is_affirmative(instance.get('collect_image_size', False))
221205
self.collect_disk_stats = _is_affirmative(instance.get('collect_disk_stats', False))
@@ -271,6 +255,12 @@ def check(self, instance):
271255
if self.collect_container_size:
272256
self._report_container_size(containers_by_id)
273257

258+
if self.collect_container_count:
259+
self._report_container_count(containers_by_id)
260+
261+
if self.collect_volume_count:
262+
self._report_volume_count()
263+
274264
# Collect disk stats from Docker info command
275265
if self.collect_disk_stats:
276266
self._report_disk_stats()
@@ -347,7 +337,7 @@ def _get_and_count_containers(self, custom_cgroups=False, healthchecks=False):
347337
except Exception as e:
348338
self.log.debug("Unable to inspect Docker container: %s", e)
349339

350-
340+
# TODO: deprecate these 2, they should be replaced by _report_container_count
351341
for tags, count in running_containers_count.iteritems():
352342
self.gauge("docker.containers.running", count, tags=list(tags))
353343

@@ -503,7 +493,6 @@ def _report_container_size(self, containers_by_id):
503493
tags = self._get_tags(container, PERFORMANCE)
504494
m_func = FUNC_MAP[GAUGE][self.use_histogram]
505495
if "SizeRw" in container:
506-
507496
m_func(self, 'docker.container.size_rw', container['SizeRw'],
508497
tags=tags)
509498
if "SizeRootFs" in container:
@@ -540,6 +529,33 @@ def _submit_healthcheck_sc(self, container):
540529
tags = self._get_tags(container, CONTAINER)
541530
self.service_check(HEALTHCHECK_SERVICE_CHECK_NAME, status, tags=tags)
542531

532+
def _report_container_count(self, containers_by_id):
    """Submit the number of containers found in each state.

    Every container in `containers_by_id` that is not rejected by
    `self._is_container_excluded` is tallied under its 'State' value.
    One `docker.container.count` metric is then submitted per non-empty
    state, tagged `container_state:<lowercased state>`. Containers with
    no (or an empty) 'State' are tallied but never reported.

    The submission function is looked up in FUNC_MAP[GAUGE] using
    `self.use_histogram`.
    """
    submit = FUNC_MAP[GAUGE][self.use_histogram]

    # Apply the exclusion rules first, then tally what is left by state.
    kept = [
        container for container in containers_by_id.values()
        if not self._is_container_excluded(container)
    ]

    counts_by_state = defaultdict(int)
    for container in kept:
        counts_by_state[container.get('State', '')] += 1

    for state, count in counts_by_state.items():
        if not state:
            # Containers without a usable state are not reported.
            continue
        submit(self, 'docker.container.count', count, tags=['container_state:%s' % state.lower()])
547+
548+
def _report_volume_count(self):
    """Report volume count per state (dangling or not).

    Queries the Docker client twice, with the `dangling` filter set to
    False and then True, and submits `docker.volume.count` once for each
    result, tagged `volume_state:attached` and `volume_state:dangling`.

    The submission function is looked up in FUNC_MAP[GAUGE] using
    `self.use_histogram`.
    """
    m_func = FUNC_MAP[GAUGE][self.use_histogram]

    # Both queries run before anything is submitted, so a failing API call
    # never leaves a half-reported pair of metrics.
    attached_volumes = self.docker_client.volumes(filters={'dangling': False})
    dangling_volumes = self.docker_client.volumes(filters={'dangling': True})

    # The daemon may answer with `"Volumes": null` (decoded as None) when no
    # volume matches the filter; treat that, or a missing key, as zero
    # volumes instead of crashing on len(None).
    attached_count = len(attached_volumes.get('Volumes') or [])
    dangling_count = len(dangling_volumes.get('Volumes') or [])

    m_func(self, 'docker.volume.count', attached_count, tags=['volume_state:attached'])
    m_func(self, 'docker.volume.count', dangling_count, tags=['volume_state:dangling'])
558+
543559
def _report_image_size(self, images):
544560
for image in images:
545561
tags = self._get_tags(image, IMAGE)
@@ -558,6 +574,7 @@ def _report_performance_metrics(self, containers_by_id):
558574
continue
559575

560576
tags = self._get_tags(container, PERFORMANCE)
577+
561578
self._report_cgroup_metrics(container, tags)
562579
if "_proc_root" not in container:
563580
containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0])

conf.d/docker_daemon.yaml.example

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ instances:
4343
# ensure that `docker ps -a -q` run fast before enabling it.
4444
# Defaults to false.
4545
#
46-
# collect_container_size: false
46+
# collect_container_size: true
4747

4848
# Do you use custom cgroups for this particular instance?
4949
# Note: enabling this option modifies the way in which we inspect the containers and causes
@@ -63,18 +63,28 @@ instances:
6363
#
6464
# health_service_check_whitelist: []
6565

66+
# Collect the container count, tagged by container state (e.g. running, paused, exited, dead).
67+
# Defaults to false.
68+
#
69+
# collect_container_count: true
70+
71+
# Collect the volume count for attached and dangling volumes.
72+
# Defaults to false.
73+
#
74+
# collect_volume_count: true
75+
6676
# Collect images stats
6777
# Number of available active images and intermediate images as gauges.
6878
# Defaults to false.
6979
#
70-
# collect_images_stats: false
80+
# collect_images_stats: true
7181

7282
# Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics.
7383
# The check gets this size with the `docker images` command.
7484
# Requires collect_images_stats to be enabled.
7585
# Defaults to false.
7686
#
77-
# collect_image_size: false
87+
# collect_image_size: true
7888

7989
# Collect disk metrics (total, used, free) through the docker info command for data and metadata.
8090
# This is useful when these values can't be obtained by the disk check.

tests/checks/integration/test_docker_daemon.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,8 @@ def test_exclude_filter(self):
291291
},
292292
],
293293
}
294-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
294+
DockerUtil._drop()
295+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
295296

296297
self.run_check_twice(config, force_reload=True)
297298

@@ -346,7 +347,8 @@ def test_include_filter(self):
346347
},
347348
],
348349
}
349-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
350+
DockerUtil._drop()
351+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
350352

351353
self.run_check_twice(config, force_reload=True)
352354

@@ -408,7 +410,8 @@ def test_tags_options(self):
408410
},
409411
],
410412
}
411-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
413+
DockerUtil._drop()
414+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
412415

413416
self.run_check_twice(config, force_reload=True)
414417
for mname, tags in expected_metrics:
@@ -465,10 +468,17 @@ def test_labels_collection(self):
465468
"collect_labels_as_tags": ["label1"],
466469
"collect_image_size": True,
467470
"collect_images_stats": True,
471+
"collect_container_count": True,
472+
"collect_dead_container_count": True,
473+
"collect_exited_container_count": True,
474+
"collect_volume_count": True,
475+
"collect_dangling_volume_count": True,
468476
},
469477
],
470478
}
471-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
479+
DockerUtil._drop()
480+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
481+
472482
self.run_check(config, force_reload=True)
473483
for mname, tags in expected_metrics:
474484
self.assertMetric(mname, tags=tags, count=1, at_least=1)
@@ -509,7 +519,8 @@ def test_histogram(self):
509519
},
510520
],
511521
}
512-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
522+
DockerUtil._drop()
523+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
513524

514525
self.run_check(config, force_reload=True)
515526
for mname, tags in expected_metrics:
@@ -558,7 +569,8 @@ def test_healthcheck(self):
558569
],
559570
}
560571

561-
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
572+
DockerUtil._drop()
573+
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])
562574

563575
self.run_check(config, force_reload=True)
564576
self.assertServiceCheck('docker.container_health', count=0)

0 commit comments

Comments
 (0)