Skip to content

Commit 99bedbe

Browse files
committed
[docker_daemon] adding healthcheck service checks.
[docker_daemon] set proper tagging and SC name for healthcheck. [docker_daemon] put healthcheck sc behind feature flag - it requires inspect. [ci] fixing docker_daemon CI. [ci] fixing old setuptools setup_env issues. Bumping to newest version. [docker_daemon] adding test for container inspection logic. [docker_daemon] amending config yaml.
1 parent 3944053 commit 99bedbe

File tree

4 files changed

+56
-5
lines changed

4 files changed

+56
-5
lines changed

checks.d/docker_daemon.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
EVENT_TYPE = 'docker'
2525
SERVICE_CHECK_NAME = 'docker.service_up'
26+
HEALTHCHECK_SERVICE_CHECK_NAME = 'docker.container_health'
2627
SIZE_REFRESH_RATE = 5 # Collect container sizes every 5 iterations of the check
2728
MAX_CGROUP_LISTING_RETRIES = 3
2829
CONTAINER_ID_RE = re.compile('[0-9a-f]{64}')
@@ -250,8 +251,11 @@ def check(self, instance):
250251
# containers running with custom cgroups?
251252
custom_cgroups = _is_affirmative(instance.get('custom_cgroups', False))
252253

254+
# submit healthcheck service checks?
255+
health_service_checks = _is_affirmative(instance.get('health_service_checks', False))
256+
253257
# Get the list of containers and the index of their names
254-
containers_by_id = self._get_and_count_containers(custom_cgroups)
258+
containers_by_id = self._get_and_count_containers(custom_cgroups, health_service_checks)
255259
containers_by_id = self._crawl_container_pids(containers_by_id, custom_cgroups)
256260

257261
# Send events from Docker API
@@ -268,6 +272,10 @@ def check(self, instance):
268272
if self.collect_disk_stats:
269273
self._report_disk_stats()
270274

275+
# Report docker healthcheck SC's where available
276+
if health_service_checks:
277+
self._send_container_healthcheck_sc(containers_by_id)
278+
271279
def _count_and_weigh_images(self):
272280
try:
273281
tags = self._get_tags()
@@ -284,7 +292,7 @@ def _count_and_weigh_images(self):
284292
# It's not an important metric, keep going if it fails
285293
self.warning("Failed to count Docker images. Exception: {0}".format(e))
286294

287-
def _get_and_count_containers(self, custom_cgroups=False):
295+
def _get_and_count_containers(self, custom_cgroups=False, healthchecks=False):
288296
"""List all the containers from the API, filter and count them."""
289297

290298
# Querying the size of containers is slow, we don't do it at each run
@@ -328,10 +336,11 @@ def _get_and_count_containers(self, custom_cgroups=False):
328336

329337
# grab pid via API if custom cgroups - otherwise we won't find process when
330338
# crawling for pids.
331-
if custom_cgroups:
339+
if custom_cgroups or healthchecks:
332340
try:
333341
inspect_dict = self.docker_client.inspect_container(container_name)
334342
container['_pid'] = inspect_dict['State']['Pid']
343+
container['health'] = inspect_dict['State'].get('Health', {})
335344
except Exception as e:
336345
self.log.debug("Unable to inspect Docker container: %s", e)
337346

@@ -512,6 +521,20 @@ def _report_container_size(self, containers_by_id):
512521
self, 'docker.container.size_rootfs', container['SizeRootFs'],
513522
tags=tags)
514523

524+
def _send_container_healthcheck_sc(self, containers_by_id):
525+
for container in containers_by_id.itervalues():
526+
health = container.get('health', {})
527+
tags = self._get_tags(container, CONTAINER)
528+
status = AgentCheck.UNKNOWN
529+
if health:
530+
_health = health.get('Status', '')
531+
if _health == 'unhealthy':
532+
status = AgentCheck.CRITICAL
533+
elif _health == 'healthy':
534+
status = AgentCheck.OK
535+
536+
self.service_check(HEALTHCHECK_SERVICE_CHECK_NAME, status, tags=tags)
537+
515538
def _report_image_size(self, images):
516539
for image in images:
517540
tags = self._get_tags(image, IMAGE)

ci/docker_daemon.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
namespace :docker_daemon do |flavor|
99
task before_install: ['ci:common:before_install']
1010

11+
task install: ['ci:common:install']
12+
1113
task before_script: ['ci:common:before_script']
1214

1315
task script: ['ci:common:script'] do

conf.d/docker_daemon.yaml.example

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,18 @@ instances:
4747

4848
# Do you use custom cgroups for this particular instance?
4949
# Note: enabling this option modifies the way in which we inspect the containers and causes
50-
# some overhead - if you run a high volume of containers we may timeout. Please only
51-
# enable if absolutely necessary.
50+
# some overhead - if you run a high volume of containers we may timeout.
51+
#
5252
# custom_cgroups: false
5353

54+
# Report docker container healthcheck events as service checks
55+
# Note: enabling this option modifies the way in which we inspect the containers and causes
56+
# some overhead - if you run a high volume of containers we may timeout.
57+
# Container Healthchecks are available starting with docker 1.12; enabling with older
58+
# versions will result in an UNKNOWN state for the service check.
59+
#
60+
# health_service_checks: false
61+
5462
# Collect images stats
5563
# Number of available active images and intermediate images as gauges.
5664
# Defaults to false.

tests/checks/integration/test_docker_daemon.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,11 +529,29 @@ def test_events(self):
529529
},
530530
],
531531
}
532+
532533
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
533534

534535
self.run_check(config, force_reload=True)
535536
self.assertEqual(len(self.events), 2)
536537

538+
def test_healthcheck(self):
539+
config = {
540+
"init_config": {},
541+
"instances": [{
542+
"url": "unix://var/run/docker.sock",
543+
"collect_images_stats": True,
544+
"health_service_checks": True,
545+
},
546+
],
547+
}
548+
549+
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
550+
551+
self.run_check(config, force_reload=True)
552+
self.assertServiceCheck('docker.container_health', at_least=2)
553+
554+
537555
def test_container_size(self):
538556
expected_metrics = [
539557
('docker.containers.running', ['docker_image:nginx:latest', 'image_name:nginx', 'image_tag:latest']),

0 commit comments

Comments
 (0)