Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ env:
- TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.0.13
- TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.1.3
- TRAVIS_FLAVOR=couchdb
# FIXME: cannot enable the docker_daemon flavor on Travis
# because it needs a Docker daemon and we run tests in a container
# - TRAVIS_FLAVOR=docker_daemon
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=0.90.13
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.0.3
- TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.1.2
Expand Down
57 changes: 37 additions & 20 deletions checks.d/docker_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,24 +133,6 @@ def compile_filter_rules(rules):

return patterns, tag_names

def get_filters(include, exclude):
    """Compile the include/exclude filter rules into sets.

    Returns None when `exclude` is empty. Otherwise returns a 3-tuple of
    sets: (exclude patterns, include patterns, filtered tag names), built
    from the results of compile_filter_rules on each rule list.
    """
    # Exclude is checked first: when there is no exclude rule we skip
    # everything, including the compilation of the include rules.
    if not exclude:
        return

    # Compile the exclude rules before the include rules.
    excluded, excluded_tags = compile_filter_rules(exclude)
    included, included_tags = compile_filter_rules(include)

    # Tag names coming from both rule lists are merged together.
    tag_names = list(excluded_tags)
    tag_names.extend(included_tags)

    return set(excluded), set(included), set(tag_names)


class DockerDaemon(AgentCheck):
"""Collect metrics and events from Docker API and cgroups."""
Expand Down Expand Up @@ -216,6 +198,8 @@ def init(self):
# Other options
self.collect_image_stats = _is_affirmative(instance.get('collect_images_stats', False))
self.collect_container_size = _is_affirmative(instance.get('collect_container_size', False))
self.collect_container_count = _is_affirmative(instance.get('collect_container_count', False))
self.collect_volume_count = _is_affirmative(instance.get('collect_volume_count', False))
self.collect_events = _is_affirmative(instance.get('collect_events', True))
self.collect_image_size = _is_affirmative(instance.get('collect_image_size', False))
self.collect_disk_stats = _is_affirmative(instance.get('collect_disk_stats', False))
Expand Down Expand Up @@ -271,6 +255,12 @@ def check(self, instance):
if self.collect_container_size:
self._report_container_size(containers_by_id)

if self.collect_container_count:
self._report_container_count(containers_by_id)

if self.collect_volume_count:
self._report_volume_count()

# Collect disk stats from Docker info command
if self.collect_disk_stats:
self._report_disk_stats()
Expand Down Expand Up @@ -347,7 +337,7 @@ def _get_and_count_containers(self, custom_cgroups=False, healthchecks=False):
except Exception as e:
self.log.debug("Unable to inspect Docker container: %s", e)


# TODO: deprecate these 2, they should be replaced by _report_container_count
for tags, count in running_containers_count.iteritems():
self.gauge("docker.containers.running", count, tags=list(tags))

Expand Down Expand Up @@ -503,7 +493,6 @@ def _report_container_size(self, containers_by_id):
tags = self._get_tags(container, PERFORMANCE)
m_func = FUNC_MAP[GAUGE][self.use_histogram]
if "SizeRw" in container:

m_func(self, 'docker.container.size_rw', container['SizeRw'],
tags=tags)
if "SizeRootFs" in container:
Expand Down Expand Up @@ -540,6 +529,33 @@ def _submit_healthcheck_sc(self, container):
tags = self._get_tags(container, CONTAINER)
self.service_check(HEALTHCHECK_SERVICE_CHECK_NAME, status, tags=tags)

def _report_container_count(self, containers_by_id):
    """Report the number of non-excluded containers, one gauge per state.

    Each distinct 'State' value found in `containers_by_id` produces one
    'docker.container.count' submission tagged 'container_state:<state>'
    (lower-cased). Containers with a missing or empty state are not reported.
    """
    submit = FUNC_MAP[GAUGE][self.use_histogram]

    # Keep only the containers that the exclusion check lets through.
    kept = [
        container for container in containers_by_id.values()
        if not self._is_container_excluded(container)
    ]

    # Tally the remaining containers by state ('' when the key is missing).
    count_by_state = defaultdict(int)
    for container in kept:
        count_by_state[container.get('State', '')] += 1

    for state, count in count_by_state.items():
        # A falsy state cannot make a meaningful tag, so it is skipped.
        if not state:
            continue
        submit(self, 'docker.container.count', count,
               tags=['container_state:%s' % state.lower()])

def _report_volume_count(self):
    """Report volume count per state (dangling or not).

    Queries the Docker client twice, once for non-dangling and once for
    dangling volumes, and submits 'docker.volume.count' tagged with
    'volume_state:attached' and 'volume_state:dangling' respectively.
    """
    m_func = FUNC_MAP[GAUGE][self.use_histogram]

    attached_volumes = self.docker_client.volumes(filters={'dangling': False})
    dangling_volumes = self.docker_client.volumes(filters={'dangling': True})

    # The 'Volumes' entry can be null (or absent) when no volume matches
    # the filter; fall back to an empty list so len() reports 0 instead of
    # raising a TypeError/KeyError and aborting the whole check run.
    attached_count = len(attached_volumes.get('Volumes') or [])
    dangling_count = len(dangling_volumes.get('Volumes') or [])

    m_func(self, 'docker.volume.count', attached_count, tags=['volume_state:attached'])
    m_func(self, 'docker.volume.count', dangling_count, tags=['volume_state:dangling'])

def _report_image_size(self, images):
for image in images:
tags = self._get_tags(image, IMAGE)
Expand All @@ -558,6 +574,7 @@ def _report_performance_metrics(self, containers_by_id):
continue

tags = self._get_tags(container, PERFORMANCE)

self._report_cgroup_metrics(container, tags)
if "_proc_root" not in container:
containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0])
Expand Down
16 changes: 13 additions & 3 deletions conf.d/docker_daemon.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ instances:
# ensure that `docker ps -a -q` runs fast before enabling it.
# Defaults to false.
#
# collect_container_size: false
# collect_container_size: true

# Do you use custom cgroups for this particular instance?
# Note: enabling this option modifies the way in which we inspect the containers and causes
Expand All @@ -63,18 +63,28 @@ instances:
#
# health_service_check_whitelist: []

# Collect the container count tagged by state (running, paused, exited, dead)
# Defaults to false.
#
# collect_container_count: true

# Collect the volume count for attached and dangling volumes.
# Defaults to false.
#
# collect_volume_count: true

# Collect images stats
# Number of available active images and intermediate images as gauges.
# Defaults to false.
#
# collect_images_stats: false
# collect_images_stats: true

# Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics.
# The check gets this size with the `docker images` command.
# Requires collect_images_stats to be enabled.
# Defaults to false.
#
# collect_image_size: false
# collect_image_size: true

# Collect disk metrics (total, used, free) through the docker info command for data and metadata.
# This is useful when these values can't be obtained by the disk check.
Expand Down
24 changes: 18 additions & 6 deletions tests/checks/integration/test_docker_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,8 @@ def test_exclude_filter(self):
},
],
}
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check_twice(config, force_reload=True)

Expand Down Expand Up @@ -346,7 +347,8 @@ def test_include_filter(self):
},
],
}
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check_twice(config, force_reload=True)

Expand Down Expand Up @@ -408,7 +410,8 @@ def test_tags_options(self):
},
],
}
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check_twice(config, force_reload=True)
for mname, tags in expected_metrics:
Expand Down Expand Up @@ -465,10 +468,17 @@ def test_labels_collection(self):
"collect_labels_as_tags": ["label1"],
"collect_image_size": True,
"collect_images_stats": True,
"collect_container_count": True,
"collect_dead_container_count": True,
"collect_exited_container_count": True,
"collect_volume_count": True,
"collect_dangling_volume_count": True,
},
],
}
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check(config, force_reload=True)
for mname, tags in expected_metrics:
self.assertMetric(mname, tags=tags, count=1, at_least=1)
Expand Down Expand Up @@ -509,7 +519,8 @@ def test_histogram(self):
},
],
}
DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check(config, force_reload=True)
for mname, tags in expected_metrics:
Expand Down Expand Up @@ -558,7 +569,8 @@ def test_healthcheck(self):
],
}

DockerUtil().set_docker_settings(config['init_config'], config['instances'][0])
DockerUtil._drop()
DockerUtil(init_config=config['init_config'], instance=config['instances'][0])

self.run_check(config, force_reload=True)
self.assertServiceCheck('docker.container_health', count=0)
Expand Down