diff --git a/.travis.yml b/.travis.yml index 97bee650d2..c93c59cb64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,9 @@ env: - TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.0.13 - TRAVIS_FLAVOR=cassandra FLAVOR_VERSION=2.1.3 - TRAVIS_FLAVOR=couchdb + # FIXME: cannot enable docker on Travis + # because it needs docker and we run tests in a container + # - TRAVIS_FLAVOR=docker_daemon - TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=0.90.13 - TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.0.3 - TRAVIS_FLAVOR=elasticsearch FLAVOR_VERSION=1.1.2 diff --git a/checks.d/docker_daemon.py b/checks.d/docker_daemon.py index b31ff86bfd..a047fb1ca9 100644 --- a/checks.d/docker_daemon.py +++ b/checks.d/docker_daemon.py @@ -133,24 +133,6 @@ def compile_filter_rules(rules): return patterns, tag_names -def get_filters(include, exclude): - # The reasoning is to check exclude first, so we can skip if there is no exclude - if not exclude: - return - - filtered_tag_names = [] - exclude_patterns = [] - include_patterns = [] - - # Compile regex - exclude_patterns, tag_names = compile_filter_rules(exclude) - filtered_tag_names.extend(tag_names) - - include_patterns, tag_names = compile_filter_rules(include) - filtered_tag_names.extend(tag_names) - - return set(exclude_patterns), set(include_patterns), set(filtered_tag_names) - class DockerDaemon(AgentCheck): """Collect metrics and events from Docker API and cgroups.""" @@ -216,6 +198,8 @@ def init(self): # Other options self.collect_image_stats = _is_affirmative(instance.get('collect_images_stats', False)) self.collect_container_size = _is_affirmative(instance.get('collect_container_size', False)) + self.collect_container_count = _is_affirmative(instance.get('collect_container_count', False)) + self.collect_volume_count = _is_affirmative(instance.get('collect_volume_count', False)) self.collect_events = _is_affirmative(instance.get('collect_events', True)) self.collect_image_size = _is_affirmative(instance.get('collect_image_size', False)) 
self.collect_disk_stats = _is_affirmative(instance.get('collect_disk_stats', False)) @@ -271,6 +255,12 @@ def check(self, instance): if self.collect_container_size: self._report_container_size(containers_by_id) + if self.collect_container_count: + self._report_container_count(containers_by_id) + + if self.collect_volume_count: + self._report_volume_count() + # Collect disk stats from Docker info command if self.collect_disk_stats: self._report_disk_stats() @@ -347,7 +337,7 @@ def _get_and_count_containers(self, custom_cgroups=False, healthchecks=False): except Exception as e: self.log.debug("Unable to inspect Docker container: %s", e) - + # TODO: deprecate these 2, they should be replaced by _report_container_count for tags, count in running_containers_count.iteritems(): self.gauge("docker.containers.running", count, tags=list(tags)) @@ -503,7 +493,6 @@ def _report_container_size(self, containers_by_id): tags = self._get_tags(container, PERFORMANCE) m_func = FUNC_MAP[GAUGE][self.use_histogram] if "SizeRw" in container: - m_func(self, 'docker.container.size_rw', container['SizeRw'], tags=tags) if "SizeRootFs" in container: @@ -540,6 +529,33 @@ def _submit_healthcheck_sc(self, container): tags = self._get_tags(container, CONTAINER) self.service_check(HEALTHCHECK_SERVICE_CHECK_NAME, status, tags=tags) + def _report_container_count(self, containers_by_id): + """Report container count per state""" + m_func = FUNC_MAP[GAUGE][self.use_histogram] + + per_state_count = defaultdict(int) + + # Only count containers that are not excluded by the include/exclude filters + containers = [ctr for ctr in containers_by_id.values() if not self._is_container_excluded(ctr)] + + for ctr in containers: + per_state_count[ctr.get('State', '')] += 1 + + for state in per_state_count: + if state: + m_func(self, 'docker.container.count', per_state_count[state], tags=['container_state:%s' % state.lower()]) + + def _report_volume_count(self): + """Report volume count per state (dangling or not)""" + m_func =
FUNC_MAP[GAUGE][self.use_histogram] + # The Docker API can return "Volumes": null when no volume matches the filter; count that as zero + attached_volumes = self.docker_client.volumes(filters={'dangling': False}) + dangling_volumes = self.docker_client.volumes(filters={'dangling': True}) + attached_count = len(attached_volumes.get('Volumes') or []) + dangling_count = len(dangling_volumes.get('Volumes') or []) + m_func(self, 'docker.volume.count', attached_count, tags=['volume_state:attached']) + m_func(self, 'docker.volume.count', dangling_count, tags=['volume_state:dangling']) + def _report_image_size(self, images): for image in images: tags = self._get_tags(image, IMAGE) @@ -558,6 +574,7 @@ def _report_performance_metrics(self, containers_by_id): continue tags = self._get_tags(container, PERFORMANCE) + self._report_cgroup_metrics(container, tags) if "_proc_root" not in container: containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0]) diff --git a/conf.d/docker_daemon.yaml.example b/conf.d/docker_daemon.yaml.example index 5bfe2ea599..bc7a3c6609 100644 --- a/conf.d/docker_daemon.yaml.example +++ b/conf.d/docker_daemon.yaml.example @@ -43,7 +43,7 @@ instances: # ensure that `docker ps -a -q` run fast before enabling it. # Defaults to false. # - # collect_container_size: false + # collect_container_size: true # Do you use custom cgroups for this particular instance? # Note: enabling this option modifies the way in which we inspect the containers and causes @@ -63,18 +63,28 @@ instances: # # health_service_check_whitelist: [] + # Collect the container count tagged by state (running, paused, exited, dead) + # Defaults to false. + # + # collect_container_count: true + + # Collect the volume count for attached and dangling volumes. + # Defaults to false. + # + # collect_volume_count: true + # Collect images stats # Number of available active images and intermediate images as gauges. # Defaults to false.
# - # collect_images_stats: false + # collect_images_stats: true # Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics. # The check gets this size with the `docker images` command. # Requires collect_images_stats to be enabled. # Defaults to false. # - # collect_image_size: false + # collect_image_size: true # Collect disk metrics (total, used, free) through the docker info command for data and metadata. # This is useful when these values can't be obtained by the disk check. diff --git a/tests/checks/integration/test_docker_daemon.py b/tests/checks/integration/test_docker_daemon.py index 6da4300cbc..23576f16a3 100644 --- a/tests/checks/integration/test_docker_daemon.py +++ b/tests/checks/integration/test_docker_daemon.py @@ -291,7 +291,8 @@ def test_exclude_filter(self): }, ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) self.run_check_twice(config, force_reload=True) @@ -346,7 +347,8 @@ def test_include_filter(self): }, ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) self.run_check_twice(config, force_reload=True) @@ -408,7 +410,8 @@ def test_tags_options(self): }, ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) self.run_check_twice(config, force_reload=True) for mname, tags in expected_metrics: @@ -465,10 +468,17 @@ def test_labels_collection(self): "collect_labels_as_tags": ["label1"], "collect_image_size": True, "collect_images_stats": True, + "collect_container_count": True, + "collect_dead_container_count": True, + "collect_exited_container_count": True, + "collect_volume_count": True, + 
"collect_dangling_volume_count": True, }, ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) + self.run_check(config, force_reload=True) for mname, tags in expected_metrics: self.assertMetric(mname, tags=tags, count=1, at_least=1) @@ -509,7 +519,8 @@ def test_histogram(self): }, ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) self.run_check(config, force_reload=True) for mname, tags in expected_metrics: @@ -558,7 +569,8 @@ def test_healthcheck(self): ], } - DockerUtil().set_docker_settings(config['init_config'], config['instances'][0]) + DockerUtil._drop() + DockerUtil(init_config=config['init_config'], instance=config['instances'][0]) self.run_check(config, force_reload=True) self.assertServiceCheck('docker.container_health', count=0)