Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions etc/kayobe/inventory/group_vars/all/docker
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Address (host:port) of the Prometheus metrics endpoint for Docker.
# The host part is the result of the `net_ip` filter applied to
# `internal_net_name` (presumably this host's IP on that network - confirm
# against the filter's definition); the port is fixed at 9323.
# The filter is parenthesised to make explicit that it runs before the
# port suffix is appended.
docker_metrics_addr: "{{ (internal_net_name | net_ip) + ':9323' }}"
33 changes: 33 additions & 0 deletions etc/kayobe/kolla/config/prometheus/docker.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

{% raw %}

# Prometheus alerting rules built on the engine_daemon_* metric series.
# The surrounding raw/endraw markers stop Jinja from rendering the
# double-brace expressions in this file, so they reach Prometheus verbatim
# and $labels is expanded by Prometheus when an alert fires.

groups:
  - name: Docker
    rules:

      # Fires while at least one container is reported in the "stopped"
      # state.
      - alert: DockerContainerStopped
        expr: 'engine_daemon_container_states_containers{state="stopped"} > 0'
        labels:
          severity: warning
        annotations:
          summary: "Containers not running (instance {{ $labels.instance }})"
          description: "One or more containers are stopped"

      # Fires while at least one container is reported in the "paused"
      # state. The annotations name the paused condition explicitly so this
      # alert can be told apart from DockerContainerStopped.
      - alert: DockerContainerPaused
        expr: 'engine_daemon_container_states_containers{state="paused"} > 0'
        labels:
          severity: warning
        annotations:
          summary: "Containers paused (instance {{ $labels.instance }})"
          description: "One or more containers are paused"

      # Fires when the per-second rate of failed health checks, averaged
      # over the last minute, exceeds 1.
      # NOTE(review): rate() is a per-second value, so "> 1" needs more than
      # one failure per second, whereas the description reads as "any
      # failure" - confirm the intended threshold.
      - alert: DockerContainerHealthCheckFail
        expr: rate(engine_daemon_health_checks_failed_total[1m]) > 1
        labels:
          severity: warning
        annotations:
          summary: "Containers health check failed (instance {{ $labels.instance }})"
          description: "One or more container health checks failed"

{% endraw %}

4 changes: 4 additions & 0 deletions releasenotes/notes/docker-alerts-30e8d870f25e500b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
# Release note for the Docker alerting rules; lists every container
# condition the new rules cover.
features:
  - |
    Added new default alerting rules for containers being unhealthy, stopped
    or paused.
Loading