Skip to content

Commit 936b140

Browse files
mrnicegyu11kaiser
andauthored
Adds second prometheus (#179)
* init * Make second prometheus work * Fix traefik integration --------- Co-authored-by: kaiser <[email protected]>
1 parent 0661932 commit 936b140

File tree

7 files changed

+174
-18
lines changed

7 files changed

+174
-18
lines changed

services/monitoring/docker-compose.aws.yml

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
version: "3.7"
22
services:
3-
prometheus:
3+
prometheus-catchall:
44
dns: 8.8.8.8
55
deploy:
66
placement:
@@ -11,6 +11,17 @@ services:
1111
memory: 24576M
1212
reservations:
1313
memory: 24576M
14+
prometheus-cadvisor:
15+
dns: 8.8.8.8
16+
deploy:
17+
placement:
18+
constraints:
19+
- node.labels.prometheus==true
20+
resources:
21+
limits:
22+
memory: 4096M
23+
reservations:
24+
memory: 4096M
1425
grafana:
1526
dns: 8.8.8.8
1627
deploy:

services/monitoring/docker-compose.dalco.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@ services:
1414
constraints:
1515
- node.labels.grafana==true
1616

17-
prometheus:
17+
prometheus-catchall:
18+
deploy:
19+
placement:
20+
constraints:
21+
- node.labels.prometheus==true
22+
prometheus-cadvisor:
1823
deploy:
1924
placement:
2025
constraints:
Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
version: "3.7"
22
services:
3-
prometheus:
3+
prometheus-catchall:
44
deploy:
55
labels:
66
- traefik.http.routers.prometheus.tls.certresolver=myresolver
@@ -9,3 +9,8 @@ services:
99
deploy:
1010
labels:
1111
- traefik.http.routers.grafana.tls.certresolver=myresolver
12+
13+
prometheus-cadvisor:
14+
deploy:
15+
labels:
16+
- traefik.http.routers.prometheus.tls.certresolver=myresolver
Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
version: "3.7"
22
services:
3-
prometheus:
3+
prometheus-catchall:
44
deploy:
55
labels:
66
- traefik.http.routers.prometheus.tls.certresolver=lehttpchallenge
7-
87
grafana:
98
deploy:
109
labels:
1110
- traefik.http.routers.grafana.tls.certresolver=lehttpchallenge
11+
prometheus-cadvisor:
12+
deploy:
13+
placement:
14+
constraints:
15+
- traefik.http.routers.prometheus.tls.certresolver=lehttpchallenge

services/monitoring/docker-compose.master.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@ services:
55
placement:
66
constraints:
77
- node.labels.grafana==true
8-
prometheus:
8+
prometheus-catchall:
9+
deploy:
10+
placement:
11+
constraints:
12+
- node.labels.prometheus==true
13+
prometheus-cadvisor:
914
deploy:
1015
placement:
1116
constraints:

services/monitoring/docker-compose.yml.j2

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ volumes:
44
prometheus_data: {}
55
grafana_data: {}
66
alertmanager_data: {}
7+
prometheus_data_cadvisor: {}
78

89
networks:
910
internal:
@@ -23,6 +24,8 @@ configs:
2324
file: ./node-exporter/docker-entrypoint.sh
2425
prometheus_config:
2526
file: ./prometheus/prometheus.yml
27+
prometheus_config_cadvisor:
28+
file: ./prometheus/prometheus-cadvisor.yml
2629
prometheus_rules:
2730
file: ./prometheus/prometheus.rules.yml
2831
grafana_image_renderer_config:
@@ -32,8 +35,9 @@ configs:
3235
smokeping_prober_config:
3336
file: ./smokeping_prober_config.yaml
3437
services:
35-
prometheus:
36-
image: prom/prometheus:v2.40.7
38+
prometheus-catchall:
39+
hostname: "{% raw %}{{.Service.Name}}{% endraw %}"
40+
image: prom/prometheus:v2.44.0
3741
volumes:
3842
- prometheus_data:/prometheus
3943
- /var/run/docker.sock:/var/run/docker.sock:ro
@@ -63,20 +67,64 @@ services:
6367
- traefik.enable=true
6468
- traefik.docker.network=${PUBLIC_NETWORK}
6569
# direct access through port
66-
- traefik.http.services.prometheus.loadbalancer.server.port=${MONITORING_PROMETHEUS_PORT}
67-
- traefik.http.routers.prometheus.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/prometheus`)
68-
- traefik.http.routers.prometheus.entrypoints=https
69-
- traefik.http.routers.prometheus.tls=true
70-
- traefik.http.middlewares.prometheus_stripprefixregex.stripprefixregex.regex=^/prometheus
71-
- traefik.http.routers.prometheus.middlewares=ops_whitelist_ips@docker, ops_auth@docker, ops_gzip@docker, prometheus_stripprefixregex
72-
- prometheus-job=prometheus
70+
- traefik.http.services.prometheuscatchall.loadbalancer.server.port=${MONITORING_PROMETHEUS_PORT}
71+
- traefik.http.routers.prometheuscatchall.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/prometheus`)
72+
- traefik.http.routers.prometheuscatchall.entrypoints=https
73+
- traefik.http.routers.prometheuscatchall.tls=true
74+
- traefik.http.middlewares.prometheuscatchall_stripprefixregex.stripprefixregex.regex=^/prometheus
75+
- traefik.http.routers.prometheuscatchall.middlewares=ops_whitelist_ips@docker, ops_auth@docker, ops_gzip@docker, prometheuscatchall_stripprefixregex
76+
- prometheus-job=prometheus-catchall
7377
- prometheus-port=${MONITORING_PROMETHEUS_PORT}
7478
resources:
7579
limits:
7680
memory: 4096M
7781
reservations:
78-
memory: 64M
79-
82+
memory: 4096M
83+
prometheus-cadvisor:
84+
hostname: "{% raw %}{{.Service.Name}}{% endraw %}"
85+
image: prom/prometheus:v2.44.0
86+
volumes:
87+
- prometheus_data_cadvisor:/prometheus
88+
- /var/run/docker.sock:/var/run/docker.sock:ro
89+
user: root # only user root can use the docker socket
90+
configs:
91+
- source: prometheus_config_cadvisor
92+
target: /etc/prometheus/prometheus.yml
93+
- source: prometheus_rules
94+
target: /etc/prometheus/prometheus.rules.yml
95+
command:
96+
- "--config.file=/etc/prometheus/prometheus.yml"
97+
- "--storage.tsdb.path=/prometheus"
98+
- "--storage.tsdb.retention=30d"
99+
- "--web.console.libraries=/usr/share/prometheus/console_libraries"
100+
- "--web.console.templates=/usr/share/prometheus/consoles"
101+
- "--web.external-url=https://${MONITORING_DOMAIN}/prometheus-cadvisor/"
102+
- "--web.route-prefix=/"
103+
- "--storage.tsdb.allow-overlapping-blocks" # via https://jessicagreben.medium.com/prometheus-fill-in-data-for-new-recording-rules-30a14ccb8467
104+
#- "--web.enable-admin-api" This allows messing with prometheus using its API from the CLI. Disabled for security reasons by default.
105+
networks:
106+
- internal
107+
- monitored
108+
- public
109+
extra_hosts: []
110+
deploy:
111+
labels:
112+
- traefik.enable=true
113+
- traefik.docker.network=${PUBLIC_NETWORK}
114+
# direct access through port
115+
- traefik.http.services.prometheuscadvisor.loadbalancer.server.port=${MONITORING_PROMETHEUS_PORT}
116+
- traefik.http.routers.prometheuscadvisor.rule=Host(`${MONITORING_DOMAIN}`) && PathPrefix(`/prometheuscadvisor`)
117+
- traefik.http.routers.prometheuscadvisor.entrypoints=https
118+
- traefik.http.routers.prometheuscadvisor.tls=true
119+
- traefik.http.middlewares.prometheuscadvisor_stripprefixregex.stripprefixregex.regex=^/prometheuscadvisor
120+
- traefik.http.routers.prometheuscadvisor.middlewares=ops_whitelist_ips@docker, ops_auth@docker, ops_gzip@docker, prometheuscadvisor_stripprefixregex
121+
- prometheus-job=prometheus-cadvisor
122+
- prometheus-port=${MONITORING_PROMETHEUS_PORT}
123+
resources:
124+
limits:
125+
memory: 4096M
126+
reservations:
127+
memory: 4096M
80128
node-exporter:
81129
image: prom/node-exporter:v0.18.1
82130
volumes:
@@ -156,7 +204,7 @@ services:
156204
memory: 64M
157205

158206
cadvisor-exporter:
159-
image: gcr.io/cadvisor/cadvisor:v0.46.0
207+
image: gcr.io/cadvisor/cadvisor:v0.47.0
160208
volumes:
161209
- /:/rootfs:ro
162210
- /var/run:/var/run:ro
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# global config
2+
# DOLLAR SIGNS NEED TO BE ESCAPED (see https://stackoverflow.com/a/61259844/10198629)
3+
global:
4+
scrape_interval: 16s # Scrape targets every 16 seconds.
5+
evaluation_interval: 16s # Evaluate rules every 16 seconds.
6+
# scrape_timeout global default would be (10s).
7+
8+
# Attach these labels to any time series or alerts when communicating with
9+
# external systems (federation, remote storage, Alertmanager).
10+
external_labels:
11+
monitor: "sim-core-monitor"
12+
13+
# A scrape configuration containing exactly one endpoint to scrape:
14+
scrape_configs:
15+
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
16+
17+
# Create a job for Docker daemons. see [https://prometheus.io/docs/guides/dockerswarm/]
18+
19+
# Create a job for Docker Swarm containers.
20+
# Prometheus docker swarm discovery will automatically discover services that need to be scraped by prometheus
21+
# All services that need to be monitored must at least define the following SERVICE labels (in deploy/labels section):
22+
# deploy:
23+
# labels:
24+
# # prometheus labels
25+
# - prometheus-job=traefik_ops
26+
# - prometheus-port=8082
27+
#
28+
- job_name: "dockerswarm-tasks"
29+
dockerswarm_sd_configs:
30+
- host: unix:///var/run/docker.sock
31+
role: tasks # this scrapes docker tasks
32+
relabel_configs:
33+
# Only keep containers that should be running
34+
- source_labels: [__meta_dockerswarm_task_desired_state]
35+
regex: running
36+
action: keep
37+
# Only keep containers whose `prometheus-job` label is `cadvisor`.
38+
- source_labels: [__meta_dockerswarm_service_label_prometheus_job]
39+
regex: cadvisor
40+
action: keep
41+
# Keep the container's IP but replace the port with the `prometheus-port` Swarm label. Essential for our setup: DO NOT CHANGE THIS.
42+
- source_labels:
43+
[__address__, __meta_dockerswarm_service_label_prometheus_port]
44+
separator: ";"
45+
regex: "(.*):.*;(.*)"
46+
target_label: __address__
47+
replacement: $1:$2
48+
# Use the prometheus-job Swarm label as the Prometheus job name.
49+
- source_labels: [__meta_dockerswarm_service_label_prometheus_job]
50+
target_label: job
51+
# Set hostname as instance label
52+
- source_labels: [__meta_dockerswarm_node_hostname]
53+
target_label: instance
54+
metric_relabel_configs:
55+
- regex: "container_label_com_docker_compose_config_hash"
56+
action: labeldrop # cAdvisor pruning
57+
- regex: "container_label_com_docker_compose_container_number"
58+
action: labeldrop # cAdvisor pruning
59+
- regex: "container_label_io_simcore_.*"
60+
action: labeldrop # cAdvisor pruning
61+
- regex: "container_label_simcore_service_compose_spec"
62+
action: labeldrop # cAdvisor pruning
63+
- regex: "container_label_simcore_service_container_http_entrypoint"
64+
action: labeldrop # cAdvisor pruning
65+
- regex: "container_label_simcore_service_paths_mapping"
66+
action: labeldrop # cAdvisor pruning
67+
- regex: "container_label_org_.*"
68+
action: labeldrop # cAdvisor pruning
69+
- regex: "container_label_com_docker_compose_project"
70+
action: labeldrop # cAdvisor pruning
71+
- regex: "container_label_com_docker_compose_project_config_files"
72+
action: labeldrop # cAdvisor pruning
73+
- regex: "container_label_com_docker_compose_service"
74+
action: labeldrop # cAdvisor pruning
75+
- regex: "container_label_com_docker_compose_project"
76+
action: labeldrop # cAdvisor pruning
77+
- regex: "container_label_maintainer"
78+
action: labeldrop # cAdvisor pruning

0 commit comments

Comments
 (0)