Skip to content

Commit 4493ed3

Browse files
SMPROD-6724 Updated improved Consul integration (#201)
* SMPROD-6724 Updated improved Consul integration * SMPROD-6724 Updated Consul prerequisites * SMPROD-6724 Fix Consul prerequisites
1 parent 462205d commit 4493ed3

File tree

4 files changed

+22
-19
lines changed

4 files changed

+22
-19
lines changed

resources/consul/INSTALL.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,13 @@
11
# Prerequisites
22
Consul instruments Prometheus metrics and annotates the pods with Prometheus annotations.
3+
4+
As described in the Consul documentation (https://www.consul.io/docs/k8s/helm#v-global-metrics and https://www.consul.io/docs/agent/options#telemetry-prometheus_retention_time), you need to enable the global.metrics settings so that Consul exposes an endpoint for scraping metrics.
5+
You also need to set telemetry.disable_hostname to true through the extraConfig of both the Consul server and the Consul client, so that the metric names do not contain the names of the instances.
6+
7+
If you install Consul with Helm, you need to use the following flags:
8+
```
9+
--set 'global.metrics.enabled=true'
10+
--set 'global.metrics.enableAgentMetrics=true'
11+
--set 'server.extraConfig="{"telemetry": {"disable_hostname": true}}"'
12+
--set 'client.extraConfig="{"telemetry": {"disable_hostname": true}}"'
13+
```

resources/consul/alerts.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,15 @@ configurations:
7474
description: There are too many elections for leadership.
7575
- alert: Server cluster unhealthy
7676
expr: |
77-
consul_per_server_autopilot_healthy == 0
77+
consul_autopilot_healthy == 0
7878
for: 5m
7979
labels:
8080
severity: high
8181
annotations:
8282
description: One or many Consul servers in the cluster are unhealthy.
8383
- alert: Zero failure tolerance
8484
expr: |
85-
consul_per_server_autopilot_failure_tolerance == 0
85+
consul_autopilot_failure_tolerance == 0
8686
for: 5m
8787
labels:
8888
severity: medium
@@ -138,7 +138,7 @@ configurations:
138138
description: Garbage Collection stop-the-world pauses were greater than 5 seconds per minute.
139139
- alert: Raft restore duration too high
140140
expr: |
141-
consul_per_server_raft_leader_oldestLogAge < 2* max(consul_raft_fsm_lastRestoreDuration{kube_pod_label_component="server"})
141+
consul_raft_leader_oldestLogAge < 2* max(consul_raft_fsm_lastRestoreDuration{kube_pod_label_component="server"})
142142
for: 5m
143143
labels:
144144
severity: medium

resources/consul/include/consul_sysdig.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,7 @@
456456
"unit": "number",
457457
"yAxis": "auto"
458458
},
459-
"query": "min(consul_per_server_autopilot_healthy{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace})"
459+
"query": "min(consul_autopilot_healthy{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace})"
460460
}
461461
],
462462
"description": "",
@@ -655,7 +655,7 @@
655655
"unit": "number",
656656
"yAxis": "auto"
657657
},
658-
"query": "consul_per_server_autopilot_failure_tolerance{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace}"
658+
"query": "consul_autopilot_failure_tolerance{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace}"
659659
}
660660
],
661661
"axesConfiguration": {
@@ -718,7 +718,7 @@
718718
"unit": "number",
719719
"yAxis": "auto"
720720
},
721-
"query": "consul_per_server_autopilot_healthy{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace}"
721+
"query": "consul_autopilot_healthy{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace}"
722722
}
723723
],
724724
"axesConfiguration": {
@@ -1433,7 +1433,7 @@
14331433
"unit": "relativeTime",
14341434
"yAxis": "auto"
14351435
},
1436-
"query": "consul_per_server_raft_leader_oldestLogAge{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace} > 0\n"
1436+
"query": "consul_raft_leader_oldestLogAge{kube_cluster_name=~$cluster, kube_namespace_name=~$namespace} > 0\n"
14371437
},
14381438
{
14391439
"displayInfo": {

resources/consul/include/sysdig-agent.yaml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ data:
2929
scrape_interval: 10s
3030
scrape_configs:
3131
- job_name: 'consul-envoy-default'
32+
metrics_path: '/v1/agent/metrics'
33+
params:
34+
format: ['prometheus']
3235
tls_config:
3336
insecure_skip_verify: true
3437
kubernetes_sd_configs:
@@ -102,20 +105,9 @@ data:
102105
- action: keep
103106
source_labels: [__address__]
104107
regex: (.*:8500)
105-
- action: replace
106-
source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
107-
target_label: __metrics_path__
108-
regex: (.+)
109-
replacement: '/v1/agent/metrics'
110108
- action: replace
111109
source_labels: [__meta_kubernetes_pod_uid]
112110
target_label: sysdig_k8s_pod_uid
113111
- action: replace
114112
source_labels: [__meta_kubernetes_pod_container_name]
115-
target_label: sysdig_k8s_pod_container_name
116-
metric_relabel_configs:
117-
# Change the name of the metric to remove the name of the pod
118-
- source_labels: ['__name__']
119-
target_label: '__name__'
120-
regex: '(consul_)([a-z]+_)+[0-9]+_(.+)'
121-
replacement: ${1}per_server_${3}
113+
target_label: sysdig_k8s_pod_container_name

0 commit comments

Comments
 (0)