Skip to content

Commit 1b3f8dc

Browse files
committed
Working monitoring example
1 parent 5fdfc35 commit 1b3f8dc

File tree

6 files changed

+57
-70
lines changed

6 files changed

+57
-70
lines changed

examples/monitoring/README.md

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,46 @@
1-
# Basic Deployment
2-
This example deploys a basic deployment. No RBAC/LDAP. Just a single topic 'foobar' is added as part of the pipeline.
3-
### Deploy CRDs
4-
Deploy the CRDS using the standard way:
1+
# Monitoring (JMX/Prometheus/Grafana)
2+
In this example, we deploy an RBAC-enabled Confluent cluster with Prometheus/Grafana integration.
3+
4+
## Deploy Stack
5+
From within this directory (`./examples/monitoring`), run the following command:
6+
57
```shell
6-
kubectl apply -k ../../kustomize/crds
8+
kubectl apply -k ../../kustomize/crds && sleep 1 && kubectl apply -k .
79
```
8-
### Deploy Confluent Operator and Confluent Services
9-
Deploy the confluent operator and services:
10+
11+
Once all the pods are in a 'Running' status, we can start to investigate the rest of the stack.
12+
13+
## Prometheus
14+
Prometheus has a UI you can view by forwarding port 9090 with the following command, and then accessing `http://127.0.0.1:9090` from a local browser.
1015
```shell
11-
kubectl apply -k .
16+
kubectl port-forward \
17+
$(kubectl get pods -n sandbox -l app=prometheus,component=server -o name) \
18+
9090 --namespace sandbox
1219
```
1320

21+
If you navigate to `http://localhost:9090/targets`, you should see a screen like the one below, which indicates that Prometheus is successfully scraping metrics from the Confluent services.
22+
![Prometheus targets](../../resources/images/prometheus_targets.png)
1423

15-
Portforward Grafana
16-
Login with admin/password
17-
18-
opensofttools/kafka_exporter:latest
24+
The configuration file for Prometheus can be found at `./examples/monitoring/prometheus/server/cm.yaml`
1925

26+
## Grafana
27+
Grafana's web UI runs on port 3000. Similar to Prometheus, forward that port to your local machine with the following command:
28+
```shell
2029
kubectl port-forward \
21-
$(kubectl get pods -n default -l app.kubernetes.io/name=grafana,app.kubernetes.io/instance=grafana -o name) \
22-
3000 --namespace default
30+
$(kubectl get pods -n sandbox -l app.kubernetes.io/component=grafana -o name) \
31+
3000 --namespace sandbox
32+
```
33+
34+
Navigate to http://localhost:3000
35+
36+
You will need to log in with the username 'admin' and the password 'password'. On the home screen you will see a dashboard called 'Confluent Platform'. Click on it to open it.
37+
![](../../resources/images/grafana-dashboard.png)
38+
39+
This dashboard displays metrics and alerts for the various Confluent services. It has been captured in code and can be edited at `./examples/monitoring/grafana/dashboards.yaml`.
40+
![](../../resources/images/grafana-dashboard2.png)
41+
42+
43+
44+
45+
46+

examples/monitoring/grafana/dashboards.yaml

Lines changed: 19 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ data:
100100
"targets": [
101101
{
102102
"exemplar": true,
103-
"expr": "io_confluent_caas_volumemetrics_used{ platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}",
103+
"expr": "io_confluent_caas_volumemetrics_used{ platform_confluent_io_type=~\"kafka\",kubernetes_namespace=~\"$namespace\"}",
104104
"format": "time_series",
105105
"interval": "",
106106
"intervalFactor": 1,
@@ -109,7 +109,7 @@ data:
109109
},
110110
{
111111
"exemplar": true,
112-
"expr": "io_confluent_caas_volumemetrics_total{ platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}",
112+
"expr": "io_confluent_caas_volumemetrics_total{ platform_confluent_io_type=~\"kafka\",kubernetes_namespace=~\"$namespace\"}",
113113
"hide": false,
114114
"instant": false,
115115
"interval": "",
@@ -205,7 +205,7 @@ data:
205205
"targets": [
206206
{
207207
"exemplar": true,
208-
"expr": "io_confluent_caas_volumemetrics_percentused{ platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}",
208+
"expr": "io_confluent_caas_volumemetrics_percentused{ platform_confluent_io_type=~\"kafka\",kubernetes_namespace=~\"$namespace\"}",
209209
"format": "time_series",
210210
"interval": "",
211211
"intervalFactor": 1,
@@ -301,7 +301,7 @@ data:
301301
"targets": [
302302
{
303303
"exemplar": true,
304-
"expr": "io_confluent_caas_volumemetrics_percentavailable{ platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}",
304+
"expr": "io_confluent_caas_volumemetrics_percentavailable{ platform_confluent_io_type=~\"kafka\",kubernetes_namespace=~\"$namespace\"}",
305305
"format": "time_series",
306306
"interval": "",
307307
"intervalFactor": 1,
@@ -648,7 +648,7 @@ data:
648648
"repeatDirection": "h",
649649
"targets": [
650650
{
651-
"expr": "count(kafka_server_replicamanager_value{name=\"LeaderCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
651+
"expr": "count(kafka_server_replicamanager_value{name=\"LeaderCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
652652
"format": "time_series",
653653
"intervalFactor": 1,
654654
"legendFormat": "",
@@ -726,7 +726,7 @@ data:
726726
"pluginVersion": "8.1.2",
727727
"targets": [
728728
{
729-
"expr": "sum(sum(kafka_controller_kafkacontroller_value{name=\"ActiveControllerCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"}))",
729+
"expr": "sum(sum(kafka_controller_kafkacontroller_value{name=\"ActiveControllerCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"}))",
730730
"format": "time_series",
731731
"intervalFactor": 1,
732732
"refId": "A"
@@ -803,7 +803,7 @@ data:
803803
"pluginVersion": "8.1.2",
804804
"targets": [
805805
{
806-
"expr": "sum(kafka_controller_controllerstats_oneminuterate{name=~\"UncleanLeaderElectionsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
806+
"expr": "sum(kafka_controller_controllerstats_oneminuterate{name=~\"UncleanLeaderElectionsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
807807
"format": "time_series",
808808
"intervalFactor": 1,
809809
"refId": "A"
@@ -881,7 +881,7 @@ data:
881881
"pluginVersion": "8.1.2",
882882
"targets": [
883883
{
884-
"expr": "sum(kafka_server_replicamanager_value{name=~\"PartitionCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
884+
"expr": "sum(kafka_server_replicamanager_value{name=~\"PartitionCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
885885
"format": "time_series",
886886
"intervalFactor": 1,
887887
"refId": "A"
@@ -959,7 +959,7 @@ data:
959959
"pluginVersion": "8.1.2",
960960
"targets": [
961961
{
962-
"expr": "sum(kafka_server_replicamanager_value{name=~\"UnderReplicatedPartitions\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
962+
"expr": "sum(kafka_server_replicamanager_value{name=~\"UnderReplicatedPartitions\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
963963
"format": "time_series",
964964
"hide": false,
965965
"intervalFactor": 2,
@@ -1038,7 +1038,7 @@ data:
10381038
"pluginVersion": "8.1.2",
10391039
"targets": [
10401040
{
1041-
"expr": "sum(kafka_server_replicamanager_value{name=~\"OfflineReplicaCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1041+
"expr": "sum(kafka_server_replicamanager_value{name=~\"OfflineReplicaCount\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
10421042
"format": "time_series",
10431043
"intervalFactor": 1,
10441044
"refId": "A"
@@ -1095,7 +1095,7 @@ data:
10951095
"steppedLine": false,
10961096
"targets": [
10971097
{
1098-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesInPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1098+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesInPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
10991099
"format": "time_series",
11001100
"hide": false,
11011101
"interval": "",
@@ -1104,7 +1104,7 @@ data:
11041104
"refId": "A"
11051105
},
11061106
{
1107-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesOutPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1107+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesOutPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
11081108
"format": "time_series",
11091109
"hide": false,
11101110
"intervalFactor": 1,
@@ -1199,7 +1199,7 @@ data:
11991199
"targets": [
12001200
{
12011201
"exemplar": true,
1202-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"MessagesInPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1202+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"MessagesInPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
12031203
"format": "time_series",
12041204
"interval": "",
12051205
"intervalFactor": 1,
@@ -1293,7 +1293,7 @@ data:
12931293
"targets": [
12941294
{
12951295
"exemplar": true,
1296-
"expr": "kafka_server_kafkarequesthandlerpool_oneminuterate{name=~\"RequestHandlerAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"}*100",
1296+
"expr": "kafka_server_kafkarequesthandlerpool_oneminuterate{name=~\"RequestHandlerAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"}*100",
12971297
"format": "time_series",
12981298
"interval": "",
12991299
"intervalFactor": 1,
@@ -1386,14 +1386,14 @@ data:
13861386
"steppedLine": false,
13871387
"targets": [
13881388
{
1389-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"TotalProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1389+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"TotalProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
13901390
"format": "time_series",
13911391
"intervalFactor": 1,
13921392
"legendFormat": "Total Produce Request Rate",
13931393
"refId": "A"
13941394
},
13951395
{
1396-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"FailedProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1396+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"FailedProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
13971397
"format": "time_series",
13981398
"intervalFactor": 1,
13991399
"legendFormat": "Failed Produce Request Rate",
@@ -1486,15 +1486,15 @@ data:
14861486
"targets": [
14871487
{
14881488
"exemplar": true,
1489-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"TotalFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1489+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"TotalFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
14901490
"format": "time_series",
14911491
"interval": "",
14921492
"intervalFactor": 1,
14931493
"legendFormat": "Fetch Request Rate",
14941494
"refId": "A"
14951495
},
14961496
{
1497-
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"FailedFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"})",
1497+
"expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"FailedFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"})",
14981498
"format": "time_series",
14991499
"intervalFactor": 1,
15001500
"legendFormat": "Failed Fetch Request Rate",
@@ -1587,7 +1587,7 @@ data:
15871587
"targets": [
15881588
{
15891589
"exemplar": true,
1590-
"expr": "kafka_network_socketserver_value{name=~\"NetworkProcessorAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"$controller_type\"}*100",
1590+
"expr": "kafka_network_socketserver_value{name=~\"NetworkProcessorAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",platform_confluent_io_type=~\"kafka\"}*100",
15911591
"format": "time_series",
15921592
"interval": "",
15931593
"intervalFactor": 1,
@@ -4419,36 +4419,6 @@ data:
44194419
"tagsQuery": "",
44204420
"type": "query",
44214421
"useTags": false
4422-
},
4423-
{
4424-
"allValue": ".+",
4425-
"current": {
4426-
"selected": true,
4427-
"text": "kafka",
4428-
"value": "kafka"
4429-
},
4430-
"datasource": "prometheus",
4431-
"definition": "label_values(platform_confluent_io_type)",
4432-
"description": null,
4433-
"error": null,
4434-
"hide": 0,
4435-
"includeAll": false,
4436-
"label": "ClusterType",
4437-
"multi": false,
4438-
"name": "controller_type",
4439-
"options": [],
4440-
"query": {
4441-
"query": "label_values(platform_confluent_io_type)",
4442-
"refId": "StandardVariableQuery"
4443-
},
4444-
"refresh": 1,
4445-
"regex": "",
4446-
"skipUrlSync": false,
4447-
"sort": 1,
4448-
"tagValuesQuery": "",
4449-
"tagsQuery": "",
4450-
"type": "query",
4451-
"useTags": false
44524422
}
44534423
]
44544424
},

examples/monitoring/prometheus/server/cm.yaml

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,20 +83,13 @@ data:
8383
# - 'producer:1234'
8484
# labels:
8585
# env: 'dev'
86-
8786
# No consumer for the moment in cp-demo
8887
# - job_name: 'consumer'
8988
# static_configs:
9089
# - targets:
9190
# - "consumer:1234"
9291
# labels:
9392
# env: 'dev'
94-
- job_name: "kafka-lag-exporter"
95-
static_configs:
96-
- targets:
97-
- "kafka-lag-exporter:9999"
98-
labels:
99-
env: "dev"
10093
- bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
10194
job_name: kubernetes-apiservers
10295
kubernetes_sd_configs:
115 KB
Loading
113 KB
Loading
146 KB
Loading

0 commit comments

Comments
 (0)