@@ -40,10 +40,12 @@ spec:
           - airflow
           - druid
           - hive
-          - nifi # This only works for NiFi 1, NiFi 2 has a special ServiceMonitor below
+          - kafka
+          - nifi # This only works for NiFi 1, NiFi 2 works via stackable-generic
           - opa
           - superset
           - trino
+          - zookeeper
   endpoints:
     - scheme: http
       port: metrics
@@ -55,10 +57,25 @@ spec:
     - app.kubernetes.io/role-group
     - app.kubernetes.io/version
 ---
+# Utilize the `prometheus.io/scheme`, `prometheus.io/port` and `prometheus.io/path` annotations set by the
+# operators to scrape all Stackable products (see the annotation sketch below).
+# [x] Airflow - relabel drop filter on the airflow container
+# [x] Druid
+# [x] HBase
+# [x] Hadoop HDFS - relabel drop filter on the empty container
+# [x] Hive
+# [~] Kafka - TODO: do the listener services expose metrics?
+# [x] NiFi 1 + 2
+# [ ] OpenSearch
+# [x] Spark: Connect, HistoryServer
+# [x] Superset - relabel drop filter on the superset container
+# [x] Trino
+# [x] ZooKeeper
+# [x] OPA
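+#
+# As a rough sketch of what this relies on: the operator-created metrics Services carry the
+# prometheus.io/scrape label plus the scheme/path/port annotations. The concrete values here are only
+# illustrative (a plain-HTTP product exposing /metrics on port 9090), not copied from a real cluster:
+#   metadata:
+#     labels:
+#       prometheus.io/scrape: "true"
+#     annotations:
+#       prometheus.io/scheme: http
+#       prometheus.io/path: /metrics
+#       prometheus.io/port: "9090"
+# The relabelings below turn these annotations into the scrape scheme, metrics path and target address.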
 apiVersion: monitoring.coreos.com/v1
 kind: ServiceMonitor
 metadata:
-  name: stackable-native-metrics
+  name: stackable-generic
   labels:
     stackable.tech/vendor: Stackable
     release: prometheus
@@ -69,46 +86,49 @@ spec:
     matchLabels:
       stackable.tech/vendor: Stackable
       prometheus.io/scrape: "true"
-    matchExpressions:
-      - key: app.kubernetes.io/name
-        operator: In
-        values:
-          - zookeeper
   endpoints:
-    - scheme: http
-      port: native-metrics
-      path: /metrics
-  podTargetLabels:
-    - app.kubernetes.io/name
-    - app.kubernetes.io/instance
-    - app.kubernetes.io/component
-    - app.kubernetes.io/role-group
-    - app.kubernetes.io/version
----
-# Kafka is special in that the operator totally messes up services:
-# 1. The metrics Service is missing
-# 2. The role level simple-kafka-broker-default has the prometheus.io/scrape label, but exposes no ports...
-# 3. The role level simple-kafka-broker-default is labeled with app.kubernetes.io/name: listener???
-# So we have a dedicated config for it
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: stackable-kafka
-  labels:
-    stackable.tech/vendor: Stackable
-    release: prometheus
-spec:
-  namespaceSelector:
-    any: true
-  selector:
-    matchLabels:
-      stackable.tech/vendor: Stackable
-      app.kubernetes.io/name: listener # Dafuq?
-      app.kubernetes.io/component: broker # We need to filter on brokers instead, as the app.kubernetes.io/name is messed up
-  endpoints:
-    - scheme: http
-      port: metrics
-      path: /metrics
+    - relabelings:
+        - sourceLabels:
+            - __meta_kubernetes_pod_container_name
+          # Pods show up twice due to multiple containers; we only keep the main / product container.
+          # The exceptions are Airflow and Superset, where we keep the metrics container instead (otherwise the
+          # scheduler, worker etc. Pods, which only have the metrics container, would not get picked up).
+          # - airflow: airflow
+          # - superset: superset
+          # - empty: filter when the container label does not exist: hdfs
+          regex: ^(airflow|superset|)$
+          action: drop
+        - sourceLabels:
+            - __meta_kubernetes_service_annotation_prometheus_io_scheme
+          action: replace
+          targetLabel: __scheme__
+          regex: (https?)
+        - sourceLabels:
+            - __meta_kubernetes_service_annotation_prometheus_io_path
+          action: replace
+          targetLabel: __metrics_path__
+          regex: (.+)
+        - sourceLabels:
+            - __meta_kubernetes_service_name
+            - __meta_kubernetes_namespace
+            - __meta_kubernetes_service_annotation_prometheus_io_port
+          action: replace
+          targetLabel: __address__
+          regex: (.+);(.+);(\d+)
+          # TODO: We could set the cluster domain via annotation as well and pick it up here.
+          replacement: $1.$2.svc.cluster.local:$3
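+          # As an illustration (Service name, namespace and port are made up): a Service named
+          # simple-druid-broker-metrics in namespace default with prometheus.io/port: "9090" would be
+          # scraped at simple-druid-broker-metrics.default.svc.cluster.local:9090.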
+      tlsConfig:
+        ca:
+          secret:
+            name: prometheus-tls-certificate
+            key: ca.crt
+        cert:
+          secret:
+            name: prometheus-tls-certificate
+            key: tls.crt
+        keySecret:
+          name: prometheus-tls-certificate
+          key: tls.key
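+      # One possible way (file names assumed) to provide this secret from existing PEM files:
+      #   kubectl create secret generic prometheus-tls-certificate \
+      #     --from-file=ca.crt --from-file=tls.crt --from-file=tls.key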
   podTargetLabels:
     - app.kubernetes.io/name
     - app.kubernetes.io/instance
@@ -219,63 +239,6 @@ spec:
     - app.kubernetes.io/role-group
     - app.kubernetes.io/version
 ---
-# NiFI 2 is a beast on it's own...
-# We need to use mTLS (otherwise we get a 401) and can not use the PodIP
-apiVersion: monitoring.coreos.com/v1
-kind: ServiceMonitor
-metadata:
-  name: stackable-nifi-2
-  labels:
-    stackable.tech/vendor: Stackable
-    release: prometheus
-spec:
-  namespaceSelector:
-    any: true
-  selector:
-    matchLabels:
-      stackable.tech/vendor: Stackable
-      prometheus.io/scrape: "true"
-    matchExpressions:
-      - key: app.kubernetes.io/name
-        operator: In
-        values:
-          - nifi
-  endpoints:
-    - scheme: https
-      port: https
-      path: /nifi-api/flow/metrics/prometheus
-      # See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.TLSConfig
-      tlsConfig:
-        ca:
-          secret:
-            name: prometheus-tls-certificate
-            key: ca.crt
-        cert:
-          secret:
-            name: prometheus-tls-certificate
-            key: tls.crt
-        keySecret:
-          name: prometheus-tls-certificate
-          key: tls.key
-      # We need to talk to the Pod via the FQDN of the Pod because of the stupid SNI check of NiFi.
-      # We can not use the typical PodIP, as it is not contained in the NiFi certificate,
-      # see https://github.com/stackabletech/secret-operator/issues/620
-      relabelings:
-        - sourceLabels:
-            - __meta_kubernetes_pod_name
-            - __meta_kubernetes_service_name
-            - __meta_kubernetes_namespace
-            - __meta_kubernetes_pod_container_port_number
-          targetLabel: __address__
-          replacement: ${1}.${2}-headless.${3}.svc.cluster.local:${4}
-          regex: (.+);(.+?)(?:-metrics)?;(.+);(.+)
-  podTargetLabels:
-    - app.kubernetes.io/name
-    - app.kubernetes.io/instance
-    - app.kubernetes.io/component
-    - app.kubernetes.io/role-group
-    - app.kubernetes.io/version
----
 # spark-k8s-operator does not deploy any Services at all (at least for SparkApplications).
 # We currently only scrape the driver, going forward we might want to scrape the executors as well.
 # In the future we might also want to scrape SparkConnect and HistoryServers.