Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Note: This CHANGELOG is only for the monitoring team to track all monitoring-related changes. Please see the OpenShift release notes for official changes.

## 4.XX
Copy link
Member Author

@rexagod rexagod Oct 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since adding external support may take a while.


- [#2694](https://github.com/openshift/cluster-monitoring-operator/pull/2694) Add "telemetry" profile to the set of supported collection profiles. Switching to this profile will disable collection of all metrics except those required for telemetry purposes.

## 4.20

- [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream.
Expand Down
34 changes: 34 additions & 0 deletions assets/alertmanager/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ServiceMonitor scraping the main Alertmanager when the "telemetry"
# collection profile is selected.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/component: alert-router
    app.kubernetes.io/instance: main
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: alertmanager
    app.kubernetes.io/part-of: openshift-monitoring
    app.kubernetes.io/version: 0.28.1
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: alertmanager-main-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # No bearer token file is configured; presumably authentication is handled
  # by the "tls-client-certificate-auth" scrape class below (verify).
  - bearerTokenFile: ""
    interval: 30s
    metricRelabelings:
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (alertmanager_integrations|scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: metrics
    scheme: https
    tlsConfig:
      # Certificate verification stays enabled; serverName is the name the
      # serving certificate is checked against.
      insecureSkipVerify: false
      serverName: alertmanager-main.openshift-monitoring.svc
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      app.kubernetes.io/component: alert-router
      app.kubernetes.io/instance: main
      app.kubernetes.io/name: alertmanager
      app.kubernetes.io/part-of: openshift-monitoring
27 changes: 27 additions & 0 deletions assets/cluster-monitoring-operator/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# ServiceMonitor scraping the cluster-monitoring-operator itself when the
# "telemetry" collection profile is selected.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: cluster-monitoring-operator
    app.kubernetes.io/part-of: openshift-monitoring
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: cluster-monitoring-operator-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # No bearer token file is configured; presumably authentication is handled
  # by the "tls-client-certificate-auth" scrape class below (verify).
  - bearerTokenFile: ""
    metricRelabelings:
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (cluster_monitoring_operator_collection_profile|scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https
    scheme: https
    tlsConfig:
      insecureSkipVerify: false
      # Must be the full Service DNS name (<service>.<namespace>.svc): with
      # verification enabled, a name that does not match the serving
      # certificate makes every scrape fail the TLS handshake.
      serverName: cluster-monitoring-operator.openshift-monitoring.svc
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      app.kubernetes.io/name: cluster-monitoring-operator
118 changes: 118 additions & 0 deletions assets/control-plane/telemetry-service-monitor-kubelet.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# ServiceMonitor for the kubelet Service in kube-system when the "telemetry"
# collection profile is selected. It defines four endpoints on the same
# "https-metrics" port: the default metrics path, /metrics/cadvisor,
# /metrics/probes, and a CRI-O target derived by rewriting the address.
# Every endpoint applies the same __name__ allow-list.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/component: kubernetes
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: kubelet
    app.kubernetes.io/part-of: openshift-monitoring
    k8s-app: kubelet
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: kubelet-telemetry
  namespace: openshift-monitoring
spec:
  # Node metadata is attached to targets; the CRI-O endpoint below relies on
  # the __meta_kubernetes_node_label_* labels this provides.
  attachMetadata:
    node: true
  endpoints:
  # Endpoint 1: kubelet, no explicit path set.
  - bearerTokenFile: ""
    honorLabels: true
    interval: 30s
    metricRelabelings:
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (apiserver_storage_objects|container_cpu_usage_seconds_total|container_memory_working_set_bytes|kubelet_containers_per_pod_count_sum|up)
      sourceLabels:
      - __name__
    port: https-metrics
    relabelings:
    # Copy the scrape path into a visible "metrics_path" label.
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    scrapeTimeout: 30s
    tlsConfig:
      caFile: /etc/prometheus/configmaps/kubelet-serving-ca-bundle/ca-bundle.crt
      insecureSkipVerify: false
  # Endpoint 2: cAdvisor metrics served by the kubelet.
  - bearerTokenFile: ""
    honorLabels: true
    honorTimestamps: true
    interval: 30s
    metricRelabelings:
    # Drop the temporary helper label before applying the allow-list.
    - action: labeldrop
      regex: __tmp_keep_metric
    - action: keep
      regex: (apiserver_storage_objects|container_cpu_usage_seconds_total|container_memory_working_set_bytes|kubelet_containers_per_pod_count_sum|up)
      sourceLabels:
      - __name__
    path: /metrics/cadvisor
    port: https-metrics
    relabelings:
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    scrapeTimeout: 30s
    tlsConfig:
      caFile: /etc/prometheus/configmaps/kubelet-serving-ca-bundle/ca-bundle.crt
      insecureSkipVerify: false
    trackTimestampsStaleness: true
  # Endpoint 3: kubelet probe metrics.
  - bearerTokenFile: ""
    honorLabels: true
    interval: 30s
    metricRelabelings:
    - action: keep
      regex: (apiserver_storage_objects|container_cpu_usage_seconds_total|container_memory_working_set_bytes|kubelet_containers_per_pod_count_sum|up)
      sourceLabels:
      - __name__
    path: /metrics/probes
    port: https-metrics
    relabelings:
    - action: replace
      sourceLabels:
      - __metrics_path__
      targetLabel: metrics_path
    scheme: https
    scrapeTimeout: 30s
    tlsConfig:
      caFile: /etc/prometheus/configmaps/kubelet-serving-ca-bundle/ca-bundle.crt
      insecureSkipVerify: false
  # Endpoint 4: CRI-O, reached by rewriting the kubelet target address.
  - bearerTokenFile: ""
    interval: 30s
    metricRelabelings:
    - action: keep
      regex: (apiserver_storage_objects|container_cpu_usage_seconds_total|container_memory_working_set_bytes|kubelet_containers_per_pod_count_sum|up)
      sourceLabels:
      - __name__
    port: https-metrics
    relabelings:
    # Keep only targets whose node OS label is "linux" or empty.
    - action: keep
      regex: (linux|)
      sourceLabels:
      - __meta_kubernetes_node_label_kubernetes_io_os
    # Swap the port in the target address for 9637.
    - action: replace
      regex: (.+)(?::\d+)
      replacement: $1:9637
      sourceLabels:
      - __address__
      targetLabel: __address__
    # Report these targets with endpoint="crio" and job="crio".
    - action: replace
      replacement: crio
      sourceLabels:
      - endpoint
      targetLabel: endpoint
    - action: replace
      replacement: crio
      targetLabel: job
    scheme: https
    tlsConfig:
      caFile: /etc/prometheus/configmaps/kubelet-serving-ca-bundle/ca-bundle.crt
      insecureSkipVerify: false
  jobLabel: k8s-app
  namespaceSelector:
    matchNames:
    - kube-system
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      k8s-app: kubelet
53 changes: 53 additions & 0 deletions assets/kube-state-metrics/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# ServiceMonitor scraping kube-state-metrics when the "telemetry" collection
# profile is selected. Two endpoints are scraped: "https-main" and
# "https-self"; both apply the same __name__ allow-list.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: kube-state-metrics
    app.kubernetes.io/part-of: openshift-monitoring
    app.kubernetes.io/version: 2.17.0
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: kube-state-metrics-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # Endpoint "https-main".
  - bearerTokenFile: ""
    honorLabels: true
    interval: 1m
    metricRelabelings:
    # Remove the "instance" label from scraped series.
    - action: labeldrop
      regex: instance
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (kube_node_labels|kube_node_role|kube_node_spec_unschedulable|kube_node_status_capacity|kube_node_status_condition|kube_pod_info|kube_pod_restart_policy|kube_running_pod_ready|scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https-main
    relabelings:
    # Remove the target-level "pod" label.
    - action: labeldrop
      regex: pod
    scheme: https
    scrapeTimeout: 1m
    tlsConfig:
      insecureSkipVerify: false
      serverName: kube-state-metrics.openshift-monitoring.svc
  # Endpoint "https-self".
  - bearerTokenFile: ""
    interval: 1m
    metricRelabelings:
    - action: keep
      regex: (kube_node_labels|kube_node_role|kube_node_spec_unschedulable|kube_node_status_capacity|kube_node_status_condition|kube_pod_info|kube_pod_restart_policy|kube_running_pod_ready|scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https-self
    scheme: https
    scrapeTimeout: 1m
    tlsConfig:
      insecureSkipVerify: false
      serverName: kube-state-metrics.openshift-monitoring.svc
  jobLabel: app.kubernetes.io/name
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: kube-state-metrics
      app.kubernetes.io/part-of: openshift-monitoring
30 changes: 30 additions & 0 deletions assets/metrics-server/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# ServiceMonitor scraping metrics-server when the "telemetry" collection
# profile is selected. No metrics-server series are on the allow-list; only
# the scrape bookkeeping series named in the regex are kept.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/component: metrics-server
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: metrics-server
    app.kubernetes.io/part-of: openshift-monitoring
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: metrics-server-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # No bearer token file is configured; presumably authentication is handled
  # by the "tls-client-certificate-auth" scrape class below (verify).
  - bearerTokenFile: ""
    metricRelabelings:
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https
    scheme: https
    tlsConfig:
      insecureSkipVerify: false
      # Must be the full Service DNS name (<service>.<namespace>.svc): with
      # verification enabled, a name that does not match the serving
      # certificate makes every scrape fail the TLS handshake.
      serverName: metrics-server.openshift-monitoring.svc
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      app.kubernetes.io/component: metrics-server
      app.kubernetes.io/name: metrics-server
      app.kubernetes.io/part-of: openshift-monitoring
48 changes: 48 additions & 0 deletions assets/node-exporter/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ServiceMonitor scraping node-exporter when the "telemetry" collection
# profile is selected.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/component: exporter
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/name: node-exporter
    app.kubernetes.io/part-of: openshift-monitoring
    app.kubernetes.io/version: 1.9.1
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: node-exporter-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # No bearer token file is configured; presumably authentication is handled
  # by the "tls-client-certificate-auth" scrape class below (verify).
  - bearerTokenFile: ""
    interval: 15s
    metricRelabelings:
    # NOTE(review): this rule tags three NFS mountstats series with
    # __tmp_keep, and the next rule drops that label again before the
    # __name__ allow-list runs. As written the pair does not appear to change
    # which series survive - confirm whether both rules can be removed.
    - action: replace
      regex: (node_mountstats_nfs_read_bytes_total|node_mountstats_nfs_write_bytes_total|node_mountstats_nfs_operations_requests_total)
      replacement: "true"
      sourceLabels:
      - __name__
      targetLabel: __tmp_keep
    - action: labeldrop
      regex: __tmp_keep
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (node_cpu_info|virt_platform|node_memory_MemTotal_bytes|node_memory_MemAvailable_bytes|node_cpu_seconds_total|up|scrape_series_added|scrape_samples_post_metric_relabeling|node_accelerator_card_info)
      sourceLabels:
      - __name__
    port: https
    relabelings:
    # Use the node name of the scraped pod as the "instance" label.
    - action: replace
      regex: (.*)
      replacement: $1
      sourceLabels:
      - __meta_kubernetes_pod_node_name
      targetLabel: instance
    scheme: https
    tlsConfig:
      insecureSkipVerify: false
      # Must be the full Service DNS name (<service>.<namespace>.svc): with
      # verification enabled, a name that does not match the serving
      # certificate makes every scrape fail the TLS handshake.
      serverName: node-exporter.openshift-monitoring.svc
  jobLabel: app.kubernetes.io/name
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      app.kubernetes.io/component: exporter
      app.kubernetes.io/name: node-exporter
      app.kubernetes.io/part-of: openshift-monitoring
46 changes: 46 additions & 0 deletions assets/openshift-state-metrics/telemetry-service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# ServiceMonitor scraping openshift-state-metrics when the "telemetry"
# collection profile is selected. Two endpoints are scraped: "https-main" and
# "https-self". No openshift-state-metrics series are on the allow-list; only
# the scrape bookkeeping series named in the regex are kept.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app.kubernetes.io/managed-by: cluster-monitoring-operator
    app.kubernetes.io/part-of: openshift-monitoring
    k8s-app: openshift-state-metrics
    # Ties this monitor to the "telemetry" collection profile.
    monitoring.openshift.io/collection-profile: telemetry
  name: openshift-state-metrics-telemetry
  namespace: openshift-monitoring
spec:
  endpoints:
  # Endpoint "https-main".
  - bearerTokenFile: ""
    honorLabels: true
    interval: 2m
    metricRelabelings:
    # Allow-list: only series whose __name__ matches the regex are kept.
    - action: keep
      regex: (scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https-main
    scheme: https
    scrapeTimeout: 2m
    tlsConfig:
      # Server certificate is verified against this CA bundle and serverName.
      caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
      insecureSkipVerify: false
      serverName: openshift-state-metrics.openshift-monitoring.svc
  # Endpoint "https-self".
  - bearerTokenFile: ""
    interval: 2m
    metricRelabelings:
    - action: keep
      regex: (scrape_samples_post_metric_relabeling|scrape_series_added|up)
      sourceLabels:
      - __name__
    port: https-self
    scheme: https
    scrapeTimeout: 2m
    tlsConfig:
      caFile: /etc/prometheus/configmaps/serving-certs-ca-bundle/service-ca.crt
      insecureSkipVerify: false
      serverName: openshift-state-metrics.openshift-monitoring.svc
  jobLabel: k8s-app
  scrapeClass: tls-client-certificate-auth
  selector:
    matchLabels:
      k8s-app: openshift-state-metrics
Loading