diff --git a/components/monitoring/prometheus/base/monitoringstack/monitoringstack.yaml b/components/monitoring/prometheus/base/monitoringstack/monitoringstack.yaml index d1f3fd5bf93..1e2e96ea90d 100644 --- a/components/monitoring/prometheus/base/monitoringstack/monitoringstack.yaml +++ b/components/monitoring/prometheus/base/monitoringstack/monitoringstack.yaml @@ -42,23 +42,7 @@ spec: audience: # added by overlays tokenUrl: https://sso.redhat.com/auth/realms/redhat-external/protocol/openid-connect/token url: # added by overlays - writeRelabelConfigs: - - action: LabelKeep - regex: "__name__|source_environment|source_cluster|namespace|app|pod|container|\ - label_pipelines_appstudio_openshift_io_type|health_status|dest_namespace|\ - controller|service|reason|phase|type|resource|resourcequota|le|app|image|\ - commit_hash|job|operation|tokenName|rateLimited|state|persistentvolumeclaim|\ - storageclass|volumename|release_reason|instance|result|deployment_reason|\ - validation_reason|strategy|succeeded|target|name|method|code|sp|le|\ - unexpected_status|failure|hostname|label_app_kubernetes_io_managed_by|status|\ - pipeline|pipelinename|pipelinerun|schedule|check|grpc_service|grpc_code|\ - grpc_method|lease|lease_holder|deployment|platform|mode|cpu|role|node|kind|\ - verb|request_kind|tested_cluster|resource_type|exported_job|http_method|\ - http_route|http_status_code|gin_errors|rule_result|rule_execution_cause|\ - policy_name|policy_background_mode|rule_type|policy_type|policy_validation_mode|\ - resource_request_operation|resource_kind|policy_change_type|event_type" - - + # writeRelabelConfigs: # added by overlays --- # Grant permission to Federate In-Cluster Prometheus apiVersion: rbac.authorization.k8s.io/v1 @@ -94,195 +78,7 @@ spec: app.kubernetes.io/managed-by: observability-operator app.kubernetes.io/name: appstudio-federate-ms-prometheus endpoints: - - params: - 'match[]': # scrape only required metrics from in-cluster prometheus - - 
'{__name__="pipeline_service_schedule_overhead_percentage_sum"}' - - '{__name__="pipeline_service_schedule_overhead_percentage_count"}' - - '{__name__="pipeline_service_execution_overhead_percentage_sum"}' - - '{__name__="pipeline_service_execution_overhead_percentage_count"}' - - '{__name__="pipelinerun_duration_scheduled_seconds_sum"}' - - '{__name__="pipelinerun_duration_scheduled_seconds_count"}' - - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_sum"}' - - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_count"}' - - '{__name__="pipelinerun_kickoff_not_attempted_count"}' - - '{__name__="pending_resolutionrequest_count"}' - - '{__name__="taskrun_pod_create_not_attempted_or_pending_count"}' - - '{__name__="tekton_pipelines_controller_pipelinerun_count"}' - - '{__name__="tekton_pipelines_controller_running_pipelineruns_count"}' - - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota_count"}' - - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node_count"}' - - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota"}' - - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node"}' - - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_sum"}' - - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_count"}' - - '{__name__="watcher_workqueue_depth"}' - - '{__name__="watcher_client_latency_bucket"}' - - '{__name__="pac_watcher_work_queue_depth"}' - - '{__name__="pac_watcher_client_latency_bucket"}' - - '{__name__="grpc_server_handled_total", namespace=~"tekton-results|openshift-pipelines"}' - - '{__name__="grpc_server_handled_total", namespace=~"openshift-etcd"}' - - '{__name__="grpc_server_handling_seconds_bucket", namespace=~"tekton-results|openshift-pipelines"}' - - '{__name__="grpc_server_handling_seconds_bucket", namespace="openshift-etcd"}' - - '{__name__="grpc_server_msg_received_total", namespace="openshift-etcd"}' - - 
'{__name__="controller_runtime_reconcile_errors_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="controller_runtime_reconcile_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_lease_owner", namespace="openshift-pipelines", lease=~"controller.tektonresolverframework.bundleresolver..*"}' - - '{__name__="kube_lease_owner", namespace="openshift-pipelines", lease=~"tekton-pipelines-controller.github.com.tektoncd.pipeline.pkg.reconciler..*"}' - - '{__name__="kube_pod_status_unschedulable", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_pod_container_status_restarts_total", namespace=~"openshift-pipelines|release-service"}' - - '{__name__="kube_pod_container_status_waiting_reason", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_pod_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_pod_container_resource_limits", namespace="release-service"}' - - '{__name__="kube_pod_container_status_terminated_reason", namespace="release-service"}' - - '{__name__="kube_pod_container_status_last_terminated_reason", namespace="release-service"}' - - '{__name__="kube_pod_container_status_ready", namespace="release-service"}' - - '{__name__="kube_persistentvolume_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_resourcequota", namespace!~".*-tenant|openshift-.*|kube-.*"}' - - '{__name__="kube_statefulset_status_replicas_ready", namespace="gitops-service-argocd"}' - - '{__name__="kube_statefulset_replicas", namespace="gitops-service-argocd"}' - - '{__name__="openshift_route_status", namespace="gitops-service-argocd"}' - - - '{__name__="kube_deployment_status_replicas_ready", namespace="gitops-service-argocd"}' - - '{__name__="kube_deployment_spec_replicas", namespace=~"gitops-service-argocd"}' - - # Namespace (expression): "build-service" - - '{__name__="kube_deployment_status_replicas_ready", namespace="build-service"}' - - 
'{__name__="kube_deployment_status_replicas_available", namespace="build-service"}' - - '{__name__="kube_deployment_spec_replicas", namespace="build-service"}' - - # Namespace (expression): "integration-service" - - '{__name__="kube_deployment_status_replicas_ready", namespace="integration-service"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="integration-service"}' - - '{__name__="kube_deployment_spec_replicas", namespace="integration-service"}' - - # Namespace (expression): "konflux-ui" - - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-ui"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-ui"}' - - '{__name__="kube_deployment_spec_replicas", namespace="konflux-ui"}' - - '{__name__="kube_running_pods_ready", namespace="konflux-ui"}' - - '{__name__="kube_endpoint_address", namespace="konflux-ui"}' - - '{__name__="kube_pod_container_status_restarts_total", namespace="konflux-ui"}' - - # Namespace (expression): "mintmaker" - - '{__name__="kube_deployment_status_replicas_ready", namespace="mintmaker"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="mintmaker"}' - - '{__name__="kube_deployment_spec_replicas", namespace="mintmaker"}' - - '{__name__="cluster_ram_requested_perc"}' - - '{__name__="node_memory_pressured_perc"}' - - '{__name__="redis_node_memory_usage_perc"}' - - # Namespace (expression): ~".*monitoring.*" - - '{__name__="kube_deployment_status_replicas_ready", namespace=~".*monitoring.*"}' - - '{__name__="kube_deployment_status_replicas_available", namespace=~".*monitoring.*"}' - - '{__name__="kube_deployment_spec_replicas", namespace=~".*monitoring.*"}' - - # Namespace (expression): "multi-platform-controller" - - '{__name__="kube_deployment_status_replicas_ready", namespace="multi-platform-controller"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="multi-platform-controller"}' - - '{__name__="kube_deployment_spec_replicas", 
namespace="multi-platform-controller"}' - - # Namespace (expression): "namespace-lister" - - '{__name__="kube_deployment_status_replicas_ready", namespace="namespace-lister"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="namespace-lister"}' - - '{__name__="kube_deployment_spec_replicas", namespace="namespace-lister"}' - - # Namespace (expression): "openshift-pipelines" - - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-pipelines"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-pipelines"}' - - '{__name__="kube_deployment_spec_replicas", namespace="openshift-pipelines"}' - - # Namespace (expression): "product-kubearchive" - - '{__name__="kube_deployment_status_replicas_ready", namespace="product-kubearchive"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="product-kubearchive"}' - - '{__name__="kube_deployment_spec_replicas", namespace="product-kubearchive"}' - - # Namespace (expression): "project-controller" - - '{__name__="kube_deployment_status_replicas_ready", namespace="project-controller"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="project-controller"}' - - '{__name__="kube_deployment_spec_replicas", namespace="project-controller"}' - - # Namespace (expression): "release-service" - - '{__name__="kube_deployment_status_replicas_ready", namespace="release-service"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="release-service"}' - - '{__name__="kube_deployment_spec_replicas", namespace="release-service"}' - - # Namespace (expression): ~"smee.*" - - '{__name__="kube_deployment_status_replicas_ready", namespace=~"smee.*"}' - - '{__name__="kube_deployment_status_replicas_available", namespace=~"smee.*"}' - - '{__name__="kube_deployment_spec_replicas", namespace=~"smee.*"}' - - # Namespace (expression): "openshift-apiserver" - - '{__name__="kube_deployment_status_replicas_ready", 
namespace="openshift-apiserver"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-apiserver"}' - - '{__name__="kube_deployment_spec_replicas", namespace="openshift-apiserver"}' - - # Namespace (expression): "openshift-oauth-apiserver" - - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-oauth-apiserver"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-oauth-apiserver"}' - - '{__name__="kube_deployment_spec_replicas", namespace="openshift-oauth-apiserver"}' - - # Namespace (expression): "konflux-kyverno" - - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-kyverno"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-kyverno"}' - - '{__name__="kube_deployment_spec_replicas", namespace="konflux-kyverno"}' - - # Namespace (expression): "openshift-kube-apiserver" - - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-kube-apiserver"}' - - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-kube-apiserver"}' - - '{__name__="kube_deployment_spec_replicas", namespace="openshift-kube-apiserver"}' - - # Namespace (expression): "konflux-user-support" - - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-user-support"}' - - '{__name__="kube_deployment_spec_replicas", namespace="konflux-user-support"}' - - - '{__name__="argocd_app_reconcile_bucket", namespace="gitops-service-argocd"}' - - '{__name__="argocd_app_info", namespace="gitops-service-argocd"}' - - '{__name__="container_cpu_usage_seconds_total", namespace="release-service"}' - - '{__name__="container_cpu_usage_seconds_total", namespace="openshift-etcd"}' - - '{__name__="container_memory_usage_bytes", namespace="release-service"}' - - '{__name__="container_memory_usage_bytes", namespace="openshift-etcd"}' - - '{__name__="etcd_disk_wal_fsync_duration_seconds_bucket"}' - - 
'{__name__="etcd_disk_backend_commit_duration_seconds_bucket"}' - - '{__name__="etcd_server_proposals_failed_total"}' - - '{__name__="etcd_server_leader_changes_seen_total", namespace="openshift-etcd"}' - - '{__name__="etcd_server_has_leader", namespace="openshift-etcd"}' - - '{__name__="etcd_server_is_leader", namespace="openshift-etcd"}' - - '{__name__="etcd_server_id", namespace="openshift-etcd"}' - - '{__name__="etcd_server_quota_backend_bytes", namespace="openshift-etcd"}' - - '{__name__="etcd_mvcc_db_total_size_in_bytes", namespace="openshift-etcd"}' - - '{__name__="etcd_server_received_total", namespace="openshift-etcd"}' - - '{__name__="etcd_network_active_peers", namespace="openshift-etcd"}' - - '{__name__="etcd_network_peer_round_trip_time_seconds_bucket"}' - - '{__name__="etcd_disk_defrag_inflight"}' - - '{__name__="kube_job_spec_completions"}' - - '{__name__="kube_job_status_succeeded"}' - - '{__name__="kube_job_status_failed"}' - - '{__name__="node_cpu_seconds_total", mode="idle"}' - - '{__name__="node_memory_MemTotal_bytes"}' - - '{__name__="node_memory_MemAvailable_bytes"}' - - '{__name__="platform:hypershift_hostedclusters:max"}' - - '{__name__="kube_node_role"}' - - '{__name__="etcd_shield_trigger"}' - - '{__name__="etcd_shield_alert_triggered"}' - - '{__name__="apiserver_admission_webhook_rejection_count", name="vpipelineruns.konflux-ci.dev"}' - - '{__name__="apiserver_watch_events_total"}' - - '{__name__="apiserver_storage_objects"}' - - '{__name__="apiserver_current_inflight_requests"}' - - '{__name__="resource_verb:apiserver_request_total:rate5m"}' - - '{__name__="code:apiserver_request_total:rate5m"}' - - '{__name__="instance:apiserver_request_total:rate5m"}' - - '{__name__="prometheus_ready"}' - - '{__name__="process_cpu_seconds_total", job="apiserver"}' - - '{__name__="namespace:container_memory_usage_bytes:sum", 
namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="namespace:container_cpu_usage:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="node_namespace_pod:kube_pod_info:", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="kube_node_status_allocatable", resource=~"cpu|memory"}' - - '{__name__="kube_node_status_condition", condition="MemoryPressure", status="true"}' - - '{__name__="namespace_memory:kube_pod_container_resource_requests:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="namespace_cpu:kube_pod_container_resource_requests:sum", 
namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="namespace_memory:kube_pod_container_resource_limits:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - - '{__name__="namespace_cpu:kube_pod_container_resource_limits:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' - + - params: {} # added by overlays relabelings: # override the target's address by the prometheus-k8s service name. 
- action: replace diff --git a/components/monitoring/prometheus/development/monitoringstack/cluster-type-patch.yaml b/components/monitoring/prometheus/development/monitoringstack/cluster-type-patch.yaml index b1bd0a71f9e..e71a82172f3 100644 --- a/components/monitoring/prometheus/development/monitoringstack/cluster-type-patch.yaml +++ b/components/monitoring/prometheus/development/monitoringstack/cluster-type-patch.yaml @@ -1,5 +1,5 @@ --- -- op: add +- op: replace path: /spec/endpoints/0/relabelings/0 value: targetLabel: source_environment diff --git a/components/monitoring/prometheus/development/monitoringstack/kustomization.yaml b/components/monitoring/prometheus/development/monitoringstack/kustomization.yaml index dbe28f09be3..3d1488f0b6a 100644 --- a/components/monitoring/prometheus/development/monitoringstack/kustomization.yaml +++ b/components/monitoring/prometheus/development/monitoringstack/kustomization.yaml @@ -2,7 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ../../base/observability-operator - - ../../base/monitoringstack + - ../../staging/base/monitoringstack patches: - path: cluster-type-patch.yaml target: diff --git a/components/monitoring/prometheus/production/base/monitoringstack/endpoints-params.yaml b/components/monitoring/prometheus/production/base/monitoringstack/endpoints-params.yaml new file mode 100644 index 00000000000..5eef6feb9df --- /dev/null +++ b/components/monitoring/prometheus/production/base/monitoringstack/endpoints-params.yaml @@ -0,0 +1,191 @@ +--- +- op: replace + path: /spec/endpoints/0/params + value: + 'match[]': # scrape only required metrics from in-cluster prometheus + - '{__name__="pipeline_service_schedule_overhead_percentage_sum"}' + - '{__name__="pipeline_service_schedule_overhead_percentage_count"}' + - '{__name__="pipeline_service_execution_overhead_percentage_sum"}' + - '{__name__="pipeline_service_execution_overhead_percentage_count"}' + - 
'{__name__="pipelinerun_duration_scheduled_seconds_sum"}' + - '{__name__="pipelinerun_duration_scheduled_seconds_count"}' + - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_sum"}' + - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_count"}' + - '{__name__="pipelinerun_kickoff_not_attempted_count"}' + - '{__name__="pending_resolutionrequest_count"}' + - '{__name__="taskrun_pod_create_not_attempted_or_pending_count"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_count"}' + - '{__name__="tekton_pipelines_controller_running_pipelineruns_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_sum"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_count"}' + - '{__name__="watcher_workqueue_depth"}' + - '{__name__="watcher_client_latency_bucket"}' + - '{__name__="pac_watcher_work_queue_depth"}' + - '{__name__="pac_watcher_client_latency_bucket"}' + - '{__name__="grpc_server_handled_total", namespace=~"tekton-results|openshift-pipelines"}' + - '{__name__="grpc_server_handled_total", namespace=~"openshift-etcd"}' + - '{__name__="grpc_server_handling_seconds_bucket", namespace=~"tekton-results|openshift-pipelines"}' + - '{__name__="grpc_server_handling_seconds_bucket", namespace="openshift-etcd"}' + - '{__name__="grpc_server_msg_received_total", namespace="openshift-etcd"}' + - '{__name__="controller_runtime_reconcile_errors_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="controller_runtime_reconcile_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_lease_owner", namespace="openshift-pipelines", 
lease=~"controller.tektonresolverframework.bundleresolver..*"}' + - '{__name__="kube_lease_owner", namespace="openshift-pipelines", lease=~"tekton-pipelines-controller.github.com.tektoncd.pipeline.pkg.reconciler..*"}' + - '{__name__="kube_pod_status_unschedulable", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_container_status_restarts_total", namespace=~"openshift-pipelines|release-service"}' + - '{__name__="kube_pod_container_status_waiting_reason", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_container_resource_limits", namespace="release-service"}' + - '{__name__="kube_pod_container_status_terminated_reason", namespace="release-service"}' + - '{__name__="kube_pod_container_status_last_terminated_reason", namespace="release-service"}' + - '{__name__="kube_pod_container_status_ready", namespace="release-service"}' + - '{__name__="kube_persistentvolume_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_resourcequota", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_statefulset_status_replicas_ready", namespace="gitops-service-argocd"}' + - '{__name__="kube_statefulset_replicas", namespace="gitops-service-argocd"}' + - '{__name__="openshift_route_status", namespace="gitops-service-argocd"}' + + - '{__name__="kube_deployment_status_replicas_ready", namespace="gitops-service-argocd"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~"gitops-service-argocd"}' + + # Namespace (expression): "build-service" + - '{__name__="kube_deployment_status_replicas_ready", namespace="build-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="build-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="build-service"}' + + # Namespace (expression): "integration-service" + - '{__name__="kube_deployment_status_replicas_ready", 
namespace="integration-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="integration-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="integration-service"}' + + # Namespace (expression): "konflux-ui" + - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-ui"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-ui"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-ui"}' + - '{__name__="kube_running_pods_ready", namespace="konflux-ui"}' + - '{__name__="kube_endpoint_address", namespace="konflux-ui"}' + - '{__name__="kube_pod_container_status_restarts_total", namespace="konflux-ui"}' + + # Namespace (expression): "mintmaker" + - '{__name__="kube_deployment_status_replicas_ready", namespace="mintmaker"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="mintmaker"}' + - '{__name__="kube_deployment_spec_replicas", namespace="mintmaker"}' + - '{__name__="cluster_ram_requested_perc"}' + - '{__name__="node_memory_pressured_perc"}' + - '{__name__="redis_node_memory_usage_perc"}' + + # Namespace (expression): ~".*monitoring.*" + - '{__name__="kube_deployment_status_replicas_ready", namespace=~".*monitoring.*"}' + - '{__name__="kube_deployment_status_replicas_available", namespace=~".*monitoring.*"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~".*monitoring.*"}' + + # Namespace (expression): "multi-platform-controller" + - '{__name__="kube_deployment_status_replicas_ready", namespace="multi-platform-controller"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="multi-platform-controller"}' + - '{__name__="kube_deployment_spec_replicas", namespace="multi-platform-controller"}' + + # Namespace (expression): "namespace-lister" + - '{__name__="kube_deployment_status_replicas_ready", namespace="namespace-lister"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="namespace-lister"}' + 
- '{__name__="kube_deployment_spec_replicas", namespace="namespace-lister"}' + + # Namespace (expression): "openshift-pipelines" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-pipelines"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-pipelines"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-pipelines"}' + + # Namespace (expression): "product-kubearchive" + - '{__name__="kube_deployment_status_replicas_ready", namespace="product-kubearchive"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="product-kubearchive"}' + - '{__name__="kube_deployment_spec_replicas", namespace="product-kubearchive"}' + + # Namespace (expression): "project-controller" + - '{__name__="kube_deployment_status_replicas_ready", namespace="project-controller"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="project-controller"}' + - '{__name__="kube_deployment_spec_replicas", namespace="project-controller"}' + + # Namespace (expression): "release-service" + - '{__name__="kube_deployment_status_replicas_ready", namespace="release-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="release-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="release-service"}' + + # Namespace (expression): ~"smee.*" + - '{__name__="kube_deployment_status_replicas_ready", namespace=~"smee.*"}' + - '{__name__="kube_deployment_status_replicas_available", namespace=~"smee.*"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~"smee.*"}' + + # Namespace (expression): "openshift-apiserver" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-apiserver"}' + + # Namespace (expression): "openshift-oauth-apiserver" + - 
'{__name__="kube_deployment_status_replicas_ready", namespace="openshift-oauth-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-oauth-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-oauth-apiserver"}' + + # Namespace (expression): "konflux-kyverno" + - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-kyverno"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-kyverno"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-kyverno"}' + + # Namespace (expression): "openshift-kube-apiserver" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-kube-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-kube-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-kube-apiserver"}' + + # Namespace (expression): "konflux-user-support" + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-user-support"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-user-support"}' + + - '{__name__="argocd_app_reconcile_bucket", namespace="gitops-service-argocd"}' + - '{__name__="argocd_app_info", namespace="gitops-service-argocd"}' + - '{__name__="container_cpu_usage_seconds_total", namespace="release-service"}' + - '{__name__="container_cpu_usage_seconds_total", namespace="openshift-etcd"}' + - '{__name__="container_memory_usage_bytes", namespace="release-service"}' + - '{__name__="container_memory_usage_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_disk_wal_fsync_duration_seconds_bucket"}' + - '{__name__="etcd_disk_backend_commit_duration_seconds_bucket"}' + - '{__name__="etcd_server_proposals_failed_total"}' + - '{__name__="etcd_server_leader_changes_seen_total", namespace="openshift-etcd"}' + - '{__name__="etcd_server_has_leader", namespace="openshift-etcd"}' + - 
'{__name__="etcd_server_is_leader", namespace="openshift-etcd"}' + - '{__name__="etcd_server_id", namespace="openshift-etcd"}' + - '{__name__="etcd_server_quota_backend_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_mvcc_db_total_size_in_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_server_received_total", namespace="openshift-etcd"}' + - '{__name__="etcd_network_active_peers", namespace="openshift-etcd"}' + - '{__name__="etcd_network_peer_round_trip_time_seconds_bucket"}' + - '{__name__="etcd_disk_defrag_inflight"}' + - '{__name__="kube_job_spec_completions"}' + - '{__name__="kube_job_status_succeeded"}' + - '{__name__="kube_job_status_failed"}' + - '{__name__="node_cpu_seconds_total", mode="idle"}' + - '{__name__="node_memory_MemTotal_bytes"}' + - '{__name__="node_memory_MemAvailable_bytes"}' + - '{__name__="platform:hypershift_hostedclusters:max"}' + - '{__name__="kube_node_role"}' + - '{__name__="etcd_shield_trigger"}' + - '{__name__="etcd_shield_alert_triggered"}' + - '{__name__="apiserver_admission_webhook_rejection_count", name="vpipelineruns.konflux-ci.dev"}' + - '{__name__="apiserver_watch_events_total"}' + - '{__name__="apiserver_storage_objects"}' + - '{__name__="apiserver_current_inflight_requests"}' + - '{__name__="resource_verb:apiserver_request_total:rate5m"}' + - '{__name__="code:apiserver_request_total:rate5m"}' + - '{__name__="instance:apiserver_request_total:rate5m"}' + - '{__name__="prometheus_ready"}' + - '{__name__="process_cpu_seconds_total", job="apiserver"}' + - '{__name__="namespace:container_memory_usage_bytes:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace:container_cpu_usage:sum", 
namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="node_namespace_pod:kube_pod_info:", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="kube_node_status_allocatable", resource=~"cpu|memory"}' + - '{__name__="kube_node_status_condition", condition="MemoryPressure", status="true"}' + - '{__name__="namespace_memory:kube_pod_container_resource_requests:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_cpu:kube_pod_container_resource_requests:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_memory:kube_pod_container_resource_limits:sum", 
namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_cpu:kube_pod_container_resource_limits:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' diff --git a/components/monitoring/prometheus/production/base/monitoringstack/kustomization.yaml b/components/monitoring/prometheus/production/base/monitoringstack/kustomization.yaml index 43dbb53ef49..b10a5f4ce28 100644 --- a/components/monitoring/prometheus/production/base/monitoringstack/kustomization.yaml +++ b/components/monitoring/prometheus/production/base/monitoringstack/kustomization.yaml @@ -7,6 +7,10 @@ patches: target: name: appstudio-federate-smon kind: ServiceMonitor + - path: endpoints-params.yaml + target: + name: appstudio-federate-smon + kind: ServiceMonitor - path: cluster-type-patch.yaml target: name: appstudio-federate-uwm-smon @@ -15,6 +19,10 @@ patches: target: name: appstudio-federate-ms kind: MonitoringStack + - path: writeRelabelConfigs.yaml + target: + name: appstudio-federate-ms + kind: MonitoringStack commonAnnotations: argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true diff --git a/components/monitoring/prometheus/production/base/monitoringstack/writeRelabelConfigs.yaml b/components/monitoring/prometheus/production/base/monitoringstack/writeRelabelConfigs.yaml new file mode 100644 index 00000000000..8caaa8f4642 --- /dev/null +++ 
b/components/monitoring/prometheus/production/base/monitoringstack/writeRelabelConfigs.yaml @@ -0,0 +1,18 @@ +--- +- op: replace + path: /spec/prometheusConfig/remoteWrite/0/writeRelabelConfigs + value: + - action: LabelKeep + regex: "__name__|source_environment|source_cluster|namespace|app|pod|container|\ + label_pipelines_appstudio_openshift_io_type|health_status|dest_namespace|\ + controller|service|reason|phase|type|resource|resourcequota|le|app|image|\ + commit_hash|job|operation|tokenName|rateLimited|state|persistentvolumeclaim|\ + storageclass|volumename|release_reason|instance|result|deployment_reason|\ + validation_reason|strategy|succeeded|target|name|method|code|sp|le|\ + unexpected_status|failure|hostname|label_app_kubernetes_io_managed_by|status|\ + pipeline|pipelinename|pipelinerun|schedule|check|grpc_service|grpc_code|\ + grpc_method|lease|lease_holder|deployment|platform|mode|cpu|role|node|kind|\ + verb|request_kind|tested_cluster|resource_type|exported_job|http_method|\ + http_route|http_status_code|gin_errors|rule_result|rule_execution_cause|\ + policy_name|policy_background_mode|rule_type|policy_type|policy_validation_mode|\ + resource_request_operation|resource_kind|policy_change_type|event_type" diff --git a/components/monitoring/prometheus/staging/base/monitoringstack/endpoints-params.yaml b/components/monitoring/prometheus/staging/base/monitoringstack/endpoints-params.yaml new file mode 100644 index 00000000000..5eef6feb9df --- /dev/null +++ b/components/monitoring/prometheus/staging/base/monitoringstack/endpoints-params.yaml @@ -0,0 +1,191 @@ +--- +- op: replace + path: /spec/endpoints/0/params + value: + 'match[]': # scrape only required metrics from in-cluster prometheus + - '{__name__="pipeline_service_schedule_overhead_percentage_sum"}' + - '{__name__="pipeline_service_schedule_overhead_percentage_count"}' + - '{__name__="pipeline_service_execution_overhead_percentage_sum"}' + - 
'{__name__="pipeline_service_execution_overhead_percentage_count"}' + - '{__name__="pipelinerun_duration_scheduled_seconds_sum"}' + - '{__name__="pipelinerun_duration_scheduled_seconds_count"}' + - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_sum"}' + - '{__name__="pipelinerun_gap_between_taskruns_milliseconds_count"}' + - '{__name__="pipelinerun_kickoff_not_attempted_count"}' + - '{__name__="pending_resolutionrequest_count"}' + - '{__name__="taskrun_pod_create_not_attempted_or_pending_count"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_count"}' + - '{__name__="tekton_pipelines_controller_running_pipelineruns_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node_count"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_quota"}' + - '{__name__="tekton_pipelines_controller_running_taskruns_throttled_by_node"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_sum"}' + - '{__name__="tekton_pipelines_controller_pipelinerun_duration_seconds_count"}' + - '{__name__="watcher_workqueue_depth"}' + - '{__name__="watcher_client_latency_bucket"}' + - '{__name__="pac_watcher_work_queue_depth"}' + - '{__name__="pac_watcher_client_latency_bucket"}' + - '{__name__="grpc_server_handled_total", namespace=~"tekton-results|openshift-pipelines"}' + - '{__name__="grpc_server_handled_total", namespace=~"openshift-etcd"}' + - '{__name__="grpc_server_handling_seconds_bucket", namespace=~"tekton-results|openshift-pipelines"}' + - '{__name__="grpc_server_handling_seconds_bucket", namespace="openshift-etcd"}' + - '{__name__="grpc_server_msg_received_total", namespace="openshift-etcd"}' + - '{__name__="controller_runtime_reconcile_errors_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="controller_runtime_reconcile_total", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - 
'{__name__="kube_lease_owner", namespace="openshift-pipelines", lease=~"controller.tektonresolverframework.bundleresolver..*"}' + - '{__name__="kube_lease_owner", namespace="openshift-pipelines", lease=~"tekton-pipelines-controller.github.com.tektoncd.pipeline.pkg.reconciler..*"}' + - '{__name__="kube_pod_status_unschedulable", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_container_status_restarts_total", namespace=~"openshift-pipelines|release-service"}' + - '{__name__="kube_pod_container_status_waiting_reason", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_pod_container_resource_limits", namespace="release-service"}' + - '{__name__="kube_pod_container_status_terminated_reason", namespace="release-service"}' + - '{__name__="kube_pod_container_status_last_terminated_reason", namespace="release-service"}' + - '{__name__="kube_pod_container_status_ready", namespace="release-service"}' + - '{__name__="kube_persistentvolume_status_phase", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_resourcequota", namespace!~".*-tenant|openshift-.*|kube-.*"}' + - '{__name__="kube_statefulset_status_replicas_ready", namespace="gitops-service-argocd"}' + - '{__name__="kube_statefulset_replicas", namespace="gitops-service-argocd"}' + - '{__name__="openshift_route_status", namespace="gitops-service-argocd"}' + + - '{__name__="kube_deployment_status_replicas_ready", namespace="gitops-service-argocd"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~"gitops-service-argocd"}' + + # Namespace (expression): "build-service" + - '{__name__="kube_deployment_status_replicas_ready", namespace="build-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="build-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="build-service"}' + + # Namespace (expression): "integration-service" + - 
'{__name__="kube_deployment_status_replicas_ready", namespace="integration-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="integration-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="integration-service"}' + + # Namespace (expression): "konflux-ui" + - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-ui"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-ui"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-ui"}' + - '{__name__="kube_running_pods_ready", namespace="konflux-ui"}' + - '{__name__="kube_endpoint_address", namespace="konflux-ui"}' + - '{__name__="kube_pod_container_status_restarts_total", namespace="konflux-ui"}' + + # Namespace (expression): "mintmaker" + - '{__name__="kube_deployment_status_replicas_ready", namespace="mintmaker"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="mintmaker"}' + - '{__name__="kube_deployment_spec_replicas", namespace="mintmaker"}' + - '{__name__="cluster_ram_requested_perc"}' + - '{__name__="node_memory_pressured_perc"}' + - '{__name__="redis_node_memory_usage_perc"}' + + # Namespace (expression): ~".*monitoring.*" + - '{__name__="kube_deployment_status_replicas_ready", namespace=~".*monitoring.*"}' + - '{__name__="kube_deployment_status_replicas_available", namespace=~".*monitoring.*"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~".*monitoring.*"}' + + # Namespace (expression): "multi-platform-controller" + - '{__name__="kube_deployment_status_replicas_ready", namespace="multi-platform-controller"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="multi-platform-controller"}' + - '{__name__="kube_deployment_spec_replicas", namespace="multi-platform-controller"}' + + # Namespace (expression): "namespace-lister" + - '{__name__="kube_deployment_status_replicas_ready", namespace="namespace-lister"}' + - 
'{__name__="kube_deployment_status_replicas_available", namespace="namespace-lister"}' + - '{__name__="kube_deployment_spec_replicas", namespace="namespace-lister"}' + + # Namespace (expression): "openshift-pipelines" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-pipelines"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-pipelines"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-pipelines"}' + + # Namespace (expression): "product-kubearchive" + - '{__name__="kube_deployment_status_replicas_ready", namespace="product-kubearchive"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="product-kubearchive"}' + - '{__name__="kube_deployment_spec_replicas", namespace="product-kubearchive"}' + + # Namespace (expression): "project-controller" + - '{__name__="kube_deployment_status_replicas_ready", namespace="project-controller"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="project-controller"}' + - '{__name__="kube_deployment_spec_replicas", namespace="project-controller"}' + + # Namespace (expression): "release-service" + - '{__name__="kube_deployment_status_replicas_ready", namespace="release-service"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="release-service"}' + - '{__name__="kube_deployment_spec_replicas", namespace="release-service"}' + + # Namespace (expression): ~"smee.*" + - '{__name__="kube_deployment_status_replicas_ready", namespace=~"smee.*"}' + - '{__name__="kube_deployment_status_replicas_available", namespace=~"smee.*"}' + - '{__name__="kube_deployment_spec_replicas", namespace=~"smee.*"}' + + # Namespace (expression): "openshift-apiserver" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", 
namespace="openshift-apiserver"}' + + # Namespace (expression): "openshift-oauth-apiserver" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-oauth-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-oauth-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-oauth-apiserver"}' + + # Namespace (expression): "konflux-kyverno" + - '{__name__="kube_deployment_status_replicas_ready", namespace="konflux-kyverno"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-kyverno"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-kyverno"}' + + # Namespace (expression): "openshift-kube-apiserver" + - '{__name__="kube_deployment_status_replicas_ready", namespace="openshift-kube-apiserver"}' + - '{__name__="kube_deployment_status_replicas_available", namespace="openshift-kube-apiserver"}' + - '{__name__="kube_deployment_spec_replicas", namespace="openshift-kube-apiserver"}' + + # Namespace (expression): "konflux-user-support" + - '{__name__="kube_deployment_status_replicas_available", namespace="konflux-user-support"}' + - '{__name__="kube_deployment_spec_replicas", namespace="konflux-user-support"}' + + - '{__name__="argocd_app_reconcile_bucket", namespace="gitops-service-argocd"}' + - '{__name__="argocd_app_info", namespace="gitops-service-argocd"}' + - '{__name__="container_cpu_usage_seconds_total", namespace="release-service"}' + - '{__name__="container_cpu_usage_seconds_total", namespace="openshift-etcd"}' + - '{__name__="container_memory_usage_bytes", namespace="release-service"}' + - '{__name__="container_memory_usage_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_disk_wal_fsync_duration_seconds_bucket"}' + - '{__name__="etcd_disk_backend_commit_duration_seconds_bucket"}' + - '{__name__="etcd_server_proposals_failed_total"}' + - '{__name__="etcd_server_leader_changes_seen_total", namespace="openshift-etcd"}' + - 
'{__name__="etcd_server_has_leader", namespace="openshift-etcd"}' + - '{__name__="etcd_server_is_leader", namespace="openshift-etcd"}' + - '{__name__="etcd_server_id", namespace="openshift-etcd"}' + - '{__name__="etcd_server_quota_backend_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_mvcc_db_total_size_in_bytes", namespace="openshift-etcd"}' + - '{__name__="etcd_server_received_total", namespace="openshift-etcd"}' + - '{__name__="etcd_network_active_peers", namespace="openshift-etcd"}' + - '{__name__="etcd_network_peer_round_trip_time_seconds_bucket"}' + - '{__name__="etcd_disk_defrag_inflight"}' + - '{__name__="kube_job_spec_completions"}' + - '{__name__="kube_job_status_succeeded"}' + - '{__name__="kube_job_status_failed"}' + - '{__name__="node_cpu_seconds_total", mode="idle"}' + - '{__name__="node_memory_MemTotal_bytes"}' + - '{__name__="node_memory_MemAvailable_bytes"}' + - '{__name__="platform:hypershift_hostedclusters:max"}' + - '{__name__="kube_node_role"}' + - '{__name__="etcd_shield_trigger"}' + - '{__name__="etcd_shield_alert_triggered"}' + - '{__name__="apiserver_admission_webhook_rejection_count", name="vpipelineruns.konflux-ci.dev"}' + - '{__name__="apiserver_watch_events_total"}' + - '{__name__="apiserver_storage_objects"}' + - '{__name__="apiserver_current_inflight_requests"}' + - '{__name__="resource_verb:apiserver_request_total:rate5m"}' + - '{__name__="code:apiserver_request_total:rate5m"}' + - '{__name__="instance:apiserver_request_total:rate5m"}' + - '{__name__="prometheus_ready"}' + - '{__name__="process_cpu_seconds_total", job="apiserver"}' + - '{__name__="namespace:container_memory_usage_bytes:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - 
'{__name__="namespace:container_cpu_usage:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="node_namespace_pod:kube_pod_info:", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="kube_node_status_allocatable", resource=~"cpu|memory"}' + - '{__name__="kube_node_status_condition", condition="MemoryPressure", status="true"}' + - '{__name__="namespace_memory:kube_pod_container_resource_requests:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_cpu:kube_pod_container_resource_requests:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_memory:kube_pod_container_resource_limits:sum", 
namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' + - '{__name__="namespace_cpu:kube_pod_container_resource_limits:sum", namespace=~"openshift-etcd|openshift-kube-apiserver|build-service|image-controller|integration-service|konflux-ui|product-kubearchive|openshift-kueue-operator|tekton-kueue|kueue-external-admission|mintmaker|multi-platform-controller|namespace-lister|openshift-pipelines|tekton-results|project-controller|smee|smee-client"}' diff --git a/components/monitoring/prometheus/staging/base/monitoringstack/kustomization.yaml b/components/monitoring/prometheus/staging/base/monitoringstack/kustomization.yaml index 43dbb53ef49..b10a5f4ce28 100644 --- a/components/monitoring/prometheus/staging/base/monitoringstack/kustomization.yaml +++ b/components/monitoring/prometheus/staging/base/monitoringstack/kustomization.yaml @@ -7,6 +7,10 @@ patches: target: name: appstudio-federate-smon kind: ServiceMonitor + - path: endpoints-params.yaml + target: + name: appstudio-federate-smon + kind: ServiceMonitor - path: cluster-type-patch.yaml target: name: appstudio-federate-uwm-smon @@ -15,6 +19,10 @@ patches: target: name: appstudio-federate-ms kind: MonitoringStack + - path: writeRelabelConfigs.yaml + target: + name: appstudio-federate-ms + kind: MonitoringStack commonAnnotations: argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true diff --git a/components/monitoring/prometheus/staging/base/monitoringstack/writeRelabelConfigs.yaml b/components/monitoring/prometheus/staging/base/monitoringstack/writeRelabelConfigs.yaml new file mode 100644 index 00000000000..8caaa8f4642 --- /dev/null +++ b/components/monitoring/prometheus/staging/base/monitoringstack/writeRelabelConfigs.yaml @@ -0,0 +1,18 @@ 
+---
+- op: add  # "add" creates the key if the base omits it and overwrites it otherwise
+  path: /spec/prometheusConfig/remoteWrite/0/writeRelabelConfigs  # first remoteWrite entry
+  value:
+    - action: LabelKeep  # regex below is matched against label names
+      regex: "__name__|source_environment|source_cluster|namespace|app|pod|container|\
+        label_pipelines_appstudio_openshift_io_type|health_status|dest_namespace|\
+        controller|service|reason|phase|type|resource|resourcequota|le|app|image|\
+        commit_hash|job|operation|tokenName|rateLimited|state|persistentvolumeclaim|\
+        storageclass|volumename|release_reason|instance|result|deployment_reason|\
+        validation_reason|strategy|succeeded|target|name|method|code|sp|le|\
+        unexpected_status|failure|hostname|label_app_kubernetes_io_managed_by|status|\
+        pipeline|pipelinename|pipelinerun|schedule|check|grpc_service|grpc_code|\
+        grpc_method|lease|lease_holder|deployment|platform|mode|cpu|role|node|kind|\
+        verb|request_kind|tested_cluster|resource_type|exported_job|http_method|\
+        http_route|http_status_code|gin_errors|rule_result|rule_execution_cause|\
+        policy_name|policy_background_mode|rule_type|policy_type|policy_validation_mode|\
+        resource_request_operation|resource_kind|policy_change_type|event_type"