From 5aad6eac35debf096899e0f8a7fb9b915bf3aa4d Mon Sep 17 00:00:00 2001
From: machadovilaca
Date: Tue, 25 Nov 2025 16:45:40 +0000
Subject: [PATCH 1/4] Add base alert management API

Signed-off-by: machadovilaca
---
 Makefile | 4 +-
 cmd/plugin-backend.go | 5 +-
 go.mod | 86 +-
 go.sum | 197 ++--
 internal/managementrouter/alerts_get.go | 51 ++
 internal/managementrouter/alerts_get_test.go | 129 +++
 internal/managementrouter/health_get.go | 16 +
 internal/managementrouter/health_get_test.go | 48 +
 .../managementrouter_suite_test.go | 13 +
 internal/managementrouter/router.go | 75 ++
 .../user_defined_alert_rule_bulk_delete.go | 60 ++
 ...ser_defined_alert_rule_bulk_delete_test.go | 245 +++++
 .../user_defined_alert_rule_delete_by_id.go | 26 +
 ...er_defined_alert_rule_delete_by_id_test.go | 173 ++++
 pkg/k8s/alert_relabel_config.go | 70 ++
 pkg/k8s/alert_relabel_config_informer.go | 62 ++
 pkg/k8s/client.go | 91 ++
 pkg/k8s/new.go | 12 +
 pkg/k8s/prometheus_alerts.go | 257 ++++++
 pkg/k8s/prometheus_rule.go | 127 +++
 pkg/k8s/prometheus_rule_informer.go | 62 ++
 pkg/k8s/types.go | 115 +++
 .../create_user_defined_alert_rule.go | 46 +
 .../create_user_defined_alert_rule_test.go | 310 +++++++
 .../delete_user_defined_alert_rule_by_id.go | 85 ++
 ...lete_user_defined_alert_rule_by_id_test.go | 527 +++++++++++
 pkg/management/errors.go | 20 +
 pkg/management/get_alerts.go | 53 ++
 pkg/management/get_alerts_test.go | 122 +++
 pkg/management/get_rule_by_id.go | 56 ++
 pkg/management/get_rule_by_id_test.go | 186 ++++
 pkg/management/list_rules.go | 133 +++
 pkg/management/list_rules_test.go | 451 +++++++++
 pkg/management/management.go | 19 +
 pkg/management/management_suite_test.go | 13 +
 pkg/management/mapper/mapper.go | 286 ++++++
 pkg/management/mapper/mapper_suite_test.go | 13 +
 pkg/management/mapper/mapper_test.go | 855 ++++++++++++++++++
 pkg/management/mapper/new.go | 16 +
 pkg/management/mapper/types.go | 48 +
 pkg/management/new.go | 24 +
 pkg/management/relabel_config.go | 46 +
 pkg/management/relabel_config_test.go | 171 ++++
 pkg/management/testutils/k8s_client_mock.go | 337 +++++++
 pkg/management/testutils/mapper_mock.go | 82 ++
 pkg/management/types.go | 57 ++
 pkg/management/update_platform_alert_rule.go | 171 ++++
 .../update_platform_alert_rule_test.go | 400 ++++++++
 .../update_user_defined_alert_rule.go | 61 ++
 .../update_user_defined_alert_rule_test.go | 250 +++++
 pkg/server.go | 50 +-
 51 files changed, 6687 insertions(+), 125 deletions(-)
 create mode 100644 internal/managementrouter/alerts_get.go
 create mode 100644 internal/managementrouter/alerts_get_test.go
 create mode 100644 internal/managementrouter/health_get.go
 create mode 100644 internal/managementrouter/health_get_test.go
 create mode 100644 internal/managementrouter/managementrouter_suite_test.go
 create mode 100644 internal/managementrouter/router.go
 create mode 100644 internal/managementrouter/user_defined_alert_rule_bulk_delete.go
 create mode 100644 internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go
 create mode 100644 internal/managementrouter/user_defined_alert_rule_delete_by_id.go
 create mode 100644 internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go
 create mode 100644 pkg/k8s/alert_relabel_config.go
 create mode 100644 pkg/k8s/alert_relabel_config_informer.go
 create mode 100644 pkg/k8s/client.go
 create mode 100644 pkg/k8s/new.go
 create mode 100644 pkg/k8s/prometheus_alerts.go
 create mode 100644 pkg/k8s/prometheus_rule.go
 create mode 100644 pkg/k8s/prometheus_rule_informer.go
 create mode 100644 pkg/k8s/types.go
 create mode
100644 pkg/management/create_user_defined_alert_rule.go create mode 100644 pkg/management/create_user_defined_alert_rule_test.go create mode 100644 pkg/management/delete_user_defined_alert_rule_by_id.go create mode 100644 pkg/management/delete_user_defined_alert_rule_by_id_test.go create mode 100644 pkg/management/errors.go create mode 100644 pkg/management/get_alerts.go create mode 100644 pkg/management/get_alerts_test.go create mode 100644 pkg/management/get_rule_by_id.go create mode 100644 pkg/management/get_rule_by_id_test.go create mode 100644 pkg/management/list_rules.go create mode 100644 pkg/management/list_rules_test.go create mode 100644 pkg/management/management.go create mode 100644 pkg/management/management_suite_test.go create mode 100644 pkg/management/mapper/mapper.go create mode 100644 pkg/management/mapper/mapper_suite_test.go create mode 100644 pkg/management/mapper/mapper_test.go create mode 100644 pkg/management/mapper/new.go create mode 100644 pkg/management/mapper/types.go create mode 100644 pkg/management/new.go create mode 100644 pkg/management/relabel_config.go create mode 100644 pkg/management/relabel_config_test.go create mode 100644 pkg/management/testutils/k8s_client_mock.go create mode 100644 pkg/management/testutils/mapper_mock.go create mode 100644 pkg/management/types.go create mode 100644 pkg/management/update_platform_alert_rule.go create mode 100644 pkg/management/update_platform_alert_rule_test.go create mode 100644 pkg/management/update_user_defined_alert_rule.go create mode 100644 pkg/management/update_user_defined_alert_rule_test.go diff --git a/Makefile b/Makefile index ce54b2060..9c6706886 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ lint-frontend: lint-backend: go mod tidy go fmt ./cmd/ - go fmt ./pkg/ + go fmt ./pkg/... ./internal/... .PHONY: install-backend install-backend: @@ -57,7 +57,7 @@ start-backend: .PHONY: test-backend test-backend: - go test ./pkg/... -v + go test ./pkg/... ./internal/... 
-v .PHONY: build-image build-image: diff --git a/cmd/plugin-backend.go b/cmd/plugin-backend.go index 82e76f4b6..0d1a3b165 100644 --- a/cmd/plugin-backend.go +++ b/cmd/plugin-backend.go @@ -8,15 +8,16 @@ import ( "strconv" "strings" - server "github.com/openshift/monitoring-plugin/pkg" "github.com/sirupsen/logrus" + + server "github.com/openshift/monitoring-plugin/pkg" ) var ( portArg = flag.Int("port", 0, "server port to listen on (default: 9443)\nports 9444 and 9445 reserved for other use") certArg = flag.String("cert", "", "cert file path to enable TLS (disabled by default)") keyArg = flag.String("key", "", "private key file path to enable TLS (disabled by default)") - featuresArg = flag.String("features", "", "enabled features, comma separated.\noptions: ['acm-alerting', 'incidents', 'dev-config', 'perses-dashboards']") + featuresArg = flag.String("features", "", "enabled features, comma separated.\noptions: ['acm-alerting', 'incidents', 'dev-config', 'perses-dashboards', 'management-api']") staticPathArg = flag.String("static-path", "", "static files path to serve frontend (default: './web/dist')") configPathArg = flag.String("config-path", "", "config files path (default: './config')") pluginConfigArg = flag.String("plugin-config-path", "", "plugin yaml configuration") diff --git a/go.mod b/go.mod index c63c87f86..4107fae38 100644 --- a/go.mod +++ b/go.mod @@ -4,57 +4,79 @@ go 1.24.0 require ( github.com/evanphx/json-patch v4.12.0+incompatible + github.com/go-playground/form/v4 v4.3.0 github.com/gorilla/handlers v1.5.2 github.com/gorilla/mux v1.8.1 + github.com/onsi/ginkgo/v2 v2.22.0 + github.com/onsi/gomega v1.36.1 + github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9 + github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287 github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 + github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 github.com/sirupsen/logrus v1.9.3 - github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.11.1 gopkg.in/yaml.v2 v2.4.0 - k8s.io/api v0.31.1 - k8s.io/apiserver v0.30.3 - k8s.io/client-go v0.31.1 + k8s.io/api v0.34.2 + k8s.io/apimachinery v0.34.2 + k8s.io/apiserver v0.34.2 + k8s.io/client-go v0.34.2 ) require ( github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.12.1 // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.7.0 // indirect - github.com/fxamacker/cbor/v2 v2.7.0 // indirect - github.com/go-logr/logr v1.4.2 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.21.0 // indirect - github.com/go-openapi/swag v0.23.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-openapi/jsonpointer v0.22.1 // indirect + github.com/go-openapi/jsonreference v0.21.2 // indirect + github.com/go-openapi/swag v0.25.1 // indirect + github.com/go-openapi/swag/cmdutils v0.25.1 // indirect + github.com/go-openapi/swag/conv v0.25.1 // indirect + github.com/go-openapi/swag/fileutils v0.25.1 // indirect + github.com/go-openapi/swag/jsonname v0.25.1 // indirect + github.com/go-openapi/swag/jsonutils v0.25.1 // indirect + github.com/go-openapi/swag/loading v0.25.1 // indirect + 
github.com/go-openapi/swag/mangling v0.25.1 // indirect + github.com/go-openapi/swag/netutils v0.25.1 // indirect + github.com/go-openapi/swag/stringutils v0.25.1 // indirect + github.com/go-openapi/swag/typeutils v0.25.1 // indirect + github.com/go-openapi/swag/yamlutils v0.25.1 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect - github.com/golang/protobuf v1.5.4 // indirect - github.com/google/gnostic-models v0.6.8 // indirect - github.com/google/go-cmp v0.6.0 // indirect - github.com/google/gofuzz v1.2.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect github.com/google/uuid v1.6.0 // indirect - github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/x448/float16 v0.8.4 // indirect - golang.org/x/net v0.34.0 // indirect - golang.org/x/oauth2 v0.25.0 // indirect - golang.org/x/sys v0.29.0 // indirect - golang.org/x/term v0.28.0 // indirect - golang.org/x/text v0.21.0 // indirect - golang.org/x/time v0.9.0 // indirect - google.golang.org/protobuf v1.34.2 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.44.0 // indirect + golang.org/x/oauth2 v0.31.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/time v0.13.0 // indirect + golang.org/x/tools v0.36.0 // indirect + google.golang.org/protobuf v1.36.10 // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apimachinery v0.31.1 // indirect + k8s.io/apiextensions-apiserver v0.34.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20240808142205-8e686545bdb8 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect + k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect + sigs.k8s.io/controller-runtime v0.22.3 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/go.sum b/go.sum index 4bc90faf2..975b1a057 100644 --- a/go.sum +++ b/go.sum @@ -2,50 +2,69 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew 
v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= -github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH5pOlLGNtQ5lPWQu84= github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= -github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= -github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= -github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= -github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= -github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= -github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= -github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-openapi/jsonpointer v0.22.1 h1:sHYI1He3b9NqJ4wXLoJDKmUmHkWy/L7rtEo92JUxBNk= +github.com/go-openapi/jsonpointer v0.22.1/go.mod h1:pQT9OsLkfz1yWoMgYFy4x3U5GY5nUlsOn1qSBH5MkCM= +github.com/go-openapi/jsonreference v0.21.2 h1:Wxjda4M/BBQllegefXrY/9aq1fxBA8sI5M/lFU6tSWU= +github.com/go-openapi/jsonreference v0.21.2/go.mod h1:pp3PEjIsJ9CZDGCNOyXIQxsNuroxm8FAJ/+quA0yKzQ= +github.com/go-openapi/swag v0.25.1 h1:6uwVsx+/OuvFVPqfQmOOPsqTcm5/GkBhNwLqIR916n8= +github.com/go-openapi/swag v0.25.1/go.mod h1:bzONdGlT0fkStgGPd3bhZf1MnuPkf2YAys6h+jZipOo= +github.com/go-openapi/swag/cmdutils v0.25.1 h1:nDke3nAFDArAa631aitksFGj2omusks88GF1VwdYqPY= +github.com/go-openapi/swag/cmdutils v0.25.1/go.mod h1:pdae/AFo6WxLl5L0rq87eRzVPm/XRHM3MoYgRMvG4A0= +github.com/go-openapi/swag/conv v0.25.1 h1:+9o8YUg6QuqqBM5X6rYL/p1dpWeZRhoIt9x7CCP+he0= +github.com/go-openapi/swag/conv v0.25.1/go.mod h1:Z1mFEGPfyIKPu0806khI3zF+/EUXde+fdeksUl2NiDs= +github.com/go-openapi/swag/fileutils v0.25.1 h1:rSRXapjQequt7kqalKXdcpIegIShhTPXx7yw0kek2uU= +github.com/go-openapi/swag/fileutils v0.25.1/go.mod 
h1:+NXtt5xNZZqmpIpjqcujqojGFek9/w55b3ecmOdtg8M= +github.com/go-openapi/swag/jsonname v0.25.1 h1:Sgx+qbwa4ej6AomWC6pEfXrA6uP2RkaNjA9BR8a1RJU= +github.com/go-openapi/swag/jsonname v0.25.1/go.mod h1:71Tekow6UOLBD3wS7XhdT98g5J5GR13NOTQ9/6Q11Zo= +github.com/go-openapi/swag/jsonutils v0.25.1 h1:AihLHaD0brrkJoMqEZOBNzTLnk81Kg9cWr+SPtxtgl8= +github.com/go-openapi/swag/jsonutils v0.25.1/go.mod h1:JpEkAjxQXpiaHmRO04N1zE4qbUEg3b7Udll7AMGTNOo= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1 h1:DSQGcdB6G0N9c/KhtpYc71PzzGEIc/fZ1no35x4/XBY= +github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.1/go.mod h1:kjmweouyPwRUEYMSrbAidoLMGeJ5p6zdHi9BgZiqmsg= +github.com/go-openapi/swag/loading v0.25.1 h1:6OruqzjWoJyanZOim58iG2vj934TysYVptyaoXS24kw= +github.com/go-openapi/swag/loading v0.25.1/go.mod h1:xoIe2EG32NOYYbqxvXgPzne989bWvSNoWoyQVWEZicc= +github.com/go-openapi/swag/mangling v0.25.1 h1:XzILnLzhZPZNtmxKaz/2xIGPQsBsvmCjrJOWGNz/ync= +github.com/go-openapi/swag/mangling v0.25.1/go.mod h1:CdiMQ6pnfAgyQGSOIYnZkXvqhnnwOn997uXZMAd/7mQ= +github.com/go-openapi/swag/netutils v0.25.1 h1:2wFLYahe40tDUHfKT1GRC4rfa5T1B4GWZ+msEFA4Fl4= +github.com/go-openapi/swag/netutils v0.25.1/go.mod h1:CAkkvqnUJX8NV96tNhEQvKz8SQo2KF0f7LleiJwIeRE= +github.com/go-openapi/swag/stringutils v0.25.1 h1:Xasqgjvk30eUe8VKdmyzKtjkVjeiXx1Iz0zDfMNpPbw= +github.com/go-openapi/swag/stringutils v0.25.1/go.mod h1:JLdSAq5169HaiDUbTvArA2yQxmgn4D6h4A+4HqVvAYg= +github.com/go-openapi/swag/typeutils v0.25.1 h1:rD/9HsEQieewNt6/k+JBwkxuAHktFtH3I3ysiFZqukA= +github.com/go-openapi/swag/typeutils v0.25.1/go.mod h1:9McMC/oCdS4BKwk2shEB7x17P6HmMmA6dQRtAkSnNb8= +github.com/go-openapi/swag/yamlutils v0.25.1 h1:mry5ez8joJwzvMbaTGLhw8pXUnhDK91oSJLDPF1bmGk= +github.com/go-openapi/swag/yamlutils v0.25.1/go.mod h1:cm9ywbzncy3y6uPm/97ysW8+wZ09qsks+9RS8fLWKqg= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/form/v4 v4.3.0 h1:OVttojbQv2WNCs4P+VnjPtrt/+30Ipw4890W3OaFlvk= +github.com/go-playground/form/v4 v4.3.0/go.mod h1:Cpe1iYJKoXb1vILRXEwxpWMGWyQuqplQ/4cvPecy+Jo= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= -github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod 
h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= -github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= -github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= +github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= -github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= -github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -54,19 +73,22 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= -github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.20.0 h1:PE84V2mHqoT1sglvHc8ZdQtPcwmvvt29WLEEO3xmdZw= -github.com/onsi/ginkgo/v2 v2.20.0/go.mod h1:lG9ey2Z29hR41WMVthyJBGUBcBhGOtoPF2VFMvBXFCI= 
-github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= -github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= +github.com/onsi/ginkgo/v2 v2.22.0 h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= +github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= +github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= +github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= +github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9 h1:RKbCmhOI6XOKMjoXLjANJ1ic7wd4dVV7nSfrn3csEuQ= +github.com/openshift/api v0.0.0-20251122153900-88cca31a44c9/go.mod h1:d5uzF0YN2nQQFA0jIEWzzOZ+edmo6wzlGLvx5Fhz4uY= +github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287 h1:Spullg4rMMWUjYiBMvYMhyeZ+j36mYOrkSO7ad43xrA= +github.com/openshift/client-go v0.0.0-20251123231646-4685125c2287/go.mod h1:liCuDDdOsPSZIDP0QuTveFhF7ldXuvnPhBd/OTsJdJc= github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5 h1:CyPTfZvr+HvwXbix9kieI55HeFn4a5DBaxJ3DNFinhg= github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5/go.mod h1:/wmao3qtqOQ484HDka9cWP7SIvOQOdzpmhyXkF2YdzE= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -74,38 +96,46 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 h1:QK37j5ZUtBwbyZkF4BBAs3bQQ1gYKG8e+g1BdNZBr/M= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0/go.mod h1:WHiLZmOWVop/MoYvRD58LfnPeyE+dcITby/jQjg83Hw= +github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 h1:rrZriucuC8ZUOPr8Asvavb9pbzqXSsAeY79aH8xnXlc= +github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0/go.mod h1:OMvC2XJGxPeEAKf5qB1u7DudV46HA8ePxYslRjxQcbk= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o= +github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 
-github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= -golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= -golang.org/x/oauth2 v0.25.0 h1:CY4y7XT9v0cRI9oupztF8AgiIu99L/ksR/Xp/6jrZ70= -golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= +golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= +golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -113,58 +143,63 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= -golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.28.0 h1:/Ts8HFuMR2E6IP/jlo7QVLZHggjKQbhu/7H0LJFr3Gg= -golang.org/x/term v0.28.0/go.mod h1:Sw/lC2IAUZ92udQNf3WodGtn4k/XoLyZoh8v/8uiwek= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= -golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= -golang.org/x/time v0.9.0 h1:EsRrnYcQiGH+5FfbgvV4AP7qEZstoyrHB0DzarOQ4ZY= -golang.org/x/time v0.9.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= +golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= -gopkg.in/evanphx/json-patch.v4 
v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= -k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= -k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= -k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= -k8s.io/apiserver v0.30.3 h1:QZJndA9k2MjFqpnyYv/PH+9PE0SHhx3hBho4X0vE65g= -k8s.io/apiserver v0.30.3/go.mod h1:6Oa88y1CZqnzetd2JdepO0UXzQX4ZnOekx2/PtEjrOg= -k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= -k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/api v0.34.2 h1:fsSUNZhV+bnL6Aqrp6O7lMTy6o5x2C4XLjnh//8SLYY= +k8s.io/api v0.34.2/go.mod h1:MMBPaWlED2a8w4RSeanD76f7opUoypY8TFYkSM+3XHw= +k8s.io/apiextensions-apiserver v0.34.2 h1:WStKftnGeoKP4AZRz/BaAAEJvYp4mlZGN0UCv+uvsqo= +k8s.io/apiextensions-apiserver v0.34.2/go.mod h1:398CJrsgXF1wytdaanynDpJ67zG4Xq7yj91GrmYN2SE= +k8s.io/apimachinery v0.34.2 h1:zQ12Uk3eMHPxrsbUJgNF8bTauTVR2WgqJsTmwTE/NW4= +k8s.io/apimachinery v0.34.2/go.mod h1:/GwIlEcWuTX9zKIg2mbw0LRFIsXwrfoVxn+ef0X13lw= +k8s.io/apiserver v0.34.2 h1:2/yu8suwkmES7IzwlehAovo8dDE07cFRC7KMDb1+MAE= +k8s.io/apiserver v0.34.2/go.mod h1:gqJQy2yDOB50R3JUReHSFr+cwJnL8G1dzTA0YLEqAPI= +k8s.io/client-go v0.34.2 h1:Co6XiknN+uUZqiddlfAjT68184/37PS4QAzYvQvDR8M= +k8s.io/client-go v0.34.2/go.mod h1:2VYDl1XXJsdcAxw7BenFslRQX28Dxz91U9MWKjX97fE= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20240808142205-8e686545bdb8 h1:1Wof1cGQgA5pqgo8MxKPtf+qN6Sh/0JzznmeGPm1HnE= -k8s.io/kube-openapi v0.0.0-20240808142205-8e686545bdb8/go.mod h1:Os6V6dZwLNii3vxFpxcNaTmH8LJJBkOTg1N0tOA0fvA= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= -sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= -sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE= +k8s.io/kube-openapi 
v0.0.0-20250910181357-589584f1c912/go.mod h1:kdmbQkyfwUagLfXIad1y2TdrjPFWp2Q89B3qkRwf/pQ= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= +k8s.io/utils v0.0.0-20251002143259-bc988d571ff4/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/controller-runtime v0.22.3 h1:I7mfqz/a/WdmDCEnXmSPm8/b/yRTy6JsKKENTijTq8Y= +sigs.k8s.io/controller-runtime v0.22.3/go.mod h1:+QX1XUpTXN4mLoblf4tqr5CQcyHPAki2HLXqQMY6vh8= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0 h1:jTijUJbW353oVOd9oTlifJqOGEkUw2jB/fXCbTiQEco= +sigs.k8s.io/structured-merge-diff/v6 v6.3.0/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/internal/managementrouter/alerts_get.go b/internal/managementrouter/alerts_get.go new file mode 100644 index 000000000..4d1857051 --- /dev/null +++ b/internal/managementrouter/alerts_get.go @@ -0,0 +1,51 @@ +package managementrouter + +import ( + "encoding/json" + "net/http" + + "github.com/go-playground/form/v4" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type GetAlertsQueryParams struct { + Labels map[string]string `form:"labels"` + State string `form:"state"` +} + +type GetAlertsResponse struct { + Data GetAlertsResponseData `json:"data"` + Status string `json:"status"` +} + +type GetAlertsResponseData struct { + Alerts []k8s.PrometheusAlert `json:"alerts"` +} + +func (hr *httpRouter) GetAlerts(w http.ResponseWriter, req *http.Request) { + var params GetAlertsQueryParams + + if err := form.NewDecoder().Decode(¶ms, req.URL.Query()); err != nil { + writeError(w, http.StatusBadRequest, "Invalid query parameters: "+err.Error()) + return + } + + alerts, err := hr.managementClient.GetAlerts(req.Context(), k8s.GetAlertsRequest{ + Labels: params.Labels, + State: params.State, + }) + if err != nil { + handleError(w, err) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(GetAlertsResponse{ + Data: GetAlertsResponseData{ + Alerts: alerts, + }, + Status: "success", + }) +} diff --git a/internal/managementrouter/alerts_get_test.go b/internal/managementrouter/alerts_get_test.go new file mode 100644 index 000000000..3c612c878 --- /dev/null +++ b/internal/managementrouter/alerts_get_test.go @@ -0,0 +1,129 @@ +package managementrouter_test + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("GetAlerts", func() { + var ( + mockK8s *testutils.MockClient + mockPrometheusAlerts *testutils.MockPrometheusAlertsInterface + mockManagement management.Client + router http.Handler + ) + + BeforeEach(func() { + By("setting up mock clients") + mockPrometheusAlerts = &testutils.MockPrometheusAlertsInterface{} + mockK8s = &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return mockPrometheusAlerts + }, + } + + mockManagement = management.NewWithCustomMapper(context.Background(), mockK8s, &testutils.MockMapperClient{}) + router = managementrouter.New(mockManagement) + }) + + Context("when getting all alerts without filters", func() { + It("should return all active alerts", func() { + By("setting up test alerts") + testAlerts := []k8s.PrometheusAlert{ + { + Labels: map[string]string{ + "alertname": "HighCPUUsage", + "severity": "warning", + "namespace": "default", + }, + Annotations: map[string]string{ + "description": "CPU usage is high", + }, + State: "firing", + ActiveAt: time.Now(), + }, + { + Labels: map[string]string{ + "alertname": "LowMemory", + "severity": "critical", + "namespace": "monitoring", + }, + Annotations: map[string]string{ + "description": "Memory is running low", + }, + State: "firing", + ActiveAt: time.Now(), + }, + } + mockPrometheusAlerts.SetActiveAlerts(testAlerts) + + By("making the request") + req := httptest.NewRequest(http.MethodGet, "/api/v1/alerting/alerts", nil) + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + By("verifying the response") + Expect(w.Code).To(Equal(http.StatusOK)) + Expect(w.Header().Get("Content-Type")).To(Equal("application/json")) + + var response managementrouter.GetAlertsResponse + err := json.NewDecoder(w.Body).Decode(&response) + Expect(err).NotTo(HaveOccurred()) + Expect(response.Data.Alerts).To(HaveLen(2)) + Expect(response.Data.Alerts[0].Labels["alertname"]).To(Equal("HighCPUUsage")) + Expect(response.Data.Alerts[1].Labels["alertname"]).To(Equal("LowMemory")) + }) + + It("should return empty array when no alerts exist", func() { + By("setting up empty alerts") + mockPrometheusAlerts.SetActiveAlerts([]k8s.PrometheusAlert{}) + + By("making the request") + req := httptest.NewRequest(http.MethodGet, "/api/v1/alerting/alerts", nil) + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + By("verifying the response") + Expect(w.Code).To(Equal(http.StatusOK)) + + var response managementrouter.GetAlertsResponse + err := json.NewDecoder(w.Body).Decode(&response) + Expect(err).NotTo(HaveOccurred()) + Expect(response.Data.Alerts).To(BeEmpty()) + }) + }) + + Context("when handling errors", func() { + It("should return 500 when GetAlerts fails", func() { + By("configuring mock to return error") + mockPrometheusAlerts.GetAlertsFunc = func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return nil, fmt.Errorf("connection error") + } + + By("making the request") + req := httptest.NewRequest(http.MethodGet, "/api/v1/alerting/alerts", nil) + w := httptest.NewRecorder() + + router.ServeHTTP(w, req) + + By("verifying error response") + Expect(w.Code).To(Equal(http.StatusInternalServerError)) + Expect(w.Body.String()).To(ContainSubstring("An unexpected error occurred")) + 
})
+	})
+
+})
diff --git a/internal/managementrouter/health_get.go b/internal/managementrouter/health_get.go
new file mode 100644
index 000000000..b010375e5
--- /dev/null
+++ b/internal/managementrouter/health_get.go
@@ -0,0 +1,16 @@
+package managementrouter
+
+import (
+	"encoding/json"
+	"net/http"
+)
+
+type GetHealthResponse struct {
+	Status string `json:"status"`
+}
+
+func (hr *httpRouter) GetHealth(w http.ResponseWriter, r *http.Request) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	_ = json.NewEncoder(w).Encode(GetHealthResponse{Status: "ok"})
+}
diff --git a/internal/managementrouter/health_get_test.go b/internal/managementrouter/health_get_test.go
new file mode 100644
index 000000000..80aa1c9b7
--- /dev/null
+++ b/internal/managementrouter/health_get_test.go
@@ -0,0 +1,48 @@
+package managementrouter_test
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/openshift/monitoring-plugin/internal/managementrouter"
+)
+
+var _ = Describe("GetHealth", func() {
+	var router http.Handler
+
+	BeforeEach(func() {
+		By("setting up the HTTP router")
+		router = managementrouter.New(nil)
+	})
+
+	Context("when calling the health endpoint", func() {
+		It("should return 200 OK status code", func() {
+			By("making the request")
+			req := httptest.NewRequest(http.MethodGet, "/api/v1/alerting/health", nil)
+			w := httptest.NewRecorder()
+
+			router.ServeHTTP(w, req)
+
+			By("verifying the status code")
+			Expect(w.Code).To(Equal(http.StatusOK))
+		})
+
+		It("should return correct JSON structure with status ok", func() {
+			By("making the request")
+			req := httptest.NewRequest(http.MethodGet, "/api/v1/alerting/health", nil)
+			w := httptest.NewRecorder()
+
+			router.ServeHTTP(w, req)
+
+			By("verifying the response body")
+			var response managementrouter.GetHealthResponse
+			err := json.NewDecoder(w.Body).Decode(&response)
+			Expect(err).NotTo(HaveOccurred())
+			Expect(response.Status).To(Equal("ok"))
+		})
+	})
+})
diff --git a/internal/managementrouter/managementrouter_suite_test.go b/internal/managementrouter/managementrouter_suite_test.go
new file mode 100644
index 000000000..3da1553b3
--- /dev/null
+++ b/internal/managementrouter/managementrouter_suite_test.go
@@ -0,0 +1,13 @@
+package managementrouter_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestHTTPRouter(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "HTTPRouter Suite")
+}
diff --git a/internal/managementrouter/router.go b/internal/managementrouter/router.go
new file mode 100644
index 000000000..794fa5d1f
--- /dev/null
+++ b/internal/managementrouter/router.go
@@ -0,0 +1,75 @@
+package managementrouter
+
+import (
+	"errors"
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/gorilla/mux"
+
+	"github.com/openshift/monitoring-plugin/pkg/management"
+)
+
+type httpRouter struct {
+	managementClient management.Client
+}
+
+func New(managementClient management.Client) *mux.Router {
+	httpRouter := &httpRouter{
+		managementClient: managementClient,
+	}
+
+	r := mux.NewRouter()
+
+	r.HandleFunc("/api/v1/alerting/health", httpRouter.GetHealth).Methods(http.MethodGet)
+	r.HandleFunc("/api/v1/alerting/alerts", httpRouter.GetAlerts).Methods(http.MethodGet)
+	r.HandleFunc("/api/v1/alerting/rules", httpRouter.BulkDeleteUserDefinedAlertRules).Methods(http.MethodDelete)
+	r.HandleFunc("/api/v1/alerting/rules/{ruleId}", httpRouter.DeleteUserDefinedAlertRuleById).Methods(http.MethodDelete)
+
+	return r
+}
+
+func writeError(w http.ResponseWriter, statusCode int, message string) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(statusCode)
+	_, _ = w.Write([]byte(`{"error":"` + message + `"}`))
+}
+
+func handleError(w http.ResponseWriter, err error) {
+	status, message := parseError(err)
+	writeError(w, status, message)
+}
+
+func parseError(err error) (int, string) {
+	var nf *management.NotFoundError
+	if errors.As(err, &nf) {
+		return http.StatusNotFound, err.Error()
+	}
+	var na *management.NotAllowedError
+	if errors.As(err, &na) {
+		return http.StatusMethodNotAllowed, err.Error()
+	}
+	log.Printf("An unexpected error occurred: %v", err)
+	return http.StatusInternalServerError, "An unexpected error occurred"
+}
+
+func parseParam(raw string, name string) (string, error) {
+	decoded, err := url.PathUnescape(raw)
+	if err != nil {
+		return "", fmt.Errorf("invalid %s encoding", name)
+	}
+	value := strings.TrimSpace(decoded)
+	if value == "" {
+		return "", fmt.Errorf("missing %s", name)
+	}
+	return value, nil
+}
+
+func getParam(r *http.Request, name string) (string, error) {
+	vars := mux.Vars(r)
+	raw := vars[name]
+	return parseParam(raw, name)
+}
diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go
new file mode 100644
index 000000000..eea8ee19c
--- /dev/null
+++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete.go
@@ -0,0 +1,60 @@
+package managementrouter
+
+import (
+	"encoding/json"
+	"net/http"
+)
+
+type BulkDeleteUserDefinedAlertRulesRequest struct {
+	RuleIds []string `json:"ruleIds"`
+}
+
+type BulkDeleteUserDefinedAlertRulesResponse struct {
+	Rules []DeleteUserDefinedAlertRulesResponse `json:"rules"`
+}
+
+func (hr *httpRouter) BulkDeleteUserDefinedAlertRules(w http.ResponseWriter, req *http.Request) {
+	var payload BulkDeleteUserDefinedAlertRulesRequest
+	if err := json.NewDecoder(req.Body).Decode(&payload); err != nil {
+		writeError(w, http.StatusBadRequest, "invalid request body")
+		return
+	}
+	if len(payload.RuleIds) == 0 {
+		writeError(w, http.StatusBadRequest, "ruleIds is required")
+		return
+	}
+
+	results := make([]DeleteUserDefinedAlertRulesResponse, 0, len(payload.RuleIds))
+
+	for _, rawId := range payload.RuleIds {
+		id, err := parseParam(rawId, "ruleId")
+		if err != nil {
+ results = append(results, DeleteUserDefinedAlertRulesResponse{ + Id: rawId, + StatusCode: http.StatusBadRequest, + Message: err.Error(), + }) + continue + } + + if err := hr.managementClient.DeleteUserDefinedAlertRuleById(req.Context(), id); err != nil { + status, message := parseError(err) + results = append(results, DeleteUserDefinedAlertRulesResponse{ + Id: id, + StatusCode: status, + Message: message, + }) + continue + } + results = append(results, DeleteUserDefinedAlertRulesResponse{ + Id: id, + StatusCode: http.StatusNoContent, + }) + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + _ = json.NewEncoder(w).Encode(BulkDeleteUserDefinedAlertRulesResponse{ + Rules: results, + }) +} diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go new file mode 100644 index 000000000..15b6f7ac7 --- /dev/null +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go @@ -0,0 +1,245 @@ +package managementrouter_test + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { + var ( + router http.Handler + mockK8sRules *testutils.MockPrometheusRuleInterface + mockK8s *testutils.MockClient + mockMapper *testutils.MockMapperClient + ) + + BeforeEach(func() { + mockK8sRules = &testutils.MockPrometheusRuleInterface{} + + userPR := monitoringv1.PrometheusRule{} + userPR.Name = "user-pr" + userPR.Namespace = "default" + userPR.Spec.Groups = []monitoringv1.RuleGroup{ + { + Name: "g1", + Rules: []monitoringv1.Rule{{Alert: "u1"}, {Alert: "u2"}}, + }, + } + + platformPR := monitoringv1.PrometheusRule{} + platformPR.Name = "platform-pr" + platformPR.Namespace = "openshift-monitoring" + platformPR.Spec.Groups = []monitoringv1.RuleGroup{ + { + Name: "pg1", + Rules: []monitoringv1.Rule{{Alert: "platform1"}}, + }, + } + + mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "default/user-pr": &userPR, + "openshift-monitoring/platform-pr": &platformPR, + }) + + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockK8sRules + }, + } + + mockMapper = &testutils.MockMapperClient{ + GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(rule.Alert) + }, + FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + id := string(alertRuleId) + pr := mapper.PrometheusRuleId{ + Namespace: "default", + Name: "user-pr", + } + if id == "platform1" { + pr.Namespace = "openshift-monitoring" + pr.Name = "platform-pr" + } + return &pr, nil + }, + } + + mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) + router = managementrouter.New(mgmt) + }) + + Context("when deleting multiple rules", func() { + It("returns deleted and failed for mixed ruleIds and updates rules", func() { + body := 
map[string]interface{}{"ruleIds": []string{"u1", "platform1", ""}} + buf, _ := json.Marshal(body) + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusOK)) + var resp struct { + Rules []struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message"` + } `json:"rules"` + } + Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) + Expect(resp.Rules).To(HaveLen(3)) + // u1 -> success + Expect(resp.Rules[0].Id).To(Equal("u1")) + Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) + Expect(resp.Rules[0].Message).To(BeEmpty()) + // platform1 -> not allowed + Expect(resp.Rules[1].Id).To(Equal("platform1")) + Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusMethodNotAllowed)) + Expect(resp.Rules[1].Message).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) + // "" -> bad request (missing id) + Expect(resp.Rules[2].Id).To(Equal("")) + Expect(resp.Rules[2].StatusCode).To(Equal(http.StatusBadRequest)) + Expect(resp.Rules[2].Message).To(ContainSubstring("missing ruleId")) + + prUser, _, err := mockK8sRules.Get(context.Background(), "default", "user-pr") + Expect(err).NotTo(HaveOccurred()) + userRuleNames := []string{} + for _, g := range prUser.Spec.Groups { + for _, r := range g.Rules { + userRuleNames = append(userRuleNames, r.Alert) + } + } + Expect(userRuleNames).NotTo(ContainElement("u1")) + Expect(userRuleNames).To(ContainElement("u2")) + + prPlatform, _, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + Expect(err).NotTo(HaveOccurred()) + foundPlatform := false + for _, g := range prPlatform.Spec.Groups { + for _, r := range g.Rules { + if r.Alert == "platform1" { + foundPlatform = true + } + } + } + Expect(foundPlatform).To(BeTrue()) + }) + + It("succeeds for user rule and fails for platform rule (mixed case)", func() { + body := map[string]interface{}{"ruleIds": []string{"u1", "platform1"}} + buf, _ := json.Marshal(body) + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusOK)) + var resp struct { + Rules []struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message"` + } `json:"rules"` + } + Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) + Expect(resp.Rules).To(HaveLen(2)) + Expect(resp.Rules[0].Id).To(Equal("u1")) + Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) + Expect(resp.Rules[1].Id).To(Equal("platform1")) + Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusMethodNotAllowed)) + Expect(resp.Rules[1].Message).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) + + // Ensure only user rule was removed + prUser, _, err := mockK8sRules.Get(context.Background(), "default", "user-pr") + Expect(err).NotTo(HaveOccurred()) + userRuleNames := []string{} + for _, g := range prUser.Spec.Groups { + for _, r := range g.Rules { + userRuleNames = append(userRuleNames, r.Alert) + } + } + Expect(userRuleNames).NotTo(ContainElement("u1")) + Expect(userRuleNames).To(ContainElement("u2")) + + // Platform rule remains intact + prPlatform, _, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + Expect(err).NotTo(HaveOccurred()) + foundPlatform := false + 
for _, g := range prPlatform.Spec.Groups { + for _, r := range g.Rules { + if r.Alert == "platform1" { + foundPlatform = true + } + } + } + Expect(foundPlatform).To(BeTrue()) + }) + + It("returns all deleted when all user ruleIds succeed", func() { + body := map[string]interface{}{"ruleIds": []string{"u1", "u2"}} + buf, _ := json.Marshal(body) + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusOK)) + var resp struct { + Rules []struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message"` + } `json:"rules"` + } + Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) + Expect(resp.Rules).To(HaveLen(2)) + Expect(resp.Rules[0].Id).To(Equal("u1")) + Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) + Expect(resp.Rules[1].Id).To(Equal("u2")) + Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusNoContent)) + + // User PrometheusRule should be deleted after removing the last rule + _, found, err := mockK8sRules.Get(context.Background(), "default", "user-pr") + Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeFalse()) + + // Platform PrometheusRule remains present + _, found, err = mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + Expect(err).NotTo(HaveOccurred()) + Expect(found).To(BeTrue()) + }) + }) + + Context("when request body is invalid", func() { + It("returns 400", func() { + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewBufferString("{")) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + Expect(w.Code).To(Equal(http.StatusBadRequest)) + Expect(w.Body.String()).To(ContainSubstring("invalid request body")) + }) + }) + + Context("when ruleIds is empty", func() { + It("returns 400", func() { + body := map[string]interface{}{"ruleIds": []string{}} + buf, _ := json.Marshal(body) + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + Expect(w.Code).To(Equal(http.StatusBadRequest)) + Expect(w.Body.String()).To(ContainSubstring("ruleIds is required")) + }) + }) +}) diff --git a/internal/managementrouter/user_defined_alert_rule_delete_by_id.go b/internal/managementrouter/user_defined_alert_rule_delete_by_id.go new file mode 100644 index 000000000..778f7f474 --- /dev/null +++ b/internal/managementrouter/user_defined_alert_rule_delete_by_id.go @@ -0,0 +1,26 @@ +package managementrouter + +import ( + "net/http" +) + +type DeleteUserDefinedAlertRulesResponse struct { + Id string `json:"id"` + StatusCode int `json:"status_code"` + Message string `json:"message,omitempty"` +} + +func (hr *httpRouter) DeleteUserDefinedAlertRuleById(w http.ResponseWriter, req *http.Request) { + ruleId, err := getParam(req, "ruleId") + if err != nil { + writeError(w, http.StatusBadRequest, err.Error()) + return + } + + if err := hr.managementClient.DeleteUserDefinedAlertRuleById(req.Context(), ruleId); err != nil { + handleError(w, err) + return + } + + w.WriteHeader(http.StatusNoContent) +} diff --git a/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go new file mode 100644 index 000000000..9b93bebfa --- /dev/null +++ b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go @@ -0,0 +1,173 @@ +package managementrouter_test + +import ( + 
"context" + "fmt" + "net/http" + "net/http/httptest" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("DeleteUserDefinedAlertRuleById", func() { + var ( + router http.Handler + mockK8sRules *testutils.MockPrometheusRuleInterface + mockK8s *testutils.MockClient + mockMapper *testutils.MockMapperClient + ) + + BeforeEach(func() { + mockK8sRules = &testutils.MockPrometheusRuleInterface{} + + userPR := monitoringv1.PrometheusRule{} + userPR.Name = "user-pr" + userPR.Namespace = "default" + userPR.Spec.Groups = []monitoringv1.RuleGroup{ + { + Name: "g1", + Rules: []monitoringv1.Rule{{Alert: "u1"}, {Alert: "u2"}}, + }, + } + + platformPR := monitoringv1.PrometheusRule{} + platformPR.Name = "platform-pr" + platformPR.Namespace = "openshift-monitoring" + platformPR.Spec.Groups = []monitoringv1.RuleGroup{ + { + Name: "pg1", + Rules: []monitoringv1.Rule{{Alert: "p1"}}, + }, + } + + mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "default/user-pr": &userPR, + "openshift-monitoring/platform-pr": &platformPR, + }) + + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockK8sRules + }, + } + }) + + Context("when ruleId is missing or blank", func() { + It("returns 400 with missing ruleId message", func() { + mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) + router = managementrouter.New(mgmt) + + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/%20", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusBadRequest)) + Expect(w.Body.String()).To(ContainSubstring("missing ruleId")) + }) + }) + + Context("when deletion succeeds", func() { + It("deletes a user-defined rule and keeps the other intact", func() { + mockMapper = &testutils.MockMapperClient{ + GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(rule.Alert) + }, + FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + pr := mapper.PrometheusRuleId{ + Namespace: "default", + Name: "user-pr", + } + return &pr, nil + }, + } + + mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) + router = managementrouter.New(mgmt) + + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/u1", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusNoContent)) + + pr, found, err := mockK8sRules.Get(context.Background(), "default", "user-pr") + Expect(found).To(BeTrue()) + Expect(err).NotTo(HaveOccurred()) + ruleNames := []string{} + for _, g := range pr.Spec.Groups { + for _, r := range g.Rules { + ruleNames = append(ruleNames, r.Alert) + } + } + Expect(ruleNames).NotTo(ContainElement("u1")) + Expect(ruleNames).To(ContainElement("u2")) + }) + }) + + Context("when rule is not found", func() { + It("returns 404 with expected message", func() { + mockMapper = &testutils.MockMapperClient{ + FindAlertRuleByIdFunc: func(alertRuleId 
mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, fmt.Errorf("alert rule not found") + }, + } + mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) + router = managementrouter.New(mgmt) + + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/missing", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusNotFound)) + Expect(w.Body.String()).To(ContainSubstring("AlertRule with id missing not found")) + }) + }) + + Context("when platform rule", func() { + It("rejects platform rule deletion and PR remains unchanged", func() { + mockMapper = &testutils.MockMapperClient{ + GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(rule.Alert) + }, + FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + pr := mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "platform-pr", + } + return &pr, nil + }, + } + + mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) + router = managementrouter.New(mgmt) + + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/p1", nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) + + Expect(w.Code).To(Equal(http.StatusMethodNotAllowed)) + Expect(w.Body.String()).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) + + pr, found, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + Expect(found).To(BeTrue()) + Expect(err).NotTo(HaveOccurred()) + for _, g := range pr.Spec.Groups { + for _, r := range g.Rules { + if r.Alert == "p1" { + found = true + } + } + } + Expect(found).To(BeTrue()) + }) + }) +}) diff --git a/pkg/k8s/alert_relabel_config.go b/pkg/k8s/alert_relabel_config.go new file mode 100644 index 000000000..8ce3501eb --- /dev/null +++ b/pkg/k8s/alert_relabel_config.go @@ -0,0 +1,70 @@ +package k8s + +import ( + "context" + "fmt" + + osmv1 "github.com/openshift/api/monitoring/v1" + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type alertRelabelConfigManager struct { + clientset *osmv1client.Clientset +} + +func newAlertRelabelConfigManager(clientset *osmv1client.Clientset) AlertRelabelConfigInterface { + return &alertRelabelConfigManager{ + clientset: clientset, + } +} + +func (arcm *alertRelabelConfigManager) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + arcs, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + return arcs.Items, nil +} + +func (arcm *alertRelabelConfigManager) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + arc, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", namespace, name, err) + } + + return arc, true, nil +} + +func (arcm *alertRelabelConfigManager) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + created, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(arc.Namespace).Create(ctx, &arc, 
metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + + return created, nil +} + +func (arcm *alertRelabelConfigManager) Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + _, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(arc.Namespace).Update(ctx, &arc, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + + return nil +} + +func (arcm *alertRelabelConfigManager) Delete(ctx context.Context, namespace string, name string) error { + err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("failed to delete AlertRelabelConfig %s: %w", name, err) + } + + return nil +} diff --git a/pkg/k8s/alert_relabel_config_informer.go b/pkg/k8s/alert_relabel_config_informer.go new file mode 100644 index 000000000..eccbd36d4 --- /dev/null +++ b/pkg/k8s/alert_relabel_config_informer.go @@ -0,0 +1,62 @@ +package k8s + +import ( + "context" + "log" + + osmv1 "github.com/openshift/api/monitoring/v1" + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" +) + +type alertRelabelConfigInformer struct { + clientset *osmv1client.Clientset +} + +func newAlertRelabelConfigInformer(clientset *osmv1client.Clientset) AlertRelabelConfigInformerInterface { + return &alertRelabelConfigInformer{ + clientset: clientset, + } +} + +func (arci *alertRelabelConfigInformer) Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error { + options := metav1.ListOptions{ + Watch: true, + } + + watcher, err := arci.clientset.MonitoringV1().AlertRelabelConfigs("").Watch(ctx, options) + if err != nil { + return err + } + defer watcher.Stop() + + ch := watcher.ResultChan() + for event := range ch { + arc, ok := event.Object.(*osmv1.AlertRelabelConfig) + if !ok { + log.Printf("Unexpected type: %v", event.Object) + continue + } + + switch event.Type { + case watch.Added: + if callbacks.OnAdd != nil { + callbacks.OnAdd(arc) + } + case watch.Modified: + if callbacks.OnUpdate != nil { + callbacks.OnUpdate(arc) + } + case watch.Deleted: + if callbacks.OnDelete != nil { + callbacks.OnDelete(arc) + } + case watch.Error: + log.Printf("Error occurred while watching AlertRelabelConfig: %s\n", event.Object) + } + } + + log.Fatalf("AlertRelabelConfig watcher channel closed unexpectedly") + return nil +} diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go new file mode 100644 index 000000000..e016eb5f6 --- /dev/null +++ b/pkg/k8s/client.go @@ -0,0 +1,91 @@ +package k8s + +import ( + "context" + "fmt" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" +) + +var _ Client = (*client)(nil) + +type client struct { + clientset *kubernetes.Clientset + monitoringv1clientset *monitoringv1client.Clientset + osmv1clientset *osmv1client.Clientset + config *rest.Config + + prometheusAlerts PrometheusAlertsInterface + + prometheusRuleManager PrometheusRuleInterface + prometheusRuleInformer PrometheusRuleInformerInterface + + alertRelabelConfigManager AlertRelabelConfigInterface + alertRelabelConfigInformer AlertRelabelConfigInformerInterface +} + +func newClient(_ 
context.Context, config *rest.Config) (Client, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + + monitoringv1clientset, err := monitoringv1client.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create monitoringv1 clientset: %w", err) + } + + osmv1clientset, err := osmv1client.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create osmv1 clientset: %w", err) + } + + c := &client{ + clientset: clientset, + monitoringv1clientset: monitoringv1clientset, + osmv1clientset: osmv1clientset, + config: config, + } + + c.prometheusAlerts = newPrometheusAlerts(clientset, config) + + c.prometheusRuleManager = newPrometheusRuleManager(monitoringv1clientset) + c.prometheusRuleInformer = newPrometheusRuleInformer(monitoringv1clientset) + + c.alertRelabelConfigManager = newAlertRelabelConfigManager(osmv1clientset) + c.alertRelabelConfigInformer = newAlertRelabelConfigInformer(osmv1clientset) + + return c, nil +} + +func (c *client) TestConnection(_ context.Context) error { + _, err := c.clientset.Discovery().ServerVersion() + if err != nil { + return fmt.Errorf("failed to connect to cluster: %w", err) + } + return nil +} + +func (c *client) PrometheusAlerts() PrometheusAlertsInterface { + return c.prometheusAlerts +} + +func (c *client) PrometheusRules() PrometheusRuleInterface { + return c.prometheusRuleManager +} + +func (c *client) PrometheusRuleInformer() PrometheusRuleInformerInterface { + return c.prometheusRuleInformer +} + +func (c *client) AlertRelabelConfigs() AlertRelabelConfigInterface { + return c.alertRelabelConfigManager +} + +func (c *client) AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface { + return c.alertRelabelConfigInformer +} diff --git a/pkg/k8s/new.go b/pkg/k8s/new.go new file mode 100644 index 000000000..5542d455f --- /dev/null +++ b/pkg/k8s/new.go @@ -0,0 +1,12 @@ +package k8s + +import ( + "context" + + "k8s.io/client-go/rest" +) + +// NewClient creates a new Kubernetes client with the given options +func NewClient(ctx context.Context, config *rest.Config) (Client, error) { + return newClient(ctx, config) +} diff --git a/pkg/k8s/prometheus_alerts.go b/pkg/k8s/prometheus_alerts.go new file mode 100644 index 000000000..e659c8a9f --- /dev/null +++ b/pkg/k8s/prometheus_alerts.go @@ -0,0 +1,257 @@ +package k8s + +import ( + "context" + "crypto/tls" + "crypto/x509" + "encoding/json" + "fmt" + "io" + "net/http" + "os" + "time" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" +) + +const ( + prometheusRouteNamespace = "openshift-monitoring" + prometheusRouteName = "prometheus-k8s" + prometheusAPIPath = "/v1/alerts" +) + +var ( + prometheusRoutePath = fmt.Sprintf("/apis/route.openshift.io/v1/namespaces/%s/routes/%s", prometheusRouteNamespace, prometheusRouteName) +) + +type prometheusAlerts struct { + clientset *kubernetes.Clientset + config *rest.Config +} + +// GetAlertsRequest holds parameters for filtering alerts +type GetAlertsRequest struct { + // Labels filters alerts by labels + Labels map[string]string + // State filters alerts by state: "firing", "pending", or "" for all states + State string +} + +type PrometheusAlert struct { + Labels map[string]string `json:"labels"` + Annotations map[string]string `json:"annotations"` + State string `json:"state"` + ActiveAt time.Time `json:"activeAt"` + Value string `json:"value"` +} + +type prometheusAlertsResponse struct { + Status string `json:"status"` + 
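+ // Data mirrors the "data" object of the Prometheus API response and carries the active alerts.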
Data struct { + Alerts []PrometheusAlert `json:"alerts"` + } `json:"data"` +} + +type prometheusRoute struct { + Spec struct { + Host string `json:"host"` + Path string `json:"path"` + } `json:"spec"` +} + +func newPrometheusAlerts(clientset *kubernetes.Clientset, config *rest.Config) PrometheusAlertsInterface { + return &prometheusAlerts{ + clientset: clientset, + config: config, + } +} + +func (pa prometheusAlerts) GetAlerts(ctx context.Context, req GetAlertsRequest) ([]PrometheusAlert, error) { + raw, err := pa.getAlertsViaProxy(ctx) + if err != nil { + return nil, err + } + + var alertsResp prometheusAlertsResponse + if err := json.Unmarshal(raw, &alertsResp); err != nil { + return nil, fmt.Errorf("decode prometheus response: %w", err) + } + + if alertsResp.Status != "success" { + return nil, fmt.Errorf("prometheus API returned non-success status: %s", alertsResp.Status) + } + + out := make([]PrometheusAlert, 0, len(alertsResp.Data.Alerts)) + for _, a := range alertsResp.Data.Alerts { + // Filter alerts based on state if provided + if req.State != "" && a.State != req.State { + continue + } + + // Filter alerts based on labels if provided + if !labelsMatch(&req, &a) { + continue + } + + out = append(out, a) + } + return out, nil +} + +func (pa prometheusAlerts) getAlertsViaProxy(ctx context.Context) ([]byte, error) { + url, err := pa.buildPrometheusURL(ctx) + if err != nil { + return nil, err + } + + client, err := pa.createHTTPClient() + if err != nil { + return nil, err + } + + return pa.executeRequest(ctx, client, url) +} + +func (pa prometheusAlerts) buildPrometheusURL(ctx context.Context) (string, error) { + route, err := pa.fetchPrometheusRoute(ctx) + if err != nil { + return "", err + } + + return fmt.Sprintf("https://%s%s%s", route.Spec.Host, route.Spec.Path, prometheusAPIPath), nil +} + +func (pa prometheusAlerts) fetchPrometheusRoute(ctx context.Context) (*prometheusRoute, error) { + routeData, err := pa.clientset.CoreV1().RESTClient(). + Get(). + AbsPath(prometheusRoutePath). 
+ DoRaw(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get prometheus route: %w", err) + } + + var route prometheusRoute + if err := json.Unmarshal(routeData, &route); err != nil { + return nil, fmt.Errorf("failed to parse route: %w", err) + } + + return &route, nil +} + +func (pa prometheusAlerts) createHTTPClient() (*http.Client, error) { + tlsConfig, err := pa.buildTLSConfig() + if err != nil { + return nil, err + } + + return &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: tlsConfig, + }, + }, nil +} + +func (pa prometheusAlerts) buildTLSConfig() (*tls.Config, error) { + caCertPool, err := pa.loadCACertPool() + if err != nil { + return nil, err + } + + return &tls.Config{ + MinVersion: tls.VersionTLS12, + RootCAs: caCertPool, + }, nil +} + +func (pa prometheusAlerts) loadCACertPool() (*x509.CertPool, error) { + caCertPool, err := x509.SystemCertPool() + if err != nil { + caCertPool = x509.NewCertPool() + } + + if len(pa.config.CAData) > 0 { + caCertPool.AppendCertsFromPEM(pa.config.CAData) + return caCertPool, nil + } + + if pa.config.CAFile != "" { + caCert, err := os.ReadFile(pa.config.CAFile) + if err != nil { + return nil, fmt.Errorf("read CA cert file: %w", err) + } + caCertPool.AppendCertsFromPEM(caCert) + } + + return caCertPool, nil +} + +func (pa prometheusAlerts) executeRequest(ctx context.Context, client *http.Client, url string) ([]byte, error) { + req, err := pa.createAuthenticatedRequest(ctx, url) + if err != nil { + return nil, err + } + + return pa.performRequest(client, req) +} + +func (pa prometheusAlerts) createAuthenticatedRequest(ctx context.Context, url string) (*http.Request, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + + token, err := pa.loadBearerToken() + if err != nil { + return nil, err + } + + req.Header.Set("Authorization", "Bearer "+token) + return req, nil +} + +func (pa prometheusAlerts) loadBearerToken() (string, error) { + if pa.config.BearerToken != "" { + return pa.config.BearerToken, nil + } + + if pa.config.BearerTokenFile == "" { + return "", fmt.Errorf("no bearer token or token file configured") + } + + tokenBytes, err := os.ReadFile(pa.config.BearerTokenFile) + if err != nil { + return "", fmt.Errorf("load bearer token file: %w", err) + } + + return string(tokenBytes), nil +} + +func (pa prometheusAlerts) performRequest(client *http.Client, req *http.Request) ([]byte, error) { + resp, err := client.Do(req) + if err != nil { + return nil, fmt.Errorf("execute request: %w", err) + } + defer func() { _ = resp.Body.Close() }() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("read response body: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body)) + } + + return body, nil +} + +func labelsMatch(req *GetAlertsRequest, alert *PrometheusAlert) bool { + for key, value := range req.Labels { + if alertValue, exists := alert.Labels[key]; !exists || alertValue != value { + return false + } + } + + return true +} diff --git a/pkg/k8s/prometheus_rule.go b/pkg/k8s/prometheus_rule.go new file mode 100644 index 000000000..eb9246130 --- /dev/null +++ b/pkg/k8s/prometheus_rule.go @@ -0,0 +1,127 @@ +package k8s + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monitoringv1client 
"github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" +) + +type prometheusRuleManager struct { + clientset *monitoringv1client.Clientset +} + +func newPrometheusRuleManager(clientset *monitoringv1client.Clientset) PrometheusRuleInterface { + return &prometheusRuleManager{ + clientset: clientset, + } +} + +func (prm *prometheusRuleManager) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { + prs, err := prm.clientset.MonitoringV1().PrometheusRules(namespace).List(ctx, metav1.ListOptions{}) + if err != nil { + return nil, err + } + + return prs.Items, nil +} + +func (prm *prometheusRuleManager) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) + } + + return pr, true, nil +} + +func (prm *prometheusRuleManager) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { + _, err := prm.clientset.MonitoringV1().PrometheusRules(pr.Namespace).Update(ctx, &pr, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + + return nil +} + +func (prm *prometheusRuleManager) Delete(ctx context.Context, namespace string, name string) error { + err := prm.clientset.MonitoringV1().PrometheusRules(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil { + return fmt.Errorf("failed to delete PrometheusRule %s: %w", name, err) + } + + return nil +} + +func (prm *prometheusRuleManager) AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + pr, err := prm.getOrCreatePrometheusRule(ctx, namespacedName) + if err != nil { + return err + } + + // Find or create the group + var group *monitoringv1.RuleGroup + for i := range pr.Spec.Groups { + if pr.Spec.Groups[i].Name == groupName { + group = &pr.Spec.Groups[i] + break + } + } + if group == nil { + pr.Spec.Groups = append(pr.Spec.Groups, monitoringv1.RuleGroup{ + Name: groupName, + Rules: []monitoringv1.Rule{}, + }) + group = &pr.Spec.Groups[len(pr.Spec.Groups)-1] + } + + // Add the new rule to the group + group.Rules = append(group.Rules, rule) + + _, err = prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Update(ctx, pr, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) + } + + return nil +} + +func (prm *prometheusRuleManager) getOrCreatePrometheusRule(ctx context.Context, namespacedName types.NamespacedName) (*monitoringv1.PrometheusRule, error) { + pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Get(ctx, namespacedName.Name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return prm.createPrometheusRule(ctx, namespacedName) + } + + return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) + } + + return pr, nil +} + +func (prm *prometheusRuleManager) createPrometheusRule(ctx context.Context, namespacedName types.NamespacedName) 
(*monitoringv1.PrometheusRule, error) { + pr := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespacedName.Name, + Namespace: namespacedName.Namespace, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{}, + }, + } + + pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespacedName.Namespace).Create(ctx, pr, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create PrometheusRule %s/%s: %w", namespacedName.Namespace, namespacedName.Name, err) + } + + return pr, nil +} diff --git a/pkg/k8s/prometheus_rule_informer.go b/pkg/k8s/prometheus_rule_informer.go new file mode 100644 index 000000000..c0e7a716b --- /dev/null +++ b/pkg/k8s/prometheus_rule_informer.go @@ -0,0 +1,62 @@ +package k8s + +import ( + "context" + "log" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/watch" +) + +type prometheusRuleInformer struct { + clientset *monitoringv1client.Clientset +} + +func newPrometheusRuleInformer(clientset *monitoringv1client.Clientset) PrometheusRuleInformerInterface { + return &prometheusRuleInformer{ + clientset: clientset, + } +} + +func (pri *prometheusRuleInformer) Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error { + options := metav1.ListOptions{ + Watch: true, + } + + watcher, err := pri.clientset.MonitoringV1().PrometheusRules("").Watch(ctx, options) + if err != nil { + return err + } + defer watcher.Stop() + + ch := watcher.ResultChan() + for event := range ch { + pr, ok := event.Object.(*monitoringv1.PrometheusRule) + if !ok { + log.Printf("Unexpected type: %v", event.Object) + continue + } + + switch event.Type { + case watch.Added: + if callbacks.OnAdd != nil { + callbacks.OnAdd(pr) + } + case watch.Modified: + if callbacks.OnUpdate != nil { + callbacks.OnUpdate(pr) + } + case watch.Deleted: + if callbacks.OnDelete != nil { + callbacks.OnDelete(pr) + } + case watch.Error: + log.Printf("Error occurred while watching PrometheusRule: %s\n", event.Object) + } + } + + log.Fatalf("PrometheusRule watcher channel closed unexpectedly") + return nil +} diff --git a/pkg/k8s/types.go b/pkg/k8s/types.go new file mode 100644 index 000000000..c3579841f --- /dev/null +++ b/pkg/k8s/types.go @@ -0,0 +1,115 @@ +package k8s + +import ( + "context" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" +) + +// ClientOptions holds configuration options for creating a Kubernetes client +type ClientOptions struct { + // KubeconfigPath specifies the path to the kubeconfig file for remote connections + // If empty, will try default locations or in-cluster config + KubeconfigPath string +} + +// Client defines the contract for Kubernetes client operations +type Client interface { + // TestConnection tests the connection to the Kubernetes cluster + TestConnection(ctx context.Context) error + + // PrometheusAlerts retrieves active Prometheus alerts + PrometheusAlerts() PrometheusAlertsInterface + + // PrometheusRules returns the PrometheusRule interface + PrometheusRules() PrometheusRuleInterface + + // PrometheusRuleInformer returns the PrometheusRuleInformer interface + PrometheusRuleInformer() PrometheusRuleInformerInterface + + // AlertRelabelConfigs returns the 
AlertRelabelConfig interface + AlertRelabelConfigs() AlertRelabelConfigInterface + + // AlertRelabelConfigInformer returns the AlertRelabelConfigInformer interface + AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface +} + +// PrometheusAlertsInterface defines operations for managing PrometheusAlerts +type PrometheusAlertsInterface interface { + // GetAlerts retrieves Prometheus alerts with optional state filtering + GetAlerts(ctx context.Context, req GetAlertsRequest) ([]PrometheusAlert, error) +} + +// PrometheusRuleInterface defines operations for managing PrometheusRules +type PrometheusRuleInterface interface { + // List lists all PrometheusRules in the cluster + List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) + + // Get retrieves a PrometheusRule by namespace and name + Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) + + // Update updates an existing PrometheusRule + Update(ctx context.Context, pr monitoringv1.PrometheusRule) error + + // Delete deletes a PrometheusRule by namespace and name + Delete(ctx context.Context, namespace string, name string) error + + // AddRule adds a new rule to the specified PrometheusRule + AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error +} + +// PrometheusRuleInformerInterface defines operations for PrometheusRules informers +type PrometheusRuleInformerInterface interface { + // Run starts the informer and sets up the provided callbacks for add, update, and delete events + Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error +} + +// PrometheusRuleInformerCallback holds the callback functions for informer events +type PrometheusRuleInformerCallback struct { + // OnAdd is called when a new PrometheusRule is added + OnAdd func(pr *monitoringv1.PrometheusRule) + + // OnUpdate is called when an existing PrometheusRule is updated + OnUpdate func(pr *monitoringv1.PrometheusRule) + + // OnDelete is called when a PrometheusRule is deleted + OnDelete func(pr *monitoringv1.PrometheusRule) +} + +// AlertRelabelConfigInterface defines operations for managing AlertRelabelConfigs +type AlertRelabelConfigInterface interface { + // List lists all AlertRelabelConfigs in the cluster + List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + + // Get retrieves an AlertRelabelConfig by namespace and name + Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) + + // Create creates a new AlertRelabelConfig + Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) + + // Update updates an existing AlertRelabelConfig + Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error + + // Delete deletes an AlertRelabelConfig by namespace and name + Delete(ctx context.Context, namespace string, name string) error +} + +// AlertRelabelConfigInformerInterface defines operations for AlertRelabelConfig informers +type AlertRelabelConfigInformerInterface interface { + // Run starts the informer and sets up the provided callbacks for add, update, and delete events + Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error +} + +// AlertRelabelConfigInformerCallback holds the callback functions for informer events +type AlertRelabelConfigInformerCallback struct { + // OnAdd is called when a new AlertRelabelConfig is added + OnAdd func(arc *osmv1.AlertRelabelConfig) + + // 
OnUpdate is called when an existing AlertRelabelConfig is updated + OnUpdate func(arc *osmv1.AlertRelabelConfig) + + // OnDelete is called when an AlertRelabelConfig is deleted + OnDelete func(arc *osmv1.AlertRelabelConfig) +} diff --git a/pkg/management/create_user_defined_alert_rule.go b/pkg/management/create_user_defined_alert_rule.go new file mode 100644 index 000000000..226b371f2 --- /dev/null +++ b/pkg/management/create_user_defined_alert_rule.go @@ -0,0 +1,46 @@ +package management + +import ( + "context" + "errors" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" +) + +const ( + DefaultGroupName = "user-defined-rules" +) + +func (c *client) CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (string, error) { + if prOptions.Name == "" || prOptions.Namespace == "" { + return "", errors.New("PrometheusRule Name and Namespace must be specified") + } + + nn := types.NamespacedName{ + Name: prOptions.Name, + Namespace: prOptions.Namespace, + } + + if IsPlatformAlertRule(nn) { + return "", errors.New("cannot add user-defined alert rule to a platform-managed PrometheusRule") + } + + // Check if rule with the same ID already exists + ruleId := c.mapper.GetAlertingRuleId(&alertRule) + _, err := c.mapper.FindAlertRuleById(ruleId) + if err == nil { + return "", errors.New("alert rule with exact config already exists") + } + + if prOptions.GroupName == "" { + prOptions.GroupName = DefaultGroupName + } + + err = c.k8sClient.PrometheusRules().AddRule(ctx, nn, prOptions.GroupName, alertRule) + if err != nil { + return "", err + } + + return string(c.mapper.GetAlertingRuleId(&alertRule)), nil +} diff --git a/pkg/management/create_user_defined_alert_rule_test.go b/pkg/management/create_user_defined_alert_rule_test.go new file mode 100644 index 000000000..f45355e60 --- /dev/null +++ b/pkg/management/create_user_defined_alert_rule_test.go @@ -0,0 +1,310 @@ +package management_test + +import ( + "context" + "errors" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("CreateUserDefinedAlertRule", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + Context("when creating a user-defined alert rule", func() { + It("should successfully create with default group name", func() { + By("setting up test data") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "summary": "Test alert", + }, + } + + prOptions := management.PrometheusRuleOptions{ + Name: "test-rule", + Namespace: "test-namespace", + } + + ruleId := "test-rule-id" + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(ruleId) + } + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("not found") + } + + addRuleCalled := false + var capturedGroupName string + mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + addRuleCalled = true + capturedGroupName = groupName + Expect(nn.Name).To(Equal("test-rule")) + Expect(nn.Namespace).To(Equal("test-namespace")) + Expect(rule.Alert).To(Equal("TestAlert")) + return nil + } + + By("creating the alert rule") + returnedId, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the result") + Expect(err).ToNot(HaveOccurred()) + Expect(returnedId).To(Equal(ruleId)) + Expect(addRuleCalled).To(BeTrue()) + Expect(capturedGroupName).To(Equal("user-defined-rules")) + }) + + It("should successfully create with custom group name", func() { + By("setting up test data") + alertRule := monitoringv1.Rule{ + Alert: "CustomGroupAlert", + Expr: intstr.FromString("memory_usage > 90"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "custom-rule", + Namespace: "custom-namespace", + GroupName: "custom-group", + } + + ruleId := "custom-rule-id" + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(ruleId) + } + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("not found") + } + + var capturedGroupName string + mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + capturedGroupName = groupName + return nil + } + + By("creating the alert rule") + returnedId, err := client.CreateUserDefinedAlertRule(ctx, 
alertRule, prOptions) + + By("verifying the result") + Expect(err).ToNot(HaveOccurred()) + Expect(returnedId).To(Equal(ruleId)) + Expect(capturedGroupName).To(Equal("custom-group")) + }) + + It("should return error when namespace is missing", func() { + By("setting up test data with missing namespace") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "test-rule", + Namespace: "", + } + + By("attempting to create the alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("PrometheusRule Name and Namespace must be specified")) + }) + + It("should return error when name is missing", func() { + By("setting up test data with missing name") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "", + Namespace: "test-namespace", + } + + By("attempting to create the alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("PrometheusRule Name and Namespace must be specified")) + }) + + It("should return error when trying to add to platform-managed PrometheusRule", func() { + By("setting up test data with platform-managed PrometheusRule name") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + } + + By("attempting to create the alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot add user-defined alert rule to a platform-managed PrometheusRule")) + }) + + It("should return error when rule with same config already exists", func() { + By("setting up test data") + alertRule := monitoringv1.Rule{ + Alert: "DuplicateAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "test-rule", + Namespace: "test-namespace", + } + + ruleId := "duplicate-rule-id" + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(ruleId) + } + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + // Return success, indicating the rule already exists + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "test-rule", + }, nil + } + + By("attempting to create the duplicate alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("alert rule with exact config already exists")) + }) + + It("should return error when AddRule fails", func() { + By("setting up test data") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "test-rule", + Namespace: "test-namespace", + } + + ruleId := "test-rule-id" + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + 
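+ // Return a fixed id; FindAlertRuleByIdFunc below reports "not found", so the flow reaches AddRule.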
return mapper.PrometheusAlertRuleId(ruleId) + } + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("not found") + } + + expectedError := errors.New("failed to add rule to kubernetes") + mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + return expectedError + } + + By("attempting to create the alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error is propagated") + Expect(err).To(HaveOccurred()) + Expect(err).To(Equal(expectedError)) + }) + }) + + Context("when dealing with edge cases", func() { + It("should handle alert rule with no labels or annotations", func() { + By("setting up minimal alert rule") + alertRule := monitoringv1.Rule{ + Alert: "MinimalAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "minimal-rule", + Namespace: "test-namespace", + } + + ruleId := "minimal-rule-id" + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(ruleId) + } + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("not found") + } + + addRuleCalled := false + mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + addRuleCalled = true + Expect(rule.Labels).To(BeNil()) + Expect(rule.Annotations).To(BeNil()) + return nil + } + + By("creating the minimal alert rule") + returnedId, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the result") + Expect(err).ToNot(HaveOccurred()) + Expect(returnedId).To(Equal(ruleId)) + Expect(addRuleCalled).To(BeTrue()) + }) + + It("should reject PrometheusRules in openshift- prefixed namespaces", func() { + By("setting up test data with openshift- namespace prefix") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prOptions := management.PrometheusRuleOptions{ + Name: "custom-rule", + Namespace: "openshift-user-namespace", + } + + By("attempting to create the alert rule") + _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + + By("verifying the error") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot add user-defined alert rule to a platform-managed PrometheusRule")) + }) + }) +}) diff --git a/pkg/management/delete_user_defined_alert_rule_by_id.go b/pkg/management/delete_user_defined_alert_rule_by_id.go new file mode 100644 index 000000000..18ac94b0d --- /dev/null +++ b/pkg/management/delete_user_defined_alert_rule_by_id.go @@ -0,0 +1,85 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +func (c *client) DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error { + prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) + if err != nil { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} + } + + if IsPlatformAlertRule(types.NamespacedName(*prId)) { + return &NotAllowedError{Message: "cannot delete alert rule from a platform-managed PrometheusRule"} + } + + pr, found, 
err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + if err != nil { + return err + } + + if !found { + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + } + + updated := false + var newGroups []monitoringv1.RuleGroup + + for _, group := range pr.Spec.Groups { + newRules := c.filterRulesById(group.Rules, alertRuleId, &updated) + + // Only keep groups that still have rules + if len(newRules) > 0 { + group.Rules = newRules + newGroups = append(newGroups, group) + } else if len(newRules) != len(group.Rules) { + // Group became empty due to rule deletion + updated = true + } + } + + if updated { + if len(newGroups) == 0 { + // No groups left, delete the entire PrometheusRule + err = c.k8sClient.PrometheusRules().Delete(ctx, pr.Namespace, pr.Name) + if err != nil { + return fmt.Errorf("failed to delete PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + } else { + // Update PrometheusRule with remaining groups + pr.Spec.Groups = newGroups + err = c.k8sClient.PrometheusRules().Update(ctx, *pr) + if err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + } + return nil + } + + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", pr.Namespace, pr.Name)} +} + +func (c *client) filterRulesById(rules []monitoringv1.Rule, alertRuleId string, updated *bool) []monitoringv1.Rule { + var newRules []monitoringv1.Rule + + for _, rule := range rules { + if c.shouldDeleteRule(rule, alertRuleId) { + *updated = true + continue + } + newRules = append(newRules, rule) + } + + return newRules +} + +func (c *client) shouldDeleteRule(rule monitoringv1.Rule, alertRuleId string) bool { + return alertRuleId == string(c.mapper.GetAlertingRuleId(&rule)) +} diff --git a/pkg/management/delete_user_defined_alert_rule_by_id_test.go b/pkg/management/delete_user_defined_alert_rule_by_id_test.go new file mode 100644 index 000000000..879d87307 --- /dev/null +++ b/pkg/management/delete_user_defined_alert_rule_by_id_test.go @@ -0,0 +1,527 @@ +package management_test + +import ( + "context" + "errors" + "fmt" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("DeleteUserDefinedAlertRuleById", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + Context("when deleting a user-defined alert rule", func() { + It("should delete rule from multi-rule PrometheusRule and update", func() { + By("setting up PrometheusRule with 3 rules in 2 groups") + rule1 := monitoringv1.Rule{ + Alert: "Alert1", + Expr: intstr.FromString("up == 0"), + } + rule2 := monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + rule3 := monitoringv1.Rule{ + Alert: "Alert3", + Expr: intstr.FromString("memory_usage > 90"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-rule", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1, rule2}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{rule3}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/multi-rule": prometheusRule, + }) + + alertRuleId := "alert2-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "multi-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "Alert2" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("deleting the middle rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + Expect(err).ToNot(HaveOccurred()) + + By("verifying PrometheusRule was updated, not deleted") + updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-rule") + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeTrue()) + Expect(updatedPR.Spec.Groups).To(HaveLen(2)) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("Alert1")) + Expect(updatedPR.Spec.Groups[1].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[1].Rules[0].Alert).To(Equal("Alert3")) + }) + + It("should delete entire PrometheusRule when deleting the last rule", func() { + By("setting up PrometheusRule with single rule") + rule := monitoringv1.Rule{ + Alert: "OnlyAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "single-rule", + Namespace: "test-namespace", + }, + Spec: 
monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/single-rule": prometheusRule, + }) + + alertRuleId := "only-alert-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "single-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + + deleteCalled := false + mockPR.DeleteFunc = func(ctx context.Context, namespace, name string) error { + deleteCalled = true + Expect(namespace).To(Equal("test-namespace")) + Expect(name).To(Equal("single-rule")) + return nil + } + + By("deleting the only rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + Expect(err).ToNot(HaveOccurred()) + + By("verifying PrometheusRule was deleted") + Expect(deleteCalled).To(BeTrue()) + }) + + It("should remove empty group when deleting its only rule", func() { + By("setting up PrometheusRule with 2 groups, one with single rule") + rule1 := monitoringv1.Rule{ + Alert: "Alert1", + Expr: intstr.FromString("up == 0"), + } + rule2 := monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + rule3 := monitoringv1.Rule{ + Alert: "SingleRuleInGroup", + Expr: intstr.FromString("memory_usage > 90"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-group", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1, rule2}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{rule3}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/multi-group": prometheusRule, + }) + + alertRuleId := "single-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "multi-group", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "SingleRuleInGroup" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("deleting the single rule from group2") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + Expect(err).ToNot(HaveOccurred()) + + By("verifying group2 was removed and group1 remains") + updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-group") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedPR.Spec.Groups).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Name).To(Equal("group1")) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(2)) + }) + + It("should delete only the exact matching rule", func() { + By("setting up PrometheusRule with similar rules") + rule1 := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + rule2 := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + + prometheusRule := 
&monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "similar-rules", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1, rule2}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/similar-rules": prometheusRule, + }) + + targetRuleId := "target-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "similar-rules", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + // Only rule1 matches the target ID + if alertRule.Alert == "TestAlert" && alertRule.Labels["severity"] == "warning" { + return mapper.PrometheusAlertRuleId(targetRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("deleting the specific rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, targetRuleId) + Expect(err).ToNot(HaveOccurred()) + + By("verifying only the exact matching rule was deleted") + updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "similar-rules") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Labels["severity"]).To(Equal("critical")) + }) + }) + + Context("when handling errors", func() { + It("should return error when rule not found in mapper", func() { + By("configuring mapper to return error") + alertRuleId := "nonexistent-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("alert rule not found") + } + + By("attempting to delete the rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + + By("verifying error is returned") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("AlertRule with id nonexistent-rule-id not found")) + }) + + It("should return error when trying to delete from platform-managed PrometheusRule", func() { + By("configuring mapper to return platform PrometheusRule") + alertRuleId := "platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-alerts", + }, nil + } + + By("attempting to delete the rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + + By("verifying error is returned") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) + }) + + It("should return error when PrometheusRule Get fails", func() { + By("configuring Get to return error") + alertRuleId := "test-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "test-rule", + }, nil + } + + mockPR.GetFunc = func(ctx context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + } + + By("attempting to delete the rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + + By("verifying error is returned") + 
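+ // The Get failure from the mocked k8s client should surface to the caller unchanged.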
Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get PrometheusRule")) + }) + + It("should return error when PrometheusRule Update fails", func() { + By("setting up PrometheusRule with 2 rules") + rule1 := monitoringv1.Rule{ + Alert: "Alert1", + Expr: intstr.FromString("up == 0"), + } + rule2 := monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rule", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1, rule2}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/test-rule": prometheusRule, + }) + + alertRuleId := "alert2-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "test-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "Alert2" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + mockPR.UpdateFunc = func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + return fmt.Errorf("kubernetes update error") + } + + By("attempting to delete the rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + + By("verifying error is returned") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to update PrometheusRule")) + Expect(err.Error()).To(ContainSubstring("kubernetes update error")) + }) + + It("should return error when PrometheusRule Delete fails", func() { + By("setting up PrometheusRule with single rule") + rule := monitoringv1.Rule{ + Alert: "OnlyAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "single-rule", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/single-rule": prometheusRule, + }) + + alertRuleId := "only-alert-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "single-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + + mockPR.DeleteFunc = func(ctx context.Context, namespace, name string) error { + return fmt.Errorf("kubernetes delete error") + } + + By("attempting to delete the rule") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + + By("verifying error is returned") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to delete PrometheusRule")) + Expect(err.Error()).To(ContainSubstring("kubernetes delete error")) + }) + }) + + Context("when handling edge cases", func() { + It("should handle PrometheusRule with multiple groups correctly", func() { + By("setting up PrometheusRule with 3 groups") + rule1 := monitoringv1.Rule{ + Alert: "Alert1", + Expr: 
intstr.FromString("up == 0"), + } + rule2 := monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + rule3 := monitoringv1.Rule{ + Alert: "Alert3", + Expr: intstr.FromString("memory_usage > 90"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-group", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{rule2}, + }, + { + Name: "group3", + Rules: []monitoringv1.Rule{rule3}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/multi-group": prometheusRule, + }) + + alertRuleId := "alert2-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "test-namespace", + Name: "multi-group", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "Alert2" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("deleting rule from middle group") + err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) + Expect(err).ToNot(HaveOccurred()) + + By("verifying middle group was removed") + updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-group") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedPR.Spec.Groups).To(HaveLen(2)) + Expect(updatedPR.Spec.Groups[0].Name).To(Equal("group1")) + Expect(updatedPR.Spec.Groups[1].Name).To(Equal("group3")) + }) + }) +}) diff --git a/pkg/management/errors.go b/pkg/management/errors.go new file mode 100644 index 000000000..a175acdc8 --- /dev/null +++ b/pkg/management/errors.go @@ -0,0 +1,20 @@ +package management + +import "fmt" + +type NotFoundError struct { + Resource string + Id string +} + +func (r *NotFoundError) Error() string { + return fmt.Sprintf("%s with id %s not found", r.Resource, r.Id) +} + +type NotAllowedError struct { + Message string +} + +func (r *NotAllowedError) Error() string { + return r.Message +} diff --git a/pkg/management/get_alerts.go b/pkg/management/get_alerts.go new file mode 100644 index 000000000..ec0c3976d --- /dev/null +++ b/pkg/management/get_alerts.go @@ -0,0 +1,53 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +func (c *client) GetAlerts(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + alerts, err := c.k8sClient.PrometheusAlerts().GetAlerts(ctx, req) + if err != nil { + return nil, fmt.Errorf("failed to get prometheus alerts: %w", err) + } + + var result []k8s.PrometheusAlert + for _, alert := range alerts { + // Apply relabel configurations to the alert + updatedAlert, err := c.updateAlertBasedOnRelabelConfig(&alert) + if err != nil { + // Alert was dropped by relabel config, skip it + continue + } + result = append(result, updatedAlert) + } + + return result, nil +} + +func (c *client) updateAlertBasedOnRelabelConfig(alert *k8s.PrometheusAlert) (k8s.PrometheusAlert, error) { + // Create a temporary rule to match relabel configs + rule := &monitoringv1.Rule{ + Alert: alert.Labels["alertname"], + Labels: alert.Labels, + } + + 
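+	// Look up the relabel configs whose source labels match this alert and apply
+	// them below; if a matching config drops the alert (or relabeling fails), an
+	// error is returned and GetAlerts skips the alert entirely.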
configs := c.mapper.GetAlertRelabelConfigSpec(rule) + + updatedLabels, err := applyRelabelConfigs(string(rule.Alert), alert.Labels, configs) + if err != nil { + return k8s.PrometheusAlert{}, err + } + + alert.Labels = updatedLabels + // Update severity if it was changed + if severity, exists := updatedLabels["severity"]; exists { + alert.Labels["severity"] = severity + } + + return *alert, nil +} diff --git a/pkg/management/get_alerts_test.go b/pkg/management/get_alerts_test.go new file mode 100644 index 000000000..428303b37 --- /dev/null +++ b/pkg/management/get_alerts_test.go @@ -0,0 +1,122 @@ +package management_test + +import ( + "context" + "errors" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("GetAlerts", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockAlerts *testutils.MockPrometheusAlertsInterface + mockMapper *testutils.MockMapperClient + client management.Client + testTime time.Time + ) + + BeforeEach(func() { + ctx = context.Background() + testTime = time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) + + mockAlerts = &testutils.MockPrometheusAlertsInterface{} + mockK8s = &testutils.MockClient{ + PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { + return mockAlerts + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + It("should return alerts unchanged when no relabel configs exist", func() { + mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ + {Labels: map[string]string{"alertname": "HighCPU", "severity": "warning"}, State: "firing", ActiveAt: testTime}, + {Labels: map[string]string{"alertname": "HighMemory", "severity": "critical"}, State: "pending", ActiveAt: testTime}, + }) + mockMapper.GetAlertRelabelConfigSpecFunc = func(*monitoringv1.Rule) []osmv1.RelabelConfig { return nil } + + result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(HaveLen(2)) + Expect(result[0].Labels["alertname"]).To(Equal("HighCPU")) + Expect(result[1].Labels["alertname"]).To(Equal("HighMemory")) + }) + + It("should apply Replace relabel actions correctly", func() { + mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ + { + Labels: map[string]string{"alertname": "TestAlert", "severity": "warning", "team": "platform"}, + State: "firing", + }, + }) + mockMapper.GetAlertRelabelConfigSpecFunc = func(rule *monitoringv1.Rule) []osmv1.RelabelConfig { + return []osmv1.RelabelConfig{ + {TargetLabel: "severity", Replacement: "critical", Action: "Replace"}, + {TargetLabel: "team", Replacement: "infrastructure", Action: "Replace"}, + {TargetLabel: "reviewed", Replacement: "true", Action: "Replace"}, + } + } + + result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(HaveLen(1)) + Expect(result[0].Labels).To(HaveKeyWithValue("severity", "critical")) + Expect(result[0].Labels).To(HaveKeyWithValue("team", "infrastructure")) + Expect(result[0].Labels).To(HaveKeyWithValue("reviewed", "true")) + }) + + It("should filter out alerts with Drop action", func() { + mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ + {Labels: 
map[string]string{"alertname": "KeepAlert", "severity": "warning"}, State: "firing", ActiveAt: testTime}, + {Labels: map[string]string{"alertname": "DropAlert", "severity": "info"}, State: "firing", ActiveAt: testTime}, + }) + mockMapper.GetAlertRelabelConfigSpecFunc = func(rule *monitoringv1.Rule) []osmv1.RelabelConfig { + if rule.Alert == "DropAlert" { + return []osmv1.RelabelConfig{{Action: "Drop"}} + } + return nil + } + + result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(HaveLen(1)) + Expect(result[0].Labels["alertname"]).To(Equal("KeepAlert")) + }) + + It("should propagate errors and handle edge cases", func() { + By("propagating errors from PrometheusAlerts interface") + mockAlerts.GetAlertsFunc = func(context.Context, k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return nil, errors.New("prometheus error") + } + _, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("prometheus error")) + + By("handling nil labels with Replace action") + mockAlerts.GetAlertsFunc = nil + mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ + {Labels: map[string]string{"alertname": "TestAlert", "severity": "warning"}, State: "firing", ActiveAt: testTime}, + }) + mockMapper.GetAlertRelabelConfigSpecFunc = func(*monitoringv1.Rule) []osmv1.RelabelConfig { + return []osmv1.RelabelConfig{{TargetLabel: "team", Replacement: "infra", Action: "Replace"}} + } + result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + Expect(err).ToNot(HaveOccurred()) + Expect(result[0].Labels).To(HaveKeyWithValue("team", "infra")) + }) +}) diff --git a/pkg/management/get_rule_by_id.go b/pkg/management/get_rule_by_id.go new file mode 100644 index 000000000..524aeaeb9 --- /dev/null +++ b/pkg/management/get_rule_by_id.go @@ -0,0 +1,56 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +func (c *client) GetRuleById(ctx context.Context, alertRuleId string) (monitoringv1.Rule, error) { + prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) + if err != nil { + return monitoringv1.Rule{}, err + } + + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + if err != nil { + return monitoringv1.Rule{}, err + } + + if !found { + return monitoringv1.Rule{}, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + } + + var rule *monitoringv1.Rule + + for groupIdx := range pr.Spec.Groups { + for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { + foundRule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] + if c.mapper.GetAlertingRuleId(foundRule) == mapper.PrometheusAlertRuleId(alertRuleId) { + rule = foundRule + break + } + } + } + + if rule != nil { + return c.updateRuleBasedOnRelabelConfig(rule) + } + + return monitoringv1.Rule{}, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) +} + +func (c *client) updateRuleBasedOnRelabelConfig(rule *monitoringv1.Rule) (monitoringv1.Rule, error) { + configs := c.mapper.GetAlertRelabelConfigSpec(rule) + + updatedLabels, err := applyRelabelConfigs(string(rule.Alert), rule.Labels, configs) + if err != nil { + return monitoringv1.Rule{}, err + } + + rule.Labels = updatedLabels + return *rule, nil +} diff --git 
a/pkg/management/get_rule_by_id_test.go b/pkg/management/get_rule_by_id_test.go new file mode 100644 index 000000000..27e61d94a --- /dev/null +++ b/pkg/management/get_rule_by_id_test.go @@ -0,0 +1,186 @@ +package management_test + +import ( + "context" + "errors" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var ErrAlertRuleNotFound = errors.New("alert rule not found") + +var _ = Describe("GetRuleById", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + Context("when retrieving an alert rule by ID", func() { + It("should successfully return the rule when it exists", func() { + By("setting up a PrometheusRule with multiple rules") + rule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + rule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu > 80"), + Annotations: map[string]string{ + "summary": "High CPU usage", + }, + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rules", + Namespace: "monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{rule2}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "monitoring/test-rules": prometheusRule, + }) + + alertRuleId := "test-rule-id-2" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "monitoring", + Name: "test-rules", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "TestAlert2" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("retrieving the rule by ID") + rule, err := client.GetRuleById(ctx, alertRuleId) + Expect(err).ToNot(HaveOccurred()) + Expect(rule).ToNot(BeNil()) + + By("verifying the returned rule is correct") + Expect(rule.Alert).To(Equal("TestAlert2")) + Expect(rule.Expr.String()).To(Equal("cpu > 80")) + Expect(rule.Annotations).To(HaveKeyWithValue("summary", "High CPU usage")) + }) + + It("should return an error when the mapper cannot find the rule", func() { + alertRuleId := "nonexistent-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, ErrAlertRuleNotFound + } + + By("attempting to retrieve a 
nonexistent rule") + _, err := client.GetRuleById(ctx, alertRuleId) + + By("verifying an error is returned") + Expect(err).To(HaveOccurred()) + Expect(err).To(Equal(ErrAlertRuleNotFound)) + }) + + It("should return an error when the PrometheusRule does not exist", func() { + alertRuleId := "test-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "monitoring", + Name: "nonexistent-rule", + }, nil + } + + By("attempting to retrieve a rule from a nonexistent PrometheusRule") + _, err := client.GetRuleById(ctx, alertRuleId) + + By("verifying an error is returned") + Expect(err).To(HaveOccurred()) + }) + + It("should return an error when the rule ID is not found in the PrometheusRule", func() { + By("setting up a PrometheusRule without the target rule") + rule1 := monitoringv1.Rule{ + Alert: "DifferentAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rules", + Namespace: "monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "monitoring/test-rules": prometheusRule, + }) + + alertRuleId := "nonexistent-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "monitoring", + Name: "test-rules", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId("different-id") + } + + By("attempting to retrieve the rule") + _, err := client.GetRuleById(ctx, alertRuleId) + + By("verifying an error is returned") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("alert rule with id")) + Expect(err.Error()).To(ContainSubstring("not found")) + }) + }) +}) diff --git a/pkg/management/list_rules.go b/pkg/management/list_rules.go new file mode 100644 index 000000000..24d92a8c1 --- /dev/null +++ b/pkg/management/list_rules.go @@ -0,0 +1,133 @@ +package management + +import ( + "context" + "errors" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +const alertRuleIdLabel = "alert_rule_id" + +func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, arOptions AlertRuleOptions) ([]monitoringv1.Rule, error) { + if prOptions.Name != "" && prOptions.Namespace == "" { + return nil, errors.New("PrometheusRule Namespace must be specified when Name is provided") + } + + // Name and Namespace specified + if prOptions.Name != "" && prOptions.Namespace != "" { + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prOptions.Namespace, prOptions.Name) + if err != nil { + return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", prOptions.Namespace, prOptions.Name, err) + } + if !found { + return nil, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prOptions.Namespace, prOptions.Name)} + } + return c.extractAndFilterRules(*pr, &prOptions, &arOptions), nil + } + + // Name not specified + allPrometheusRules, err := c.k8sClient.PrometheusRules().List(ctx, prOptions.Namespace) + if err != nil { + 
return nil, fmt.Errorf("failed to list PrometheusRules: %w", err) + } + + var allRules []monitoringv1.Rule + for _, pr := range allPrometheusRules { + rules := c.extractAndFilterRules(pr, &prOptions, &arOptions) + allRules = append(allRules, rules...) + } + + return allRules, nil +} + +func (c *client) extractAndFilterRules(pr monitoringv1.PrometheusRule, prOptions *PrometheusRuleOptions, arOptions *AlertRuleOptions) []monitoringv1.Rule { + var rules []monitoringv1.Rule + + for _, group := range pr.Spec.Groups { + // Filter by group name if specified + if prOptions.GroupName != "" && group.Name != prOptions.GroupName { + continue + } + + for _, rule := range group.Rules { + // Skip recording rules (only process alert rules) + if rule.Alert == "" { + continue + } + + // Apply alert rule filters + if !c.matchesAlertRuleFilters(rule, pr, arOptions) { + continue + } + + // Parse and update the rule based on relabeling configurations + r := c.parseRule(rule) + if r != nil { + rules = append(rules, *r) + } + } + } + + return rules +} + +func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, pr monitoringv1.PrometheusRule, arOptions *AlertRuleOptions) bool { + // Filter by alert name + if arOptions.Name != "" && string(rule.Alert) != arOptions.Name { + return false + } + + // Filter by source (platform or user-defined) + if arOptions.Source != "" { + prId := types.NamespacedName{Name: pr.Name, Namespace: pr.Namespace} + isPlatform := IsPlatformAlertRule(prId) + + if arOptions.Source == "platform" && !isPlatform { + return false + } + if arOptions.Source == "user-defined" && isPlatform { + return false + } + } + + // Filter by labels + if len(arOptions.Labels) > 0 { + for key, value := range arOptions.Labels { + ruleValue, exists := rule.Labels[key] + if !exists || ruleValue != value { + return false + } + } + } + + return true +} + +func (c *client) parseRule(rule monitoringv1.Rule) *monitoringv1.Rule { + alertRuleId := c.mapper.GetAlertingRuleId(&rule) + if alertRuleId == "" { + return nil + } + + _, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) + if err != nil { + return nil + } + + rule, err = c.updateRuleBasedOnRelabelConfig(&rule) + if err != nil { + return nil + } + + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + rule.Labels[alertRuleIdLabel] = string(alertRuleId) + + return &rule +} diff --git a/pkg/management/list_rules_test.go b/pkg/management/list_rules_test.go new file mode 100644 index 000000000..3003801b2 --- /dev/null +++ b/pkg/management/list_rules_test.go @@ -0,0 +1,451 @@ +package management_test + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("ListRules", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + It("should list rules from a specific PrometheusRule", func() { + testRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rule", + Namespace: "test-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{testRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "test-namespace/test-rule": prometheusRule, + }) + + options := management.PrometheusRuleOptions{ + Name: "test-rule", + Namespace: "test-namespace", + GroupName: "test-group", + } + + rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(1)) + Expect(rules[0].Alert).To(Equal("TestAlert")) + Expect(rules[0].Expr.String()).To(Equal("up == 0")) + }) + + It("should list rules from all namespaces", func() { + testRule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + } + + testRule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + + prometheusRule1 := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rule1", + Namespace: "namespace1", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{testRule1}, + }, + }, + }, + } + + prometheusRule2 := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rule2", + Namespace: "namespace2", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group2", + Rules: []monitoringv1.Rule{testRule2}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "namespace1/rule1": prometheusRule1, + "namespace2/rule2": prometheusRule2, + }) + + options := management.PrometheusRuleOptions{} + + rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(2)) + + alertNames := []string{rules[0].Alert, rules[1].Alert} + Expect(alertNames).To(ContainElement("TestAlert1")) + Expect(alertNames).To(ContainElement("TestAlert2")) + }) + + It("should list all rules from a specific namespace", func() { + // Setup test data in the same namespace but different PrometheusRules + testRule1 := monitoringv1.Rule{ + Alert: "NamespaceAlert1", + Expr: 
intstr.FromString("memory_usage > 90"), + } + + testRule2 := monitoringv1.Rule{ + Alert: "NamespaceAlert2", + Expr: intstr.FromString("disk_usage > 85"), + } + + testRule3 := monitoringv1.Rule{ + Alert: "OtherNamespaceAlert", + Expr: intstr.FromString("network_error_rate > 0.1"), + } + + // PrometheusRule in target namespace + prometheusRule1 := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rule1", + Namespace: "target-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{testRule1}, + }, + }, + }, + } + + // Another PrometheusRule in the same target namespace + prometheusRule2 := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rule2", + Namespace: "target-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group2", + Rules: []monitoringv1.Rule{testRule2}, + }, + }, + }, + } + + // PrometheusRule in a different namespace (should not be included) + prometheusRule3 := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rule3", + Namespace: "other-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group3", + Rules: []monitoringv1.Rule{testRule3}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "target-namespace/rule1": prometheusRule1, + "target-namespace/rule2": prometheusRule2, + "other-namespace/rule3": prometheusRule3, + }) + + options := management.PrometheusRuleOptions{ + Namespace: "target-namespace", + } + + rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(2)) + + alertNames := []string{rules[0].Alert, rules[1].Alert} + Expect(alertNames).To(ContainElement("NamespaceAlert1")) + Expect(alertNames).To(ContainElement("NamespaceAlert2")) + Expect(alertNames).ToNot(ContainElement("OtherNamespaceAlert")) + }) + + Context("AlertRuleOptions filtering", func() { + var prometheusRule *monitoringv1.PrometheusRule + + BeforeEach(func() { + prometheusRule = &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-alerts", + Namespace: "monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "critical-alerts", + Rules: []monitoringv1.Rule{ + { + Alert: "HighCPUUsage", + Expr: intstr.FromString("cpu_usage > 90"), + Labels: map[string]string{ + "severity": "critical", + "component": "node", + }, + }, + { + Alert: "HighCPUUsage", + Expr: intstr.FromString("cpu_usage > 80"), + Labels: map[string]string{ + "severity": "warning", + "component": "node", + }, + }, + { + Alert: "DiskSpaceLow", + Expr: intstr.FromString("disk_usage > 95"), + Labels: map[string]string{ + "severity": "critical", + "component": "storage", + }, + }, + }, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "monitoring/test-alerts": prometheusRule, + }) + }) + + It("should filter by alert name", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "test-alerts", + Namespace: "monitoring", + } + arOptions := management.AlertRuleOptions{ + Name: "HighCPUUsage", + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(2)) + Expect(rules[0].Alert).To(Equal("HighCPUUsage")) + Expect(rules[1].Alert).To(Equal("HighCPUUsage")) + }) + + 
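+		// The remaining cases exercise the AlertRuleOptions filters: label filters
+		// are ANDed together, and Source is resolved from the PrometheusRule
+		// namespace (an "openshift-" prefix marks a platform rule). A minimal
+		// combined call, using the fixtures above, looks like:
+		//
+		//	rules, err := client.ListRules(ctx,
+		//		management.PrometheusRuleOptions{Namespace: "monitoring", Name: "test-alerts"},
+		//		management.AlertRuleOptions{Name: "HighCPUUsage", Labels: map[string]string{"severity": "critical"}},
+		//	)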
It("should filter by label severity", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "test-alerts", + Namespace: "monitoring", + } + arOptions := management.AlertRuleOptions{ + Labels: map[string]string{ + "severity": "critical", + }, + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(2)) + + alertNames := []string{rules[0].Alert, rules[1].Alert} + Expect(alertNames).To(ContainElement("HighCPUUsage")) + Expect(alertNames).To(ContainElement("DiskSpaceLow")) + + for _, rule := range rules { + Expect(rule.Labels["severity"]).To(Equal("critical")) + } + }) + + It("should filter by multiple labels", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "test-alerts", + Namespace: "monitoring", + } + arOptions := management.AlertRuleOptions{ + Labels: map[string]string{ + "severity": "critical", + "component": "storage", + }, + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(1)) + Expect(rules[0].Alert).To(Equal("DiskSpaceLow")) + Expect(rules[0].Labels["severity"]).To(Equal("critical")) + Expect(rules[0].Labels["component"]).To(Equal("storage")) + }) + + It("should filter by source platform", func() { + platformRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{ + { + Alert: "PlatformAlert", + Expr: intstr.FromString("platform_metric > 0"), + }, + }, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "monitoring/test-alerts": prometheusRule, + "openshift-monitoring/openshift-platform-alerts": platformRule, + }) + + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{ + Source: "platform", + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(1)) + Expect(rules[0].Alert).To(Equal("PlatformAlert")) + }) + + It("should filter by source user-defined", func() { + platformRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{ + { + Alert: "PlatformAlert", + Expr: intstr.FromString("platform_metric > 0"), + }, + }, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "monitoring/test-alerts": prometheusRule, + "openshift-monitoring/openshift-platform-alerts": platformRule, + }) + + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{ + Source: "user-defined", + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(3)) + + alertNames := []string{rules[0].Alert, rules[1].Alert, rules[2].Alert} + Expect(alertNames).To(ContainElement("HighCPUUsage")) + Expect(alertNames).To(ContainElement("DiskSpaceLow")) + Expect(alertNames).ToNot(ContainElement("PlatformAlert")) + }) + + It("should combine multiple filters", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "test-alerts", + Namespace: "monitoring", + } + arOptions := 
management.AlertRuleOptions{ + Name: "HighCPUUsage", + Labels: map[string]string{ + "severity": "critical", + }, + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(HaveLen(1)) + Expect(rules[0].Alert).To(Equal("HighCPUUsage")) + Expect(rules[0].Labels["severity"]).To(Equal("critical")) + }) + + It("should return empty list when no rules match filters", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "test-alerts", + Namespace: "monitoring", + } + arOptions := management.AlertRuleOptions{ + Name: "NonExistentAlert", + } + + rules, err := client.ListRules(ctx, prOptions, arOptions) + + Expect(err).ToNot(HaveOccurred()) + Expect(rules).To(BeEmpty()) + }) + }) +}) diff --git a/pkg/management/management.go b/pkg/management/management.go new file mode 100644 index 000000000..7135755b6 --- /dev/null +++ b/pkg/management/management.go @@ -0,0 +1,19 @@ +package management + +import ( + "strings" + + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +type client struct { + k8sClient k8s.Client + mapper mapper.Client +} + +func IsPlatformAlertRule(prId types.NamespacedName) bool { + return strings.HasPrefix(prId.Namespace, "openshift-") +} diff --git a/pkg/management/management_suite_test.go b/pkg/management/management_suite_test.go new file mode 100644 index 000000000..6cf1a3084 --- /dev/null +++ b/pkg/management/management_suite_test.go @@ -0,0 +1,13 @@ +package management_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestManagement(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Management Suite") +} diff --git a/pkg/management/mapper/mapper.go b/pkg/management/mapper/mapper.go new file mode 100644 index 000000000..4941270b9 --- /dev/null +++ b/pkg/management/mapper/mapper.go @@ -0,0 +1,286 @@ +package mapper + +import ( + "context" + "crypto/sha256" + "fmt" + "log" + "regexp" + "slices" + "sort" + "strings" + "sync" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +type mapper struct { + k8sClient k8s.Client + mu sync.RWMutex + + prometheusRules map[PrometheusRuleId][]PrometheusAlertRuleId + alertRelabelConfigs map[AlertRelabelConfigId][]osmv1.RelabelConfig +} + +var _ Client = (*mapper)(nil) + +func (m *mapper) GetAlertingRuleId(alertRule *monitoringv1.Rule) PrometheusAlertRuleId { + var kind, name string + if alertRule.Alert != "" { + kind = "alert" + name = alertRule.Alert + } else if alertRule.Record != "" { + kind = "record" + name = alertRule.Record + } else { + return "" + } + + expr := alertRule.Expr.String() + forDuration := "" + if alertRule.For != nil { + forDuration = string(*alertRule.For) + } + + var sortedLabels []string + if alertRule.Labels != nil { + for key, value := range alertRule.Labels { + sortedLabels = append(sortedLabels, fmt.Sprintf("%s=%s", key, value)) + } + sort.Strings(sortedLabels) + } + + var sortedAnnotations []string + if alertRule.Annotations != nil { + for key, value := range alertRule.Annotations { + sortedAnnotations = append(sortedAnnotations, fmt.Sprintf("%s=%s", key, value)) + } + sort.Strings(sortedAnnotations) + } + + // Build the hash input string + hashInput := strings.Join([]string{ + kind, + name, + expr, + 
forDuration, + strings.Join(sortedLabels, ","), + strings.Join(sortedAnnotations, ","), + }, "\n") + + // Generate SHA256 hash + hash := sha256.Sum256([]byte(hashInput)) + + return PrometheusAlertRuleId(fmt.Sprintf("%s/%x", name, hash)) +} + +func (m *mapper) FindAlertRuleById(alertRuleId PrometheusAlertRuleId) (*PrometheusRuleId, error) { + m.mu.RLock() + defer m.mu.RUnlock() + + for id, rules := range m.prometheusRules { + if slices.Contains(rules, alertRuleId) { + return &id, nil + } + } + + // If the PrometheusRuleId is not found, return an error + return nil, fmt.Errorf("alert rule with id %s not found", alertRuleId) +} + +func (m *mapper) WatchPrometheusRules(ctx context.Context) { + go func() { + callbacks := k8s.PrometheusRuleInformerCallback{ + OnAdd: func(pr *monitoringv1.PrometheusRule) { + m.AddPrometheusRule(pr) + }, + OnUpdate: func(pr *monitoringv1.PrometheusRule) { + m.AddPrometheusRule(pr) + }, + OnDelete: func(pr *monitoringv1.PrometheusRule) { + m.DeletePrometheusRule(pr) + }, + } + + err := m.k8sClient.PrometheusRuleInformer().Run(ctx, callbacks) + if err != nil { + log.Fatalf("Failed to run PrometheusRule informer: %v", err) + } + }() +} + +func (m *mapper) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { + m.mu.Lock() + defer m.mu.Unlock() + + promRuleId := PrometheusRuleId(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name}) + delete(m.prometheusRules, promRuleId) + + rules := make([]PrometheusAlertRuleId, 0) + for _, group := range pr.Spec.Groups { + for _, rule := range group.Rules { + if rule.Alert != "" { + ruleId := m.GetAlertingRuleId(&rule) + if ruleId != "" { + rules = append(rules, ruleId) + } + } + } + } + + m.prometheusRules[promRuleId] = rules +} + +func (m *mapper) DeletePrometheusRule(pr *monitoringv1.PrometheusRule) { + m.mu.Lock() + defer m.mu.Unlock() + + delete(m.prometheusRules, PrometheusRuleId(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) +} + +func (m *mapper) WatchAlertRelabelConfigs(ctx context.Context) { + go func() { + callbacks := k8s.AlertRelabelConfigInformerCallback{ + OnAdd: func(arc *osmv1.AlertRelabelConfig) { + m.AddAlertRelabelConfig(arc) + }, + OnUpdate: func(arc *osmv1.AlertRelabelConfig) { + m.AddAlertRelabelConfig(arc) + }, + OnDelete: func(arc *osmv1.AlertRelabelConfig) { + m.DeleteAlertRelabelConfig(arc) + }, + } + + err := m.k8sClient.AlertRelabelConfigInformer().Run(ctx, callbacks) + if err != nil { + log.Fatalf("Failed to run AlertRelabelConfig informer: %v", err) + } + }() +} + +func (m *mapper) AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { + m.mu.Lock() + defer m.mu.Unlock() + + arcId := AlertRelabelConfigId(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name}) + + // Clean up old entries + delete(m.alertRelabelConfigs, arcId) + + configs := make([]osmv1.RelabelConfig, 0) + + for _, config := range arc.Spec.Configs { + if slices.Contains(config.SourceLabels, "alertname") { + alertname := parseAlertnameFromRelabelConfig(config) + if alertname != "" { + configs = append(configs, config) + } + } + } + + if len(configs) > 0 { + m.alertRelabelConfigs[arcId] = configs + } +} + +func parseAlertnameFromRelabelConfig(config osmv1.RelabelConfig) string { + separator := config.Separator + if separator == "" { + separator = ";" + } + + regex := config.Regex + if regex == "" { + return "" + } + + values := strings.Split(regex, separator) + if len(values) != len(config.SourceLabels) { + return "" + } + + // Find the alertname value from source labels + for i, labelName := range 
config.SourceLabels { + if string(labelName) == "alertname" { + return values[i] + } + } + + return "" +} + +func (m *mapper) DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { + m.mu.Lock() + defer m.mu.Unlock() + + arcId := AlertRelabelConfigId(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name}) + delete(m.alertRelabelConfigs, arcId) +} + +func (m *mapper) GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig { + m.mu.RLock() + defer m.mu.RUnlock() + + if alertRule == nil { + return nil + } + + var matchingConfigs []osmv1.RelabelConfig + + // Iterate through all AlertRelabelConfigs + for _, configs := range m.alertRelabelConfigs { + for _, config := range configs { + if m.configMatchesAlert(config, alertRule) { + matchingConfigs = append(matchingConfigs, config) + } + } + } + + return matchingConfigs +} + +// configMatchesAlert checks if a RelabelConfig matches the given alert rule's labels +func (m *mapper) configMatchesAlert(config osmv1.RelabelConfig, alertRule *monitoringv1.Rule) bool { + separator := config.Separator + if separator == "" { + separator = ";" + } + + var labelValues []string + for _, labelName := range config.SourceLabels { + labelValue := "" + + if string(labelName) == "alertname" { + if alertRule.Alert != "" { + labelValue = alertRule.Alert + } + } else { + if alertRule.Labels != nil { + if val, exists := alertRule.Labels[string(labelName)]; exists { + labelValue = val + } + } + } + + labelValues = append(labelValues, labelValue) + } + + ruleLabels := strings.Join(labelValues, separator) + + regex := config.Regex + if regex == "" { + regex = "(.*)" + } + + matched, err := regexp.MatchString(regex, ruleLabels) + if err != nil { + return false + } + + return matched +} diff --git a/pkg/management/mapper/mapper_suite_test.go b/pkg/management/mapper/mapper_suite_test.go new file mode 100644 index 000000000..ad8ae2bb4 --- /dev/null +++ b/pkg/management/mapper/mapper_suite_test.go @@ -0,0 +1,13 @@ +package mapper_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestMapper(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Mapper Suite") +} diff --git a/pkg/management/mapper/mapper_test.go b/pkg/management/mapper/mapper_test.go new file mode 100644 index 000000000..fff7158ca --- /dev/null +++ b/pkg/management/mapper/mapper_test.go @@ -0,0 +1,855 @@ +package mapper_test + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("Mapper", func() { + var ( + mockK8sClient *testutils.MockClient + mapperClient mapper.Client + ) + + BeforeEach(func() { + mockK8sClient = &testutils.MockClient{} + mapperClient = mapper.New(mockK8sClient) + }) + + createPrometheusRule := func(namespace, name string, alertRules []monitoringv1.Rule) *monitoringv1.PrometheusRule { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: alertRules, + }, + }, + }, + } + } + + Describe("GetAlertingRuleId", func() { + Context("when generating IDs for alert rules", func() { + It("should generate a non-empty ID for a simple alert rule", func() { + By("creating a simple alert rule") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + By("generating the rule ID") + ruleId := mapperClient.GetAlertingRuleId(&alertRule) + + By("verifying the result") + Expect(ruleId).NotTo(BeEmpty()) + Expect(string(ruleId)).To(HaveLen(len(alertRule.Alert) + 1 + 64)) // alertname + separator + SHA256 hash should be 64 characters + }) + + It("should generate different IDs for different alert rules", func() { + By("creating two different alert rules") + alertRule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + } + alertRule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu > 80"), + } + + By("generating rule IDs") + ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) + ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) + + By("verifying the results") + Expect(ruleId1).NotTo(BeEmpty()) + Expect(ruleId2).NotTo(BeEmpty()) + Expect(ruleId1).NotTo(Equal(ruleId2)) + }) + + It("should generate the same ID for identical alert rules", func() { + By("creating two identical alert rules") + alertRule1 := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + alertRule2 := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + By("generating rule IDs") + ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) + ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) + + By("verifying the results") + Expect(ruleId1).NotTo(BeEmpty()) + Expect(ruleId2).NotTo(BeEmpty()) + Expect(ruleId1).To(Equal(ruleId2)) + }) + + It("should return empty string for rules without alert or record name", func() { + By("creating a rule without alert or record name") + alertRule := monitoringv1.Rule{ + Expr: intstr.FromString("up == 0"), + } + + By("generating the rule ID") + ruleId := mapperClient.GetAlertingRuleId(&alertRule) + + By("verifying the result") + Expect(ruleId).To(BeEmpty()) + }) + }) + }) + + Describe("FindAlertRuleById", func() { + Context("when the alert rule exists", func() { + It("should return the correct PrometheusRuleId", func() { + By("creating test alert rule") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + + By("creating PrometheusRule") + pr := 
createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) + + By("adding the PrometheusRule to the mapper") + mapperClient.AddPrometheusRule(pr) + + By("getting the generated rule ID") + ruleId := mapperClient.GetAlertingRuleId(&alertRule) + Expect(ruleId).NotTo(BeEmpty()) + + By("testing FindAlertRuleById") + foundPrometheusRuleId, err := mapperClient.FindAlertRuleById(ruleId) + + By("verifying results") + Expect(err).NotTo(HaveOccurred()) + expectedPrometheusRuleId := mapper.PrometheusRuleId(types.NamespacedName{ + Namespace: "test-namespace", + Name: "test-rule", + }) + Expect(*foundPrometheusRuleId).To(Equal(expectedPrometheusRuleId)) + }) + + It("should return the correct PrometheusRuleId when alert rule is one of multiple in the same PrometheusRule", func() { + By("creating multiple test alert rules") + alertRule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + } + alertRule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu > 80"), + } + + By("creating PrometheusRule with multiple rules") + pr := createPrometheusRule("multi-namespace", "multi-rule", []monitoringv1.Rule{alertRule1, alertRule2}) + + By("adding the PrometheusRule to the mapper") + mapperClient.AddPrometheusRule(pr) + + By("getting the generated rule IDs") + ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) + ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) + Expect(ruleId1).NotTo(BeEmpty()) + Expect(ruleId2).NotTo(BeEmpty()) + Expect(ruleId1).NotTo(Equal(ruleId2)) + + By("testing FindAlertRuleById for both rules") + expectedPrometheusRuleId := mapper.PrometheusRuleId(types.NamespacedName{ + Namespace: "multi-namespace", + Name: "multi-rule", + }) + + foundPrometheusRuleId1, err1 := mapperClient.FindAlertRuleById(ruleId1) + Expect(err1).NotTo(HaveOccurred()) + Expect(*foundPrometheusRuleId1).To(Equal(expectedPrometheusRuleId)) + + foundPrometheusRuleId2, err2 := mapperClient.FindAlertRuleById(ruleId2) + Expect(err2).NotTo(HaveOccurred()) + Expect(*foundPrometheusRuleId2).To(Equal(expectedPrometheusRuleId)) + }) + }) + + Context("when the alert rule does not exist", func() { + It("should return an error when no rules are mapped", func() { + By("setting up test data") + nonExistentRuleId := mapper.PrometheusAlertRuleId("non-existent-rule-id") + + By("testing the method") + _, err := mapperClient.FindAlertRuleById(nonExistentRuleId) + + By("verifying results") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("alert rule with id non-existent-rule-id not found")) + }) + + It("should return an error when rules are mapped but the target rule is not found", func() { + By("creating and adding a valid alert rule") + alertRule := monitoringv1.Rule{ + Alert: "ValidAlert", + Expr: intstr.FromString("up == 0"), + } + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) + mapperClient.AddPrometheusRule(pr) + + By("trying to find a non-existent rule ID") + nonExistentRuleId := mapper.PrometheusAlertRuleId("definitely-non-existent-rule-id") + + By("testing the method") + _, err := mapperClient.FindAlertRuleById(nonExistentRuleId) + + By("verifying results") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("alert rule with id definitely-non-existent-rule-id not found")) + }) + }) + }) + + Describe("AddPrometheusRule", func() { + Context("when adding PrometheusRules", func() { + It("should successfully add a PrometheusRule with alert rules", func() { + 
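+				// AddPrometheusRule (re)indexes the object: each alerting rule is mapped
+				// from its generated ID back to the owning namespace/name so that
+				// FindAlertRuleById can resolve it later; recording rules are skipped.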
By("creating a PrometheusRule with alert rules") + alertRule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + } + alertRule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu > 80"), + } + + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule1, alertRule2}) + + By("adding the PrometheusRule") + mapperClient.AddPrometheusRule(pr) + + By("verifying the rules can be found") + ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) + foundPr1, err1 := mapperClient.FindAlertRuleById(ruleId1) + Expect(err1).ToNot(HaveOccurred()) + Expect(foundPr1.Namespace).To(Equal("test-namespace")) + Expect(foundPr1.Name).To(Equal("test-rule")) + + ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) + foundPr2, err2 := mapperClient.FindAlertRuleById(ruleId2) + Expect(err2).ToNot(HaveOccurred()) + Expect(foundPr2.Namespace).To(Equal("test-namespace")) + Expect(foundPr2.Name).To(Equal("test-rule")) + }) + + It("should update existing PrometheusRule when added again", func() { + By("creating and adding initial PrometheusRule") + alertRule1 := monitoringv1.Rule{ + Alert: "TestAlert1", + Expr: intstr.FromString("up == 0"), + } + pr1 := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule1}) + mapperClient.AddPrometheusRule(pr1) + + By("creating updated PrometheusRule with different alerts") + alertRule2 := monitoringv1.Rule{ + Alert: "TestAlert2", + Expr: intstr.FromString("cpu > 80"), + } + pr2 := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule2}) + mapperClient.AddPrometheusRule(pr2) + + By("verifying old rule is no longer found") + ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) + _, err1 := mapperClient.FindAlertRuleById(ruleId1) + Expect(err1).To(HaveOccurred()) + + By("verifying new rule is found") + ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) + foundPr, err2 := mapperClient.FindAlertRuleById(ruleId2) + Expect(err2).ToNot(HaveOccurred()) + Expect(foundPr.Namespace).To(Equal("test-namespace")) + }) + + It("should ignore recording rules (not alert rules)", func() { + By("creating a PrometheusRule with recording rule") + recordingRule := monitoringv1.Rule{ + Record: "test:recording:rule", + Expr: intstr.FromString("sum(up)"), + } + + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{recordingRule}) + + By("adding the PrometheusRule") + mapperClient.AddPrometheusRule(pr) + + By("verifying the recording rule is not found") + ruleId := mapperClient.GetAlertingRuleId(&recordingRule) + _, err := mapperClient.FindAlertRuleById(ruleId) + Expect(err).To(HaveOccurred()) + }) + }) + }) + + Describe("DeletePrometheusRule", func() { + Context("when deleting PrometheusRules", func() { + It("should successfully delete a PrometheusRule", func() { + By("creating and adding a PrometheusRule") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) + mapperClient.AddPrometheusRule(pr) + + By("verifying the rule exists") + ruleId := mapperClient.GetAlertingRuleId(&alertRule) + _, err := mapperClient.FindAlertRuleById(ruleId) + Expect(err).ToNot(HaveOccurred()) + + By("deleting the PrometheusRule") + mapperClient.DeletePrometheusRule(pr) + + By("verifying the rule is no longer found") + _, err = mapperClient.FindAlertRuleById(ruleId) + Expect(err).To(HaveOccurred()) + 
Expect(err.Error()).To(ContainSubstring("not found")) + }) + + It("should handle deleting non-existent PrometheusRule gracefully", func() { + By("creating a PrometheusRule that was never added") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + } + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) + + By("deleting the non-existent PrometheusRule") + Expect(func() { + mapperClient.DeletePrometheusRule(pr) + }).NotTo(Panic()) + + By("verifying mapper still works after delete attempt") + // Add a different rule to verify the mapper is still functional + alertRule2 := monitoringv1.Rule{ + Alert: "AnotherAlert", + Expr: intstr.FromString("cpu > 80"), + } + pr2 := createPrometheusRule("test-namespace", "another-rule", []monitoringv1.Rule{alertRule2}) + mapperClient.AddPrometheusRule(pr2) + + ruleId := mapperClient.GetAlertingRuleId(&alertRule2) + foundPr, err := mapperClient.FindAlertRuleById(ruleId) + Expect(err).ToNot(HaveOccurred()) + Expect(foundPr.Name).To(Equal("another-rule")) + }) + }) + }) + + Describe("AddAlertRelabelConfig", func() { + Context("when adding AlertRelabelConfigs", func() { + It("should successfully add an AlertRelabelConfig", func() { + By("creating an AlertRelabelConfig") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Separator: ";", + Regex: "TestAlert;critical", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + }, + }, + } + + By("adding the AlertRelabelConfig") + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying it can be retrieved") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + Labels: map[string]string{ + "severity": "critical", + }, + } + configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(configs).To(HaveLen(1)) + Expect(configs[0].SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) + Expect(configs[0].Regex).To(Equal("TestAlert;critical")) + }) + + It("should ignore configs without alertname in SourceLabels", func() { + By("creating an AlertRelabelConfig without alertname") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"severity", "namespace"}, + Separator: ";", + Regex: "critical;default", + TargetLabel: "priority", + Replacement: "high", + Action: "Replace", + }, + }, + }, + } + + By("adding the AlertRelabelConfig") + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying it returns empty for an alert") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + Labels: map[string]string{ + "severity": "critical", + "namespace": "default", + }, + } + specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(specs).To(BeEmpty()) + }) + + It("should update existing AlertRelabelConfig when added again", func() { + By("creating and adding initial AlertRelabelConfig") + arc1 := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "Alert1", + TargetLabel: "severity", + Replacement: 
"warning", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc1) + + By("creating updated AlertRelabelConfig") + arc2 := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "Alert2", + TargetLabel: "severity", + Replacement: "critical", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc2) + + By("verifying the updated config is retrieved") + alertRule := &monitoringv1.Rule{ + Alert: "Alert2", + } + configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(configs).To(HaveLen(1)) + Expect(configs[0].Regex).To(Equal("Alert2")) + }) + + It("should handle multiple relabel configs in single AlertRelabelConfig", func() { + By("creating AlertRelabelConfig with multiple configs") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "Alert1", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "Alert2", + TargetLabel: "priority", + Replacement: "high", + Action: "Replace", + }, + }, + }, + } + + By("adding the AlertRelabelConfig") + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying Alert1 gets its matching config") + alertRule1 := &monitoringv1.Rule{ + Alert: "Alert1", + } + specs1 := mapperClient.GetAlertRelabelConfigSpec(alertRule1) + Expect(specs1).To(HaveLen(1)) + Expect(specs1[0].TargetLabel).To(Equal("severity")) + + By("verifying Alert2 gets its matching config") + alertRule2 := &monitoringv1.Rule{ + Alert: "Alert2", + } + specs2 := mapperClient.GetAlertRelabelConfigSpec(alertRule2) + Expect(specs2).To(HaveLen(1)) + Expect(specs2[0].TargetLabel).To(Equal("priority")) + }) + + It("should handle configs with empty regex", func() { + By("creating AlertRelabelConfig with empty regex") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + }, + }, + } + + By("adding the AlertRelabelConfig") + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying it's ignored (empty regex)") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + } + specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(specs).To(BeEmpty()) + }) + + It("should handle configs where regex values don't match source labels count", func() { + By("creating AlertRelabelConfig with mismatched regex/labels") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Separator: ";", + Regex: "OnlyOneValue", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + }, + }, + } + + By("adding the AlertRelabelConfig") + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying it's ignored 
(mismatch)") + alertRule := &monitoringv1.Rule{ + Alert: "OnlyOneValue", + Labels: map[string]string{ + "severity": "critical", + }, + } + specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(specs).To(BeEmpty()) + }) + }) + }) + + Describe("DeleteAlertRelabelConfig", func() { + Context("when deleting AlertRelabelConfigs", func() { + It("should successfully delete an AlertRelabelConfig", func() { + By("creating and adding an AlertRelabelConfig") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "TestAlert", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc) + + By("verifying it exists") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + } + specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(specs).To(HaveLen(1)) + + By("deleting the AlertRelabelConfig") + mapperClient.DeleteAlertRelabelConfig(arc) + + By("verifying it's no longer found") + specs = mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(specs).To(BeEmpty()) + }) + + It("should handle deleting non-existent AlertRelabelConfig gracefully", func() { + By("creating an AlertRelabelConfig that was never added") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{}, + }, + } + + By("deleting the non-existent AlertRelabelConfig") + Expect(func() { + mapperClient.DeleteAlertRelabelConfig(arc) + }).NotTo(Panic()) + + By("verifying mapper still works after delete attempt") + // Add a different AlertRelabelConfig to verify the mapper is still functional + arc2 := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "another-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "TestAlert", + TargetLabel: "severity", + Replacement: "critical", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc2) + + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + } + configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + Expect(configs).To(HaveLen(1)) + Expect(configs[0].Regex).To(Equal("TestAlert")) + }) + }) + }) + + Describe("GetAlertRelabelConfigSpec", func() { + Context("when retrieving AlertRelabelConfig specs", func() { + It("should return specs for existing AlertRelabelConfig", func() { + By("creating and adding an AlertRelabelConfig") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Separator: ";", + Regex: "TestAlert;critical", + TargetLabel: "priority", + Replacement: "high", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc) + + By("retrieving the configs") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + Labels: map[string]string{ + "severity": "critical", + }, + } + configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + + By("verifying the configs") + Expect(configs).To(HaveLen(1)) + 
Expect(configs[0].TargetLabel).To(Equal("priority")) + Expect(configs[0].Replacement).To(Equal("high")) + Expect(configs[0].SourceLabels).To(ContainElements(osmv1.LabelName("alertname"), osmv1.LabelName("severity"))) + Expect(configs[0].Regex).To(Equal("TestAlert;critical")) + }) + + It("should return empty for alert that doesn't match any config", func() { + By("trying to get specs for an alert that doesn't match") + alertRule := &monitoringv1.Rule{ + Alert: "NonMatchingAlert", + Labels: map[string]string{ + "severity": "info", + }, + } + specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) + + By("verifying empty is returned") + Expect(specs).To(BeEmpty()) + }) + + It("should return copies of specs (not original pointers)", func() { + By("creating and adding an AlertRelabelConfig") + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "TestAlert", + TargetLabel: "severity", + Replacement: "warning", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc) + + By("retrieving configs twice") + alertRule := &monitoringv1.Rule{ + Alert: "TestAlert", + } + configs1 := mapperClient.GetAlertRelabelConfigSpec(alertRule) + configs2 := mapperClient.GetAlertRelabelConfigSpec(alertRule) + + By("verifying they are independent copies") + Expect(configs1).To(HaveLen(1)) + Expect(configs2).To(HaveLen(1)) + // Modify one and verify the other is unchanged + configs1[0].Replacement = "modified" + Expect(configs2[0].Replacement).To(Equal("warning")) + }) + }) + }) + + Describe("GetAlertRelabelConfigSpec with matching alerts", func() { + Context("when alert rule matches AlertRelabelConfig", func() { + It("should return matching configs from all AlertRelabelConfigs", func() { + By("creating and adding a PrometheusRule") + alertRule := monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) + mapperClient.AddPrometheusRule(pr) + + By("creating and adding first AlertRelabelConfig") + arc1 := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc-1", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Separator: ";", + Regex: "TestAlert", + TargetLabel: "priority", + Replacement: "high", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc1) + + By("creating and adding second AlertRelabelConfig") + arc2 := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-arc-2", + Namespace: "test-namespace", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Separator: ";", + Regex: "TestAlert;critical", + TargetLabel: "team", + Replacement: "platform", + Action: "Replace", + }, + }, + }, + } + mapperClient.AddAlertRelabelConfig(arc2) + + By("getting matching configs for the alert") + configs := mapperClient.GetAlertRelabelConfigSpec(&alertRule) + + By("verifying both configs are returned") + Expect(configs).To(HaveLen(2)) + // Verify first config + targetLabels := []string{configs[0].TargetLabel, configs[1].TargetLabel} + 
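// The order of returned configs is not guaranteed, so collect the target labels before asserting. +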
Expect(targetLabels).To(ContainElements("priority", "team")) + }) + }) + }) +}) diff --git a/pkg/management/mapper/new.go b/pkg/management/mapper/new.go new file mode 100644 index 000000000..aa5a3708a --- /dev/null +++ b/pkg/management/mapper/new.go @@ -0,0 +1,16 @@ +package mapper + +import ( + osmv1 "github.com/openshift/api/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +// New creates a new instance of the mapper client. +func New(k8sClient k8s.Client) Client { + return &mapper{ + k8sClient: k8sClient, + prometheusRules: make(map[PrometheusRuleId][]PrometheusAlertRuleId), + alertRelabelConfigs: make(map[AlertRelabelConfigId][]osmv1.RelabelConfig), + } +} diff --git a/pkg/management/mapper/types.go b/pkg/management/mapper/types.go new file mode 100644 index 000000000..f662a4d84 --- /dev/null +++ b/pkg/management/mapper/types.go @@ -0,0 +1,48 @@ +package mapper + +import ( + "context" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" +) + +// PrometheusRuleId is a unique identifier for a PrometheusRule resource in Kubernetes, represented by its NamespacedName. +type PrometheusRuleId types.NamespacedName + +// AlertRelabelConfigId is a unique identifier for an AlertRelabelConfig resource in Kubernetes, represented by its NamespacedName. +type AlertRelabelConfigId types.NamespacedName + +// PrometheusAlertRuleId is a hash-based identifier for an alerting rule within a PrometheusRule, represented by a string. +type PrometheusAlertRuleId string + +// Client defines the interface for mapping between Prometheus alerting rules and their unique identifiers. +type Client interface { + // GetAlertingRuleId returns the unique identifier for a given alerting rule. + GetAlertingRuleId(alertRule *monitoringv1.Rule) PrometheusAlertRuleId + + // FindAlertRuleById returns the PrometheusRuleId for a given alerting rule ID. + FindAlertRuleById(alertRuleId PrometheusAlertRuleId) (*PrometheusRuleId, error) + + // WatchPrometheusRules starts watching for changes to PrometheusRules. + WatchPrometheusRules(ctx context.Context) + + // AddPrometheusRule adds or updates a PrometheusRule in the mapper. + AddPrometheusRule(pr *monitoringv1.PrometheusRule) + + // DeletePrometheusRule removes a PrometheusRule from the mapper. + DeletePrometheusRule(pr *monitoringv1.PrometheusRule) + + // WatchAlertRelabelConfigs starts watching for changes to AlertRelabelConfigs. + WatchAlertRelabelConfigs(ctx context.Context) + + // AddAlertRelabelConfig adds or updates an AlertRelabelConfig in the mapper. + AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) + + // DeleteAlertRelabelConfig removes an AlertRelabelConfig from the mapper. + DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) + + // GetAlertRelabelConfigSpec returns the RelabelConfigs that match the given alert rule's labels. 
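+ // Only configs whose SourceLabels include "alertname" and whose regex matches the rule's corresponding label values (joined by the configured separator) are returned.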
+ GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig +} diff --git a/pkg/management/new.go b/pkg/management/new.go new file mode 100644 index 000000000..a4c827df2 --- /dev/null +++ b/pkg/management/new.go @@ -0,0 +1,24 @@ +package management + +import ( + "context" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +// New creates a new management client +func New(ctx context.Context, k8sClient k8s.Client) Client { + m := mapper.New(k8sClient) + m.WatchPrometheusRules(ctx) + m.WatchAlertRelabelConfigs(ctx) + + return NewWithCustomMapper(ctx, k8sClient, m) +} + +func NewWithCustomMapper(ctx context.Context, k8sClient k8s.Client, m mapper.Client) Client { + return &client{ + k8sClient: k8sClient, + mapper: m, + } +} diff --git a/pkg/management/relabel_config.go b/pkg/management/relabel_config.go new file mode 100644 index 000000000..552d37d56 --- /dev/null +++ b/pkg/management/relabel_config.go @@ -0,0 +1,46 @@ +package management + +import ( + "fmt" + + osmv1 "github.com/openshift/api/monitoring/v1" +) + +// applyRelabelConfigs applies relabel configurations to a set of labels. +// Returns the updated labels or an error if the alert/rule should be dropped. +func applyRelabelConfigs(name string, labels map[string]string, configs []osmv1.RelabelConfig) (map[string]string, error) { + if labels == nil { + labels = make(map[string]string) + } + + updatedLabels := make(map[string]string, len(labels)) + for k, v := range labels { + updatedLabels[k] = v + } + + for _, config := range configs { + // TODO: (machadovilaca) Implement all relabeling actions + // 'Replace', 'Keep', 'Drop', 'HashMod', 'LabelMap', 'LabelDrop', or 'LabelKeep' + + switch config.Action { + case "Drop": + return nil, fmt.Errorf("alert/rule %s has been dropped by relabeling configuration", name) + case "Replace": + updatedLabels[config.TargetLabel] = config.Replacement + case "Keep": + // Keep action is a no-op in this context since the alert/rule is already matched + case "HashMod": + // HashMod action is not implemented yet + case "LabelMap": + // LabelMap action is not implemented yet + case "LabelDrop": + // LabelDrop action is not implemented yet + case "LabelKeep": + // LabelKeep action is not implemented yet + default: + // Unsupported action, ignore + } + } + + return updatedLabels, nil +} diff --git a/pkg/management/relabel_config_test.go b/pkg/management/relabel_config_test.go new file mode 100644 index 000000000..1271fb202 --- /dev/null +++ b/pkg/management/relabel_config_test.go @@ -0,0 +1,171 @@ +package management + +import ( + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + osmv1 "github.com/openshift/api/monitoring/v1" +) + +var _ = Describe("applyRelabelConfigs", func() { + Context("when Drop action is applied", func() { + It("should return error", func() { + initialLabels := map[string]string{ + "severity": "critical", + } + configs := []osmv1.RelabelConfig{ + { + Action: "Drop", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).To(HaveOccurred()) + Expect(result).To(BeNil()) + }) + }) + + Context("when Replace action is applied", func() { + It("should update existing label", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{ + { + Action: "Replace", + TargetLabel: "severity", + Replacement: "critical", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "critical", + })) + }) + + It("should add new label", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{ + { + Action: "Replace", + TargetLabel: "team", + Replacement: "platform", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "warning", + "team": "platform", + })) + }) + + It("should work with nil labels", func() { + configs := []osmv1.RelabelConfig{ + { + Action: "Replace", + TargetLabel: "severity", + Replacement: "critical", + }, + } + + result, err := applyRelabelConfigs("TestAlert", nil, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "critical", + })) + }) + }) + + Context("when multiple Replace actions are applied", func() { + It("should apply all replacements", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{ + { + Action: "Replace", + TargetLabel: "severity", + Replacement: "critical", + }, + { + Action: "Replace", + TargetLabel: "team", + Replacement: "platform", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "critical", + "team": "platform", + })) + }) + }) + + Context("when Keep action is applied", func() { + It("should be a no-op", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{ + { + Action: "Keep", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "warning", + })) + }) + }) + + Context("when unknown action is applied", func() { + It("should be ignored", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{ + { + Action: "UnknownAction", + }, + } + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "warning", + })) + }) + }) + + Context("when no configs are provided", func() { + It("should return unchanged labels", func() { + initialLabels := map[string]string{ + "severity": "warning", + } + configs := []osmv1.RelabelConfig{} + + result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) + + 
Expect(err).ToNot(HaveOccurred()) + Expect(result).To(Equal(map[string]string{ + "severity": "warning", + })) + }) + }) +}) diff --git a/pkg/management/testutils/k8s_client_mock.go b/pkg/management/testutils/k8s_client_mock.go new file mode 100644 index 000000000..7849c5a0b --- /dev/null +++ b/pkg/management/testutils/k8s_client_mock.go @@ -0,0 +1,337 @@ +package testutils + +import ( + "context" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +// MockClient is a mock implementation of k8s.Client interface +type MockClient struct { + TestConnectionFunc func(ctx context.Context) error + PrometheusAlertsFunc func() k8s.PrometheusAlertsInterface + PrometheusRulesFunc func() k8s.PrometheusRuleInterface + PrometheusRuleInformerFunc func() k8s.PrometheusRuleInformerInterface + AlertRelabelConfigsFunc func() k8s.AlertRelabelConfigInterface + AlertRelabelConfigInformerFunc func() k8s.AlertRelabelConfigInformerInterface +} + +// TestConnection mocks the TestConnection method +func (m *MockClient) TestConnection(ctx context.Context) error { + if m.TestConnectionFunc != nil { + return m.TestConnectionFunc(ctx) + } + return nil +} + +// PrometheusAlerts mocks the PrometheusAlerts method +func (m *MockClient) PrometheusAlerts() k8s.PrometheusAlertsInterface { + if m.PrometheusAlertsFunc != nil { + return m.PrometheusAlertsFunc() + } + return &MockPrometheusAlertsInterface{} +} + +// PrometheusRules mocks the PrometheusRules method +func (m *MockClient) PrometheusRules() k8s.PrometheusRuleInterface { + if m.PrometheusRulesFunc != nil { + return m.PrometheusRulesFunc() + } + return &MockPrometheusRuleInterface{} +} + +// PrometheusRuleInformer mocks the PrometheusRuleInformer method +func (m *MockClient) PrometheusRuleInformer() k8s.PrometheusRuleInformerInterface { + if m.PrometheusRuleInformerFunc != nil { + return m.PrometheusRuleInformerFunc() + } + return &MockPrometheusRuleInformerInterface{} +} + +// AlertRelabelConfigs mocks the AlertRelabelConfigs method +func (m *MockClient) AlertRelabelConfigs() k8s.AlertRelabelConfigInterface { + if m.AlertRelabelConfigsFunc != nil { + return m.AlertRelabelConfigsFunc() + } + return &MockAlertRelabelConfigInterface{} +} + +// AlertRelabelConfigInformer mocks the AlertRelabelConfigInformer method +func (m *MockClient) AlertRelabelConfigInformer() k8s.AlertRelabelConfigInformerInterface { + if m.AlertRelabelConfigInformerFunc != nil { + return m.AlertRelabelConfigInformerFunc() + } + return &MockAlertRelabelConfigInformerInterface{} +} + +// MockPrometheusAlertsInterface is a mock implementation of k8s.PrometheusAlertsInterface +type MockPrometheusAlertsInterface struct { + GetAlertsFunc func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) + + // Storage for test data + ActiveAlerts []k8s.PrometheusAlert +} + +func (m *MockPrometheusAlertsInterface) SetActiveAlerts(alerts []k8s.PrometheusAlert) { + m.ActiveAlerts = alerts +} + +// GetAlerts mocks the GetAlerts method +func (m *MockPrometheusAlertsInterface) GetAlerts(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + if m.GetAlertsFunc != nil { + return m.GetAlertsFunc(ctx, req) + } + + if m.ActiveAlerts != nil { + return m.ActiveAlerts, nil + } + return []k8s.PrometheusAlert{}, nil +} + +// MockPrometheusRuleInterface is a mock implementation of 
k8s.PrometheusRuleInterface +type MockPrometheusRuleInterface struct { + ListFunc func(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) + GetFunc func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) + UpdateFunc func(ctx context.Context, pr monitoringv1.PrometheusRule) error + DeleteFunc func(ctx context.Context, namespace string, name string) error + AddRuleFunc func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error + + // Storage for test data + PrometheusRules map[string]*monitoringv1.PrometheusRule +} + +func (m *MockPrometheusRuleInterface) SetPrometheusRules(rules map[string]*monitoringv1.PrometheusRule) { + m.PrometheusRules = rules +} + +// List mocks the List method +func (m *MockPrometheusRuleInterface) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, namespace) + } + + var rules []monitoringv1.PrometheusRule + if m.PrometheusRules != nil { + for _, rule := range m.PrometheusRules { + if namespace == "" || rule.Namespace == namespace { + rules = append(rules, *rule) + } + } + } + return rules, nil +} + +// Get mocks the Get method +func (m *MockPrometheusRuleInterface) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.PrometheusRules != nil { + if rule, exists := m.PrometheusRules[key]; exists { + return rule, true, nil + } + } + + return nil, false, nil +} + +// Update mocks the Update method +func (m *MockPrometheusRuleInterface) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { + if m.UpdateFunc != nil { + return m.UpdateFunc(ctx, pr) + } + + key := pr.Namespace + "/" + pr.Name + if m.PrometheusRules == nil { + m.PrometheusRules = make(map[string]*monitoringv1.PrometheusRule) + } + m.PrometheusRules[key] = &pr + return nil +} + +// Delete mocks the Delete method +func (m *MockPrometheusRuleInterface) Delete(ctx context.Context, namespace string, name string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.PrometheusRules != nil { + delete(m.PrometheusRules, key) + } + return nil +} + +// AddRule mocks the AddRule method +func (m *MockPrometheusRuleInterface) AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + if m.AddRuleFunc != nil { + return m.AddRuleFunc(ctx, namespacedName, groupName, rule) + } + + key := namespacedName.Namespace + "/" + namespacedName.Name + if m.PrometheusRules == nil { + m.PrometheusRules = make(map[string]*monitoringv1.PrometheusRule) + } + + // Get or create PrometheusRule + pr, exists := m.PrometheusRules[key] + if !exists { + pr = &monitoringv1.PrometheusRule{ + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{}, + }, + } + pr.Name = namespacedName.Name + pr.Namespace = namespacedName.Namespace + m.PrometheusRules[key] = pr + } + + // Find or create the group + var group *monitoringv1.RuleGroup + for i := range pr.Spec.Groups { + if pr.Spec.Groups[i].Name == groupName { + group = &pr.Spec.Groups[i] + break + } + } + if group == nil { + pr.Spec.Groups = append(pr.Spec.Groups, monitoringv1.RuleGroup{ + Name: groupName, + Rules: []monitoringv1.Rule{}, + }) + group = 
&pr.Spec.Groups[len(pr.Spec.Groups)-1] + } + + // Add the new rule to the group + group.Rules = append(group.Rules, rule) + + return nil +} + +// MockPrometheusRuleInformerInterface is a mock implementation of k8s.PrometheusRuleInformerInterface +type MockPrometheusRuleInformerInterface struct { + RunFunc func(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error +} + +// Run mocks the Run method +func (m *MockPrometheusRuleInformerInterface) Run(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error { + if m.RunFunc != nil { + return m.RunFunc(ctx, callbacks) + } + + // Default implementation - just wait for context to be cancelled + <-ctx.Done() + return ctx.Err() +} + +// MockAlertRelabelConfigInterface is a mock implementation of k8s.AlertRelabelConfigInterface +type MockAlertRelabelConfigInterface struct { + ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + GetFunc func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) + CreateFunc func(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) + UpdateFunc func(ctx context.Context, arc osmv1.AlertRelabelConfig) error + DeleteFunc func(ctx context.Context, namespace string, name string) error + + // Storage for test data + AlertRelabelConfigs map[string]*osmv1.AlertRelabelConfig +} + +func (m *MockAlertRelabelConfigInterface) SetAlertRelabelConfigs(configs map[string]*osmv1.AlertRelabelConfig) { + m.AlertRelabelConfigs = configs +} + +// List mocks the List method +func (m *MockAlertRelabelConfigInterface) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, namespace) + } + + var configs []osmv1.AlertRelabelConfig + if m.AlertRelabelConfigs != nil { + for _, config := range m.AlertRelabelConfigs { + if namespace == "" || config.Namespace == namespace { + configs = append(configs, *config) + } + } + } + return configs, nil +} + +// Get mocks the Get method +func (m *MockAlertRelabelConfigInterface) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.AlertRelabelConfigs != nil { + if config, exists := m.AlertRelabelConfigs[key]; exists { + return config, true, nil + } + } + + return nil, false, nil +} + +// Create mocks the Create method +func (m *MockAlertRelabelConfigInterface) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + if m.CreateFunc != nil { + return m.CreateFunc(ctx, arc) + } + + key := arc.Namespace + "/" + arc.Name + if m.AlertRelabelConfigs == nil { + m.AlertRelabelConfigs = make(map[string]*osmv1.AlertRelabelConfig) + } + m.AlertRelabelConfigs[key] = &arc + return &arc, nil +} + +// Update mocks the Update method +func (m *MockAlertRelabelConfigInterface) Update(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + if m.UpdateFunc != nil { + return m.UpdateFunc(ctx, arc) + } + + key := arc.Namespace + "/" + arc.Name + if m.AlertRelabelConfigs == nil { + m.AlertRelabelConfigs = make(map[string]*osmv1.AlertRelabelConfig) + } + m.AlertRelabelConfigs[key] = &arc + return nil +} + +// Delete mocks the Delete method +func (m *MockAlertRelabelConfigInterface) Delete(ctx context.Context, namespace string, name string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, namespace, name) + 
} + + key := namespace + "/" + name + if m.AlertRelabelConfigs != nil { + delete(m.AlertRelabelConfigs, key) + } + return nil +} + +// MockAlertRelabelConfigInformerInterface is a mock implementation of k8s.AlertRelabelConfigInformerInterface +type MockAlertRelabelConfigInformerInterface struct { + RunFunc func(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error +} + +// Run mocks the Run method +func (m *MockAlertRelabelConfigInformerInterface) Run(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error { + if m.RunFunc != nil { + return m.RunFunc(ctx, callbacks) + } + + // Default implementation - just wait for context to be cancelled + <-ctx.Done() + return ctx.Err() +} diff --git a/pkg/management/testutils/mapper_mock.go b/pkg/management/testutils/mapper_mock.go new file mode 100644 index 000000000..e353a3d55 --- /dev/null +++ b/pkg/management/testutils/mapper_mock.go @@ -0,0 +1,82 @@ +package testutils + +import ( + "context" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +var _ mapper.Client = &MockMapperClient{} + +// MockMapperClient is a simple mock for the mapper.Client interface +type MockMapperClient struct { + GetAlertingRuleIdFunc func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId + FindAlertRuleByIdFunc func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) + WatchPrometheusRulesFunc func(ctx context.Context) + AddPrometheusRuleFunc func(pr *monitoringv1.PrometheusRule) + DeletePrometheusRuleFunc func(pr *monitoringv1.PrometheusRule) + WatchAlertRelabelConfigsFunc func(ctx context.Context) + AddAlertRelabelConfigFunc func(arc *osmv1.AlertRelabelConfig) + DeleteAlertRelabelConfigFunc func(arc *osmv1.AlertRelabelConfig) + GetAlertRelabelConfigSpecFunc func(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig +} + +func (m *MockMapperClient) GetAlertingRuleId(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if m.GetAlertingRuleIdFunc != nil { + return m.GetAlertingRuleIdFunc(alertRule) + } + return mapper.PrometheusAlertRuleId("mock-id") +} + +func (m *MockMapperClient) FindAlertRuleById(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + if m.FindAlertRuleByIdFunc != nil { + return m.FindAlertRuleByIdFunc(alertRuleId) + } + return nil, nil +} + +func (m *MockMapperClient) WatchPrometheusRules(ctx context.Context) { + if m.WatchPrometheusRulesFunc != nil { + m.WatchPrometheusRulesFunc(ctx) + } +} + +func (m *MockMapperClient) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { + if m.AddPrometheusRuleFunc != nil { + m.AddPrometheusRuleFunc(pr) + } +} + +func (m *MockMapperClient) DeletePrometheusRule(pr *monitoringv1.PrometheusRule) { + if m.DeletePrometheusRuleFunc != nil { + m.DeletePrometheusRuleFunc(pr) + } +} + +func (m *MockMapperClient) WatchAlertRelabelConfigs(ctx context.Context) { + if m.WatchAlertRelabelConfigsFunc != nil { + m.WatchAlertRelabelConfigsFunc(ctx) + } +} + +func (m *MockMapperClient) AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { + if m.AddAlertRelabelConfigFunc != nil { + m.AddAlertRelabelConfigFunc(arc) + } +} + +func (m *MockMapperClient) DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { + if m.DeleteAlertRelabelConfigFunc != nil { + m.DeleteAlertRelabelConfigFunc(arc) + } +} + +func (m *MockMapperClient) GetAlertRelabelConfigSpec(alertRule 
*monitoringv1.Rule) []osmv1.RelabelConfig { + if m.GetAlertRelabelConfigSpecFunc != nil { + return m.GetAlertRelabelConfigSpecFunc(alertRule) + } + return nil +} diff --git a/pkg/management/types.go b/pkg/management/types.go new file mode 100644 index 000000000..f5d4e4c40 --- /dev/null +++ b/pkg/management/types.go @@ -0,0 +1,57 @@ +package management + +import ( + "context" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + + "github.com/openshift/monitoring-plugin/pkg/k8s" +) + +// Client is the interface for managing alert rules +type Client interface { + // ListRules lists all alert rules in the specified PrometheusRule resource + ListRules(ctx context.Context, prOptions PrometheusRuleOptions, arOptions AlertRuleOptions) ([]monitoringv1.Rule, error) + + // GetRuleById retrieves a specific alert rule by its ID + GetRuleById(ctx context.Context, alertRuleId string) (monitoringv1.Rule, error) + + // CreateUserDefinedAlertRule creates a new user-defined alert rule + CreateUserDefinedAlertRule(ctx context.Context, alertRule monitoringv1.Rule, prOptions PrometheusRuleOptions) (alertRuleId string, err error) + + // UpdateUserDefinedAlertRule updates an existing user-defined alert rule by its ID + UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error + + // DeleteUserDefinedAlertRuleById deletes a user-defined alert rule by its ID + DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error + + // UpdatePlatformAlertRule updates an existing platform alert rule by its ID + // Platform alert rules can only have the labels updated through AlertRelabelConfigs + UpdatePlatformAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error + + // GetAlerts retrieves Prometheus alerts + GetAlerts(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) +} + +// PrometheusRuleOptions specifies options for selecting PrometheusRule resources and groups +type PrometheusRuleOptions struct { + // Name of the PrometheusRule resource where the alert rule will be added/listed from + Name string `json:"prometheusRuleName"` + + // Namespace of the PrometheusRule resource where the alert rule will be added/listed from + Namespace string `json:"prometheusRuleNamespace"` + + // GroupName of the RuleGroup within the PrometheusRule resource + GroupName string `json:"groupName"` +} + +type AlertRuleOptions struct { + // Name filters alert rules by alert name + Name string `json:"name,omitempty"` + + // Source filters alert rules by source type (platform or user-defined) + Source string `json:"source,omitempty"` + + // Labels filters alert rules by arbitrary label key-value pairs + Labels map[string]string `json:"labels,omitempty"` +} diff --git a/pkg/management/update_platform_alert_rule.go b/pkg/management/update_platform_alert_rule.go new file mode 100644 index 000000000..4270ce4e2 --- /dev/null +++ b/pkg/management/update_platform_alert_rule.go @@ -0,0 +1,171 @@ +package management + +import ( + "context" + "errors" + "fmt" + "strings" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +const openshiftMonitoringNamespace = "openshift-monitoring" + +func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId 
string, alertRule monitoringv1.Rule) error { + prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) + if err != nil { + return err + } + + if !IsPlatformAlertRule(types.NamespacedName(*prId)) { + return errors.New("cannot update non-platform alert rule from " + prId.Namespace + "/" + prId.Name) + } + + originalRule, err := c.getOriginalPlatformRule(ctx, prId, alertRuleId) + if err != nil { + return err + } + + labelChanges := calculateLabelChanges(originalRule.Labels, alertRule.Labels) + if len(labelChanges) == 0 { + return errors.New("no label changes detected; platform alert rules can only have labels updated") + } + + return c.applyLabelChangesViaAlertRelabelConfig(ctx, alertRuleId, originalRule.Alert, labelChanges) +} + +func (c *client) getOriginalPlatformRule(ctx context.Context, prId *mapper.PrometheusRuleId, alertRuleId string) (*monitoringv1.Rule, error) { + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + if err != nil { + return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", prId.Namespace, prId.Name, err) + } + + if !found { + return nil, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + } + + for groupIdx := range pr.Spec.Groups { + for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { + rule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] + if c.shouldUpdateRule(*rule, alertRuleId) { + return rule, nil + } + } + } + + return nil, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) +} + +type labelChange struct { + action string + sourceLabel string + targetLabel string + value string +} + +func calculateLabelChanges(originalLabels, newLabels map[string]string) []labelChange { + var changes []labelChange + + for key, newValue := range newLabels { + originalValue, exists := originalLabels[key] + if !exists || originalValue != newValue { + changes = append(changes, labelChange{ + action: "Replace", + targetLabel: key, + value: newValue, + }) + } + } + + for key := range originalLabels { + // alertname is a special label that is used to identify the alert rule + // and should not be dropped + if key == "alertname" { + continue + } + + if _, exists := newLabels[key]; !exists { + changes = append(changes, labelChange{ + action: "LabelDrop", + sourceLabel: key, + }) + } + } + + return changes +} + +func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, alertRuleId string, alertName string, changes []labelChange) error { + arcName := fmt.Sprintf("alertmanagement-%s", strings.ToLower(strings.ReplaceAll(alertRuleId, "/", "-"))) + + existingArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, openshiftMonitoringNamespace, arcName) + if err != nil { + return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", openshiftMonitoringNamespace, arcName, err) + } + + relabelConfigs := c.buildRelabelConfigs(alertName, changes) + + var arc *osmv1.AlertRelabelConfig + if found { + arc = existingArc + arc.Spec = osmv1.AlertRelabelConfigSpec{ + Configs: relabelConfigs, + } + + err = c.k8sClient.AlertRelabelConfigs().Update(ctx, *arc) + if err != nil { + return fmt.Errorf("failed to update AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + } else { + arc = &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: arcName, + Namespace: openshiftMonitoringNamespace, + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: relabelConfigs, + }, + } + + _, err = 
c.k8sClient.AlertRelabelConfigs().Create(ctx, *arc) + if err != nil { + return fmt.Errorf("failed to create AlertRelabelConfig %s/%s: %w", arc.Namespace, arc.Name, err) + } + } + + return nil +} + +func (c *client) buildRelabelConfigs(alertName string, changes []labelChange) []osmv1.RelabelConfig { + var configs []osmv1.RelabelConfig + + for _, change := range changes { + switch change.action { + case "Replace": + config := osmv1.RelabelConfig{ + SourceLabels: []osmv1.LabelName{"alertname", osmv1.LabelName(change.targetLabel)}, + Regex: fmt.Sprintf("%s;.*", alertName), + TargetLabel: change.targetLabel, + Replacement: change.value, + Action: "Replace", + } + configs = append(configs, config) + case "LabelDrop": + config := osmv1.RelabelConfig{ + SourceLabels: []osmv1.LabelName{"alertname"}, + Regex: alertName, + TargetLabel: change.sourceLabel, + Replacement: "", + Action: "Replace", + } + configs = append(configs, config) + } + } + + return configs +} diff --git a/pkg/management/update_platform_alert_rule_test.go b/pkg/management/update_platform_alert_rule_test.go new file mode 100644 index 000000000..a89eedc9a --- /dev/null +++ b/pkg/management/update_platform_alert_rule_test.go @@ -0,0 +1,400 @@ +package management_test + +import ( + "context" + "errors" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("UpdatePlatformAlertRule", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockARC *testutils.MockAlertRelabelConfigInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockARC = &testutils.MockAlertRelabelConfigInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + AlertRelabelConfigsFunc: func() k8s.AlertRelabelConfigInterface { + return mockARC + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + Context("when updating a platform alert rule", func() { + It("should create an AlertRelabelConfig to update labels", func() { + By("setting up the existing platform rule") + existingRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + "team": "platform", + }, + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "openshift-monitoring/openshift-platform-alerts": prometheusRule, + }) + + alertRuleId := "test-platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id 
mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-alerts", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "PlatformAlert" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating labels through AlertRelabelConfig") + updatedRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + "team": "platform", + "owner": "sre", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).ToNot(HaveOccurred()) + + By("verifying AlertRelabelConfig was created") + arcs, err := mockARC.List(ctx, "openshift-monitoring") + Expect(err).ToNot(HaveOccurred()) + Expect(arcs).To(HaveLen(1)) + + arc := arcs[0] + Expect(arc.Namespace).To(Equal("openshift-monitoring")) + Expect(arc.Name).To(Equal("alertmanagement-test-platform-rule-id")) + + By("verifying relabel configs include label updates with alertname matching") + Expect(arc.Spec.Configs).To(HaveLen(2)) + + severityUpdate := false + ownerAdd := false + for _, config := range arc.Spec.Configs { + Expect(config.Action).To(Equal("Replace")) + Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) + Expect(config.Regex).To(ContainSubstring("PlatformAlert")) + + if config.TargetLabel == "severity" && config.Replacement == "critical" { + severityUpdate = true + Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("severity"))) + } + if config.TargetLabel == "owner" && config.Replacement == "sre" { + ownerAdd = true + Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("owner"))) + } + } + Expect(severityUpdate).To(BeTrue()) + Expect(ownerAdd).To(BeTrue()) + }) + + It("should update existing AlertRelabelConfig when one already exists", func() { + By("setting up the existing platform rule and AlertRelabelConfig") + existingRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + existingARC := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-platform-rule-id-relabel", + Namespace: "openshift-monitoring", + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname"}, + Regex: "PlatformAlert", + Action: "Keep", + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "openshift-monitoring/openshift-platform-alerts": prometheusRule, + }) + mockARC.SetAlertRelabelConfigs(map[string]*osmv1.AlertRelabelConfig{ + "openshift-monitoring/alertmanagement-test-platform-rule-id": existingARC, + }) + + alertRuleId := "test-platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-alerts", + }, nil + } + 
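// Only the PlatformAlert rule hashes to the ID under test; all other rules map to a different ID. +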
mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "PlatformAlert" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating labels through existing AlertRelabelConfig") + updatedRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).ToNot(HaveOccurred()) + + By("verifying existing AlertRelabelConfig was updated") + arc, found, err := mockARC.Get(ctx, "openshift-monitoring", "alertmanagement-test-platform-rule-id") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(arc.Spec.Configs).To(HaveLen(1)) + Expect(arc.Spec.Configs[0].Action).To(Equal("Replace")) + Expect(arc.Spec.Configs[0].SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) + Expect(arc.Spec.Configs[0].TargetLabel).To(Equal("severity")) + Expect(arc.Spec.Configs[0].Replacement).To(Equal("critical")) + }) + + It("should handle label removal", func() { + By("setting up the existing platform rule with multiple labels") + existingRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + "team": "platform", + "owner": "sre", + }, + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "openshift-monitoring/openshift-platform-alerts": prometheusRule, + }) + + alertRuleId := "test-platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-alerts", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "PlatformAlert" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating with fewer labels") + updatedRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).ToNot(HaveOccurred()) + + By("verifying AlertRelabelConfig includes label removal actions") + arcs, err := mockARC.List(ctx, "openshift-monitoring") + Expect(err).ToNot(HaveOccurred()) + Expect(arcs).To(HaveLen(1)) + + arc := arcs[0] + Expect(arc.Spec.Configs).To(HaveLen(2)) + + labelRemovalCount := 0 + for _, config := range arc.Spec.Configs { + if config.Replacement == "" && (config.TargetLabel == "team" || config.TargetLabel == "owner") { + labelRemovalCount++ + Expect(config.Action).To(Equal("Replace")) + Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) + } + } + Expect(labelRemovalCount).To(Equal(2)) + }) + + It("should return error when trying to update non-platform rule", func() { + By("setting up a user-defined rule") + alertRuleId := "test-user-rule-id" 
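+ // The mapper resolves this ID to a PrometheusRule in a user namespace, so the platform-only update path must reject it.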
+ mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "user-namespace", + Name: "user-rule", + }, nil + } + + updatedRule := monitoringv1.Rule{ + Alert: "UserAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot update non-platform alert rule")) + }) + + It("should return error when no label changes detected", func() { + By("setting up the existing platform rule") + existingRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "openshift-platform-alerts", + Namespace: "openshift-monitoring", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "platform-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "openshift-monitoring/openshift-platform-alerts": prometheusRule, + }) + + alertRuleId := "test-platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-alerts", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "PlatformAlert" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating with same labels") + updatedRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("no label changes detected")) + }) + + It("should return error when alert rule not found", func() { + By("setting up mapper to return rule ID") + alertRuleId := "non-existent-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return nil, errors.New("alert rule not found") + } + + updatedRule := monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "critical", + }, + } + + err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("alert rule not found")) + }) + }) +}) diff --git a/pkg/management/update_user_defined_alert_rule.go b/pkg/management/update_user_defined_alert_rule.go new file mode 100644 index 000000000..ebfe1b7cb --- /dev/null +++ b/pkg/management/update_user_defined_alert_rule.go @@ -0,0 +1,61 @@ +package management + +import ( + "context" + "fmt" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/openshift/monitoring-plugin/pkg/management/mapper" +) + +func (c *client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error { + prId, err := 
c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) + if err != nil { + return err + } + + if IsPlatformAlertRule(types.NamespacedName(*prId)) { + return fmt.Errorf("cannot update alert rule in a platform-managed PrometheusRule") + } + + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + if err != nil { + return err + } + + if !found { + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + } + + updated := false + for groupIdx := range pr.Spec.Groups { + for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { + rule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] + if c.shouldUpdateRule(*rule, alertRuleId) { + pr.Spec.Groups[groupIdx].Rules[ruleIdx] = alertRule + updated = true + break + } + } + if updated { + break + } + } + + if !updated { + return fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) + } + + err = c.k8sClient.PrometheusRules().Update(ctx, *pr) + if err != nil { + return fmt.Errorf("failed to update PrometheusRule %s/%s: %w", pr.Namespace, pr.Name, err) + } + + return nil +} + +func (c *client) shouldUpdateRule(rule monitoringv1.Rule, alertRuleId string) bool { + return alertRuleId == string(c.mapper.GetAlertingRuleId(&rule)) +} diff --git a/pkg/management/update_user_defined_alert_rule_test.go b/pkg/management/update_user_defined_alert_rule_test.go new file mode 100644 index 000000000..1b2460807 --- /dev/null +++ b/pkg/management/update_user_defined_alert_rule_test.go @@ -0,0 +1,250 @@ +package management_test + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/management/testutils" +) + +var _ = Describe("UpdateUserDefinedAlertRule", func() { + var ( + ctx context.Context + mockK8s *testutils.MockClient + mockPR *testutils.MockPrometheusRuleInterface + mockMapper *testutils.MockMapperClient + client management.Client + ) + + BeforeEach(func() { + ctx = context.Background() + + mockPR = &testutils.MockPrometheusRuleInterface{} + mockK8s = &testutils.MockClient{ + PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { + return mockPR + }, + } + mockMapper = &testutils.MockMapperClient{} + + client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + }) + + Context("when updating a user-defined alert rule", func() { + It("should successfully update an existing alert rule", func() { + By("setting up the existing rule") + existingRule := monitoringv1.Rule{ + Alert: "OldAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "user-rule", + Namespace: "user-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "user-namespace/user-rule": prometheusRule, + }) + + alertRuleId := "test-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + 
return &mapper.PrometheusRuleId{ + Namespace: "user-namespace", + Name: "user-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "OldAlert" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating with new values") + updatedRule := monitoringv1.Rule{ + Alert: "UpdatedAlert", + Expr: intstr.FromString("up == 1"), + Annotations: map[string]string{ + "summary": "Updated summary", + }, + } + + err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).ToNot(HaveOccurred()) + + By("verifying the update succeeded") + updatedPR, found, err := mockPR.Get(ctx, "user-namespace", "user-rule") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedPR.Spec.Groups).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("UpdatedAlert")) + Expect(updatedPR.Spec.Groups[0].Rules[0].Expr.String()).To(Equal("up == 1")) + Expect(updatedPR.Spec.Groups[0].Rules[0].Annotations["summary"]).To(Equal("Updated summary")) + }) + + It("should update the correct rule when multiple rules exist", func() { + By("setting up multiple rules across different groups") + rule1 := monitoringv1.Rule{ + Alert: "Alert1", + Expr: intstr.FromString("up == 0"), + } + + rule2 := monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("cpu_usage > 80"), + } + + rule3 := monitoringv1.Rule{ + Alert: "Alert3", + Expr: intstr.FromString("memory_usage > 90"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "multi-rule", + Namespace: "user-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{rule1, rule2}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{rule3}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "user-namespace/multi-rule": prometheusRule, + }) + + alertRuleId := "alert2-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "user-namespace", + Name: "multi-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + if alertRule.Alert == "Alert2" { + return mapper.PrometheusAlertRuleId(alertRuleId) + } + return mapper.PrometheusAlertRuleId("other-id") + } + + By("updating only the second rule") + updatedRule := monitoringv1.Rule{ + Alert: "Alert2Updated", + Expr: intstr.FromString("cpu_usage > 90"), + } + + err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) + Expect(err).ToNot(HaveOccurred()) + + By("verifying only the targeted rule was updated") + updatedPR, found, err := mockPR.Get(ctx, "user-namespace", "multi-rule") + Expect(found).To(BeTrue()) + Expect(err).ToNot(HaveOccurred()) + Expect(updatedPR.Spec.Groups).To(HaveLen(2)) + + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(2)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("Alert1")) + Expect(updatedPR.Spec.Groups[0].Rules[1].Alert).To(Equal("Alert2Updated")) + Expect(updatedPR.Spec.Groups[0].Rules[1].Expr.String()).To(Equal("cpu_usage > 90")) + + Expect(updatedPR.Spec.Groups[1].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[1].Rules[0].Alert).To(Equal("Alert3")) + }) + + 
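// Error cases: an unknown alert rule id and a platform-managed rule must both be rejected. +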
It("should return error when alert rule ID is not found", func() { + existingRule := monitoringv1.Rule{ + Alert: "ExistingAlert", + Expr: intstr.FromString("up == 0"), + } + + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "user-rule", + Namespace: "user-namespace", + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{existingRule}, + }, + }, + }, + } + + mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ + "user-namespace/user-rule": prometheusRule, + }) + + alertRuleId := "non-existent-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "user-namespace", + Name: "user-rule", + }, nil + } + mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId("different-id") + } + + updatedRule := monitoringv1.Rule{ + Alert: "UpdatedAlert", + Expr: intstr.FromString("up == 1"), + } + + err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) + + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("not found")) + }) + + It("should return error when trying to update a platform-managed alert rule", func() { + alertRuleId := "platform-rule-id" + mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + return &mapper.PrometheusRuleId{ + Namespace: "openshift-monitoring", + Name: "openshift-platform-rules", + }, nil + } + + updatedRule := monitoringv1.Rule{ + Alert: "UpdatedAlert", + Expr: intstr.FromString("up == 1"), + } + + err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) + + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("platform-managed")) + }) + }) +}) diff --git a/pkg/server.go b/pkg/server.go index 653fca843..271ac4003 100644 --- a/pkg/server.go +++ b/pkg/server.go @@ -12,7 +12,6 @@ import ( "github.com/gorilla/handlers" "github.com/gorilla/mux" - "github.com/openshift/monitoring-plugin/pkg/proxy" "github.com/sirupsen/logrus" "gopkg.in/yaml.v2" v1 "k8s.io/api/core/v1" @@ -21,6 +20,12 @@ import ( "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/client-go/tools/record" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/proxy" + + "github.com/openshift/monitoring-plugin/pkg/k8s" ) var log = logrus.WithField("module", "server") @@ -60,6 +65,7 @@ const ( Incidents Feature = "incidents" DevConfig Feature = "dev-config" PersesDashboards Feature = "perses-dashboards" + ManagementAPI Feature = "management-api" ) func (pluginConfig *PluginConfig) MarshalJSON() ([]byte, error) { @@ -103,6 +109,8 @@ func (s *PluginServer) Shutdown(ctx context.Context) error { func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { acmMode := cfg.Features[AcmAlerting] + managementMode := cfg.Features[ManagementAPI] + acmLocationsLength := len(cfg.AlertmanagerUrl) + len(cfg.ThanosQuerierUrl) if acmLocationsLength > 0 && !acmMode { @@ -116,15 +124,19 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { return nil, fmt.Errorf("cannot set default port to reserved port %d", cfg.Port) } + var k8sconfig *rest.Config + var err error + // Uncomment the following line for local 
development: - // k8sconfig, err := clientcmd.BuildConfigFromFlags("", "$HOME/.kube/config") + // k8sconfig, err = clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG")) + // if err != nil { + // return nil, fmt.Errorf("cannot get kubeconfig from file: %w", err) + // } // Comment the following line for local development: var k8sclient *dynamic.DynamicClient - if acmMode { - - k8sconfig, err := rest.InClusterConfig() - + if acmMode || managementMode { + k8sconfig, err = rest.InClusterConfig() if err != nil { return nil, fmt.Errorf("cannot get in cluster config: %w", err) } @@ -137,7 +149,23 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { k8sclient = nil } - router, pluginConfig := setupRoutes(cfg) + // Initialize management client if management API feature is enabled + var managementClient management.Client + if managementMode { + k8sClient, err := k8s.NewClient(ctx, k8sconfig) + if err != nil { + return nil, fmt.Errorf("failed to create k8s client for management API: %w", err) + } + + if err := k8sClient.TestConnection(ctx); err != nil { + return nil, fmt.Errorf("failed to connect to kubernetes cluster for management API: %w", err) + } + + managementClient = management.New(ctx, k8sClient) + log.Info("Management API enabled") + } + + router, pluginConfig := setupRoutes(cfg, managementClient) router.Use(corsHeaderMiddleware()) tlsConfig := &tls.Config{} @@ -222,7 +250,7 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { return httpServer, nil } -func setupRoutes(cfg *Config) (*mux.Router, *PluginConfig) { +func setupRoutes(cfg *Config, managementClient management.Client) (*mux.Router, *PluginConfig) { configHandlerFunc, pluginConfig := configHandler(cfg) router := mux.NewRouter() @@ -233,6 +261,12 @@ func setupRoutes(cfg *Config) (*mux.Router, *PluginConfig) { router.PathPrefix("/features").HandlerFunc(featuresHandler(cfg)) router.PathPrefix("/config").HandlerFunc(configHandlerFunc) + + if managementClient != nil { + managementRouter := managementrouter.New(managementClient) + router.PathPrefix("/api/v1/alerting").Handler(managementRouter) + } + router.PathPrefix("/").Handler(filesHandler(http.Dir(cfg.StaticPath))) return router, pluginConfig From fb8a751501bc6b855a7ba869f27db7a0d426dbf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Vila=C3=A7a?= Date: Tue, 9 Dec 2025 16:08:12 +0000 Subject: [PATCH 2/4] Change IsPlatformAlertRule implementation (#1) Signed-off-by: machadovilaca --- go.mod | 1 + ...ser_defined_alert_rule_bulk_delete_test.go | 20 ++- ...er_defined_alert_rule_delete_by_id_test.go | 16 ++- pkg/k8s/alert_relabel_config.go | 23 +--- pkg/k8s/alert_relabel_config_informer.go | 99 ++++++++------ pkg/k8s/client.go | 18 ++- pkg/k8s/namespace_informer.go | 105 +++++++++++++++ pkg/k8s/prometheus_rule.go | 14 +- pkg/k8s/prometheus_rule_informer.go | 100 ++++++++------ pkg/k8s/types.go | 26 +++- .../create_user_defined_alert_rule.go | 2 +- .../create_user_defined_alert_rule_test.go | 18 ++- .../delete_user_defined_alert_rule_by_id.go | 2 +- ...lete_user_defined_alert_rule_by_id_test.go | 10 +- pkg/management/list_rules.go | 2 +- pkg/management/list_rules_test.go | 27 +++- pkg/management/management.go | 6 +- pkg/management/mapper/mapper.go | 17 +-- pkg/management/mapper/mapper_test.go | 9 +- pkg/management/mapper/types.go | 5 +- pkg/management/testutils/k8s_client_mock.go | 125 +++++++++++++++++- pkg/management/testutils/mapper_mock.go | 13 +- pkg/management/update_platform_alert_rule.go | 14 +- 
.../update_platform_alert_rule_test.go | 44 +++--- .../update_user_defined_alert_rule.go | 2 +- .../update_user_defined_alert_rule_test.go | 10 +- 26 files changed, 543 insertions(+), 185 deletions(-) create mode 100644 pkg/k8s/namespace_informer.go diff --git a/go.mod b/go.mod index 4107fae38..8cfe2772e 100644 --- a/go.mod +++ b/go.mod @@ -56,6 +56,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/spf13/pflag v1.0.6 // indirect github.com/x448/float16 v0.8.4 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go index 15b6f7ac7..1b3e7ecc3 100644 --- a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go @@ -42,7 +42,7 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { platformPR := monitoringv1.PrometheusRule{} platformPR.Name = "platform-pr" - platformPR.Namespace = "openshift-monitoring" + platformPR.Namespace = "platform-namespace-1" platformPR.Spec.Groups = []monitoringv1.RuleGroup{ { Name: "pg1", @@ -52,13 +52,21 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ "default/user-pr": &userPR, - "openshift-monitoring/platform-pr": &platformPR, + "platform-namespace-1/platform-pr": &platformPR, }) + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } mockMapper = &testutils.MockMapperClient{ @@ -72,7 +80,7 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { Name: "user-pr", } if id == "platform1" { - pr.Namespace = "openshift-monitoring" + pr.Namespace = "platform-namespace-1" pr.Name = "platform-pr" } return &pr, nil @@ -125,7 +133,7 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { Expect(userRuleNames).NotTo(ContainElement("u1")) Expect(userRuleNames).To(ContainElement("u2")) - prPlatform, _, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + prPlatform, _, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") Expect(err).NotTo(HaveOccurred()) foundPlatform := false for _, g := range prPlatform.Spec.Groups { @@ -174,7 +182,7 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { Expect(userRuleNames).To(ContainElement("u2")) // Platform rule remains intact - prPlatform, _, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + prPlatform, _, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") Expect(err).NotTo(HaveOccurred()) foundPlatform := false for _, g := range prPlatform.Spec.Groups { @@ -215,7 +223,7 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { Expect(found).To(BeFalse()) // Platform PrometheusRule remains present - _, found, err = mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + 
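// (platform-namespace-1 carries the cluster-monitoring label, so the bulk delete must leave it untouched) +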
_, found, err = mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") Expect(err).NotTo(HaveOccurred()) Expect(found).To(BeTrue()) }) diff --git a/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go index 9b93bebfa..9ddb0371c 100644 --- a/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go +++ b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go @@ -41,7 +41,7 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { platformPR := monitoringv1.PrometheusRule{} platformPR.Name = "platform-pr" - platformPR.Namespace = "openshift-monitoring" + platformPR.Namespace = "platform-namespace-1" platformPR.Spec.Groups = []monitoringv1.RuleGroup{ { Name: "pg1", @@ -51,13 +51,21 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ "default/user-pr": &userPR, - "openshift-monitoring/platform-pr": &platformPR, + "platform-namespace-1/platform-pr": &platformPR, }) + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockK8sRules }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } }) @@ -140,7 +148,7 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { }, FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { pr := mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "platform-pr", } return &pr, nil @@ -157,7 +165,7 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { Expect(w.Code).To(Equal(http.StatusMethodNotAllowed)) Expect(w.Body.String()).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) - pr, found, err := mockK8sRules.Get(context.Background(), "openshift-monitoring", "platform-pr") + pr, found, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") Expect(found).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) for _, g := range pr.Spec.Groups { diff --git a/pkg/k8s/alert_relabel_config.go b/pkg/k8s/alert_relabel_config.go index 8ce3501eb..eca561a0e 100644 --- a/pkg/k8s/alert_relabel_config.go +++ b/pkg/k8s/alert_relabel_config.go @@ -6,40 +6,27 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) type alertRelabelConfigManager struct { clientset *osmv1client.Clientset + informer AlertRelabelConfigInformerInterface } -func newAlertRelabelConfigManager(clientset *osmv1client.Clientset) AlertRelabelConfigInterface { +func newAlertRelabelConfigManager(clientset *osmv1client.Clientset, informer AlertRelabelConfigInformerInterface) AlertRelabelConfigInterface { return &alertRelabelConfigManager{ clientset: clientset, + informer: informer, } } func (arcm *alertRelabelConfigManager) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { - arcs, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, err - } - - return arcs.Items, nil + 
return arcm.informer.List(ctx, namespace) } func (arcm *alertRelabelConfigManager) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { - arc, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).Get(ctx, name, metav1.GetOptions{}) - if err != nil { - if errors.IsNotFound(err) { - return nil, false, nil - } - - return nil, false, fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", namespace, name, err) - } - - return arc, true, nil + return arcm.informer.Get(ctx, namespace, name) } func (arcm *alertRelabelConfigManager) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { diff --git a/pkg/k8s/alert_relabel_config_informer.go b/pkg/k8s/alert_relabel_config_informer.go index eccbd36d4..da6732956 100644 --- a/pkg/k8s/alert_relabel_config_informer.go +++ b/pkg/k8s/alert_relabel_config_informer.go @@ -2,61 +2,84 @@ package k8s import ( "context" - "log" osmv1 "github.com/openshift/api/monitoring/v1" osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/watch" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/tools/cache" ) type alertRelabelConfigInformer struct { - clientset *osmv1client.Clientset + informer cache.SharedIndexInformer } func newAlertRelabelConfigInformer(clientset *osmv1client.Clientset) AlertRelabelConfigInformerInterface { + informer := cache.NewSharedIndexInformer( + alertRelabelConfigListWatchForAllNamespaces(clientset), + &osmv1.AlertRelabelConfig{}, + 0, + cache.Indexers{}, + ) + return &alertRelabelConfigInformer{ - clientset: clientset, + informer: informer, } } -func (arci *alertRelabelConfigInformer) Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error { - options := metav1.ListOptions{ - Watch: true, - } +func alertRelabelConfigListWatchForAllNamespaces(clientset *osmv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "alertrelabelconfigs", "", fields.Everything()) +} - watcher, err := arci.clientset.MonitoringV1().AlertRelabelConfigs("").Watch(ctx, options) - if err != nil { - return err - } - defer watcher.Stop() - - ch := watcher.ResultChan() - for event := range ch { - arc, ok := event.Object.(*osmv1.AlertRelabelConfig) - if !ok { - log.Printf("Unexpected type: %v", event.Object) - continue - } - - switch event.Type { - case watch.Added: - if callbacks.OnAdd != nil { - callbacks.OnAdd(arc) +func (arci *alertRelabelConfigInformer) Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error { + _, err := arci.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + arc, ok := obj.(*osmv1.AlertRelabelConfig) + if !ok { + return } - case watch.Modified: - if callbacks.OnUpdate != nil { - callbacks.OnUpdate(arc) + callbacks.OnAdd(arc) + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + arc, ok := newObj.(*osmv1.AlertRelabelConfig) + if !ok { + return } - case watch.Deleted: - if callbacks.OnDelete != nil { - callbacks.OnDelete(arc) + callbacks.OnUpdate(arc) + }, + DeleteFunc: func(obj interface{}) { + k, err := cache.DeletionHandlingObjectToName(obj) + if err != nil { + return } - case watch.Error: - log.Printf("Error occurred while watching AlertRelabelConfig: %s\n", event.Object) - } + callbacks.OnDelete(k) + }, + }) + + go arci.informer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("AlertRelabelConfig 
informer", ctx.Done(), + arci.informer.HasSynced, + ) + + return err +} + +func (arci *alertRelabelConfigInformer) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + arcs := arci.informer.GetStore().List() + + alertRelabelConfigs := make([]osmv1.AlertRelabelConfig, 0, len(arcs)) + for _, arc := range arcs { + alertRelabelConfigs = append(alertRelabelConfigs, *arc.(*osmv1.AlertRelabelConfig)) + } + + return alertRelabelConfigs, nil +} + +func (arci *alertRelabelConfigInformer) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + arc, exists, err := arci.informer.GetStore().GetByKey(namespace + "/" + name) + if err != nil { + return nil, exists, err } - log.Fatalf("AlertRelabelConfig watcher channel closed unexpectedly") - return nil + return arc.(*osmv1.AlertRelabelConfig), exists, nil } diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index e016eb5f6..776eb6687 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -26,9 +26,11 @@ type client struct { alertRelabelConfigManager AlertRelabelConfigInterface alertRelabelConfigInformer AlertRelabelConfigInformerInterface + + namespaceInformer NamespaceInformerInterface } -func newClient(_ context.Context, config *rest.Config) (Client, error) { +func newClient(ctx context.Context, config *rest.Config) (Client, error) { clientset, err := kubernetes.NewForConfig(config) if err != nil { return nil, fmt.Errorf("failed to create clientset: %w", err) @@ -53,11 +55,17 @@ func newClient(_ context.Context, config *rest.Config) (Client, error) { c.prometheusAlerts = newPrometheusAlerts(clientset, config) - c.prometheusRuleManager = newPrometheusRuleManager(monitoringv1clientset) c.prometheusRuleInformer = newPrometheusRuleInformer(monitoringv1clientset) + c.prometheusRuleManager = newPrometheusRuleManager(monitoringv1clientset, c.prometheusRuleInformer) - c.alertRelabelConfigManager = newAlertRelabelConfigManager(osmv1clientset) c.alertRelabelConfigInformer = newAlertRelabelConfigInformer(osmv1clientset) + c.alertRelabelConfigManager = newAlertRelabelConfigManager(osmv1clientset, c.alertRelabelConfigInformer) + + namespaceInformer, err := newNamespaceInformer(ctx, clientset) + if err != nil { + return nil, fmt.Errorf("failed to create namespace informer: %w", err) + } + c.namespaceInformer = namespaceInformer return c, nil } @@ -89,3 +97,7 @@ func (c *client) AlertRelabelConfigs() AlertRelabelConfigInterface { func (c *client) AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface { return c.alertRelabelConfigInformer } + +func (c *client) NamespaceInformer() NamespaceInformerInterface { + return c.namespaceInformer +} diff --git a/pkg/k8s/namespace_informer.go b/pkg/k8s/namespace_informer.go new file mode 100644 index 000000000..27cc61def --- /dev/null +++ b/pkg/k8s/namespace_informer.go @@ -0,0 +1,105 @@ +package k8s + +import ( + "context" + "sync" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + corev1client "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/tools/cache" +) + +const ( + // ClusterMonitoringLabel is the label used to identify namespaces with cluster monitoring enabled + ClusterMonitoringLabel = "openshift.io/cluster-monitoring" +) + +type namespaceInformer struct { + informer cache.SharedIndexInformer + + // monitoringNamespaces stores namespaces with openshift.io/cluster-monitoring=true + monitoringNamespaces map[string]bool + mu sync.RWMutex +} + +func 
newNamespaceInformer(ctx context.Context, clientset kubernetes.Interface) (NamespaceInformerInterface, error) { + informer := cache.NewSharedIndexInformer( + namespaceListWatch(clientset.CoreV1()), + &corev1.Namespace{}, + 0, + cache.Indexers{}, + ) + + ni := &namespaceInformer{ + informer: informer, + monitoringNamespaces: make(map[string]bool), + } + + _, err := ni.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + ns, ok := obj.(*corev1.Namespace) + if !ok { + return + } + ni.updateMonitoringNamespace(ns) + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + ns, ok := newObj.(*corev1.Namespace) + if !ok { + return + } + ni.updateMonitoringNamespace(ns) + }, + DeleteFunc: func(obj interface{}) { + namespaceName, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) + if err != nil { + return + } + ni.removeMonitoringNamespace(namespaceName) + }, + }) + + go ni.informer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("Namespace informer", ctx.Done(), + ni.informer.HasSynced, + ) + + return ni, err +} + +func namespaceListWatch(client corev1client.CoreV1Interface) *cache.ListWatch { + return cache.NewFilteredListWatchFromClient( + client.RESTClient(), + "namespaces", + "", + func(options *metav1.ListOptions) { + options.LabelSelector = ClusterMonitoringLabel + "=true" + }, + ) +} + +func (ni *namespaceInformer) IsClusterMonitoringNamespace(name string) bool { + ni.mu.RLock() + defer ni.mu.RUnlock() + return ni.monitoringNamespaces[name] +} + +func (ni *namespaceInformer) updateMonitoringNamespace(ns *corev1.Namespace) { + ni.mu.Lock() + defer ni.mu.Unlock() + + if ns.Labels != nil && ns.Labels[ClusterMonitoringLabel] == "true" { + ni.monitoringNamespaces[ns.Name] = true + } else { + delete(ni.monitoringNamespaces, ns.Name) + } +} + +func (ni *namespaceInformer) removeMonitoringNamespace(name string) { + ni.mu.Lock() + defer ni.mu.Unlock() + delete(ni.monitoringNamespaces, name) +} diff --git a/pkg/k8s/prometheus_rule.go b/pkg/k8s/prometheus_rule.go index eb9246130..877750ca1 100644 --- a/pkg/k8s/prometheus_rule.go +++ b/pkg/k8s/prometheus_rule.go @@ -13,11 +13,13 @@ import ( type prometheusRuleManager struct { clientset *monitoringv1client.Clientset + informer PrometheusRuleInformerInterface } -func newPrometheusRuleManager(clientset *monitoringv1client.Clientset) PrometheusRuleInterface { +func newPrometheusRuleManager(clientset *monitoringv1client.Clientset, informer PrometheusRuleInformerInterface) PrometheusRuleInterface { return &prometheusRuleManager{ clientset: clientset, + informer: informer, } } @@ -31,16 +33,12 @@ func (prm *prometheusRuleManager) List(ctx context.Context, namespace string) ([ } func (prm *prometheusRuleManager) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { - pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespace).Get(ctx, name, metav1.GetOptions{}) + pr, exists, err := prm.informer.Get(ctx, namespace, name) if err != nil { - if errors.IsNotFound(err) { - return nil, false, nil - } - - return nil, false, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) + return nil, exists, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) } - return pr, true, nil + return pr, exists, nil } func (prm *prometheusRuleManager) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { diff --git a/pkg/k8s/prometheus_rule_informer.go b/pkg/k8s/prometheus_rule_informer.go index c0e7a716b..ec68dfc52 
100644 --- a/pkg/k8s/prometheus_rule_informer.go +++ b/pkg/k8s/prometheus_rule_informer.go @@ -2,61 +2,85 @@ package k8s import ( "context" - "log" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/watch" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/tools/cache" ) type prometheusRuleInformer struct { - clientset *monitoringv1client.Clientset + informer cache.SharedIndexInformer } func newPrometheusRuleInformer(clientset *monitoringv1client.Clientset) PrometheusRuleInformerInterface { + informer := cache.NewSharedIndexInformer( + prometheusRuleListWatchForAllNamespaces(clientset), + &monitoringv1.PrometheusRule{}, + 0, + cache.Indexers{}, + ) + return &prometheusRuleInformer{ - clientset: clientset, + informer: informer, } } -func (pri *prometheusRuleInformer) Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error { - options := metav1.ListOptions{ - Watch: true, - } +func prometheusRuleListWatchForAllNamespaces(clientset *monitoringv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "prometheusrules", "", fields.Everything()) +} - watcher, err := pri.clientset.MonitoringV1().PrometheusRules("").Watch(ctx, options) - if err != nil { - return err - } - defer watcher.Stop() - - ch := watcher.ResultChan() - for event := range ch { - pr, ok := event.Object.(*monitoringv1.PrometheusRule) - if !ok { - log.Printf("Unexpected type: %v", event.Object) - continue - } - - switch event.Type { - case watch.Added: - if callbacks.OnAdd != nil { - callbacks.OnAdd(pr) +func (pri *prometheusRuleInformer) Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error { + _, err := pri.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + pr, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + return } - case watch.Modified: - if callbacks.OnUpdate != nil { - callbacks.OnUpdate(pr) + callbacks.OnAdd(pr) + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + pr, ok := newObj.(*monitoringv1.PrometheusRule) + if !ok { + return } - case watch.Deleted: - if callbacks.OnDelete != nil { - callbacks.OnDelete(pr) + callbacks.OnUpdate(pr) + }, + DeleteFunc: func(obj interface{}) { + k, err := cache.DeletionHandlingObjectToName(obj) + if err != nil { + return } - case watch.Error: - log.Printf("Error occurred while watching PrometheusRule: %s\n", event.Object) - } + + callbacks.OnDelete(k) + }, + }) + + go pri.informer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("PrometheusRule informer", ctx.Done(), + pri.informer.HasSynced, + ) + + return err +} + +func (pri *prometheusRuleInformer) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { + prs := pri.informer.GetStore().List() + + prometheusRules := make([]monitoringv1.PrometheusRule, 0, len(prs)) + for _, pr := range prs { + prometheusRules = append(prometheusRules, *pr.(*monitoringv1.PrometheusRule)) + } + + return prometheusRules, nil +} + +func (pri *prometheusRuleInformer) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + pr, exists, err := pri.informer.GetStore().GetByKey(namespace + "/" + name) + if err != nil { + return nil, exists, err } - log.Fatalf("PrometheusRule watcher channel closed unexpectedly") - return nil 
+ return pr.(*monitoringv1.PrometheusRule), exists, nil } diff --git a/pkg/k8s/types.go b/pkg/k8s/types.go index c3579841f..550b5114c 100644 --- a/pkg/k8s/types.go +++ b/pkg/k8s/types.go @@ -6,6 +6,7 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" ) // ClientOptions holds configuration options for creating a Kubernetes client @@ -34,6 +35,9 @@ type Client interface { // AlertRelabelConfigInformer returns the AlertRelabelConfigInformer interface AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface + + // NamespaceInformer returns the NamespaceInformer interface + NamespaceInformer() NamespaceInformerInterface } // PrometheusAlertsInterface defines operations for managing PrometheusAlerts @@ -64,6 +68,12 @@ type PrometheusRuleInterface interface { type PrometheusRuleInformerInterface interface { // Run starts the informer and sets up the provided callbacks for add, update, and delete events Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error + + // List lists all PrometheusRules in the cluster + List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) + + // Get retrieves a PrometheusRule by namespace and name + Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) } // PrometheusRuleInformerCallback holds the callback functions for informer events @@ -75,7 +85,7 @@ type PrometheusRuleInformerCallback struct { OnUpdate func(pr *monitoringv1.PrometheusRule) // OnDelete is called when a PrometheusRule is deleted - OnDelete func(pr *monitoringv1.PrometheusRule) + OnDelete func(key cache.ObjectName) } // AlertRelabelConfigInterface defines operations for managing AlertRelabelConfigs @@ -100,6 +110,12 @@ type AlertRelabelConfigInterface interface { type AlertRelabelConfigInformerInterface interface { // Run starts the informer and sets up the provided callbacks for add, update, and delete events Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error + + // List lists all AlertRelabelConfigs in the cluster + List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + + // Get retrieves an AlertRelabelConfig by namespace and name + Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) } // AlertRelabelConfigInformerCallback holds the callback functions for informer events @@ -111,5 +127,11 @@ type AlertRelabelConfigInformerCallback struct { OnUpdate func(arc *osmv1.AlertRelabelConfig) // OnDelete is called when an AlertRelabelConfig is deleted - OnDelete func(arc *osmv1.AlertRelabelConfig) + OnDelete func(key cache.ObjectName) +} + +// NamespaceInformerInterface defines operations for Namespace informers +type NamespaceInformerInterface interface { + // IsClusterMonitoringNamespace checks if a namespace has the openshift.io/cluster-monitoring=true label + IsClusterMonitoringNamespace(name string) bool } diff --git a/pkg/management/create_user_defined_alert_rule.go b/pkg/management/create_user_defined_alert_rule.go index 226b371f2..403489bcc 100644 --- a/pkg/management/create_user_defined_alert_rule.go +++ b/pkg/management/create_user_defined_alert_rule.go @@ -22,7 +22,7 @@ func (c *client) CreateUserDefinedAlertRule(ctx context.Context, alertRule monit Namespace: prOptions.Namespace, } - if IsPlatformAlertRule(nn) { + if c.IsPlatformAlertRule(nn) { 
return "", errors.New("cannot add user-defined alert rule to a platform-managed PrometheusRule") } diff --git a/pkg/management/create_user_defined_alert_rule_test.go b/pkg/management/create_user_defined_alert_rule_test.go index f45355e60..4f7253af5 100644 --- a/pkg/management/create_user_defined_alert_rule_test.go +++ b/pkg/management/create_user_defined_alert_rule_test.go @@ -29,10 +29,18 @@ var _ = Describe("CreateUserDefinedAlertRule", func() { ctx = context.Background() mockPR = &testutils.MockPrometheusRuleInterface{} + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } mockMapper = &testutils.MockMapperClient{} @@ -172,9 +180,11 @@ var _ = Describe("CreateUserDefinedAlertRule", func() { prOptions := management.PrometheusRuleOptions{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", } + // Don't set up mapper - we should fail before mapper check + By("attempting to create the alert rule") _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) @@ -287,8 +297,8 @@ var _ = Describe("CreateUserDefinedAlertRule", func() { Expect(addRuleCalled).To(BeTrue()) }) - It("should reject PrometheusRules in openshift- prefixed namespaces", func() { - By("setting up test data with openshift- namespace prefix") + It("should reject PrometheusRules in cluster monitoring namespaces", func() { + By("setting up test data with cluster monitoring namespace") alertRule := monitoringv1.Rule{ Alert: "TestAlert", Expr: intstr.FromString("up == 0"), @@ -296,7 +306,7 @@ var _ = Describe("CreateUserDefinedAlertRule", func() { prOptions := management.PrometheusRuleOptions{ Name: "custom-rule", - Namespace: "openshift-user-namespace", + Namespace: "platform-namespace-1", } By("attempting to create the alert rule") diff --git a/pkg/management/delete_user_defined_alert_rule_by_id.go b/pkg/management/delete_user_defined_alert_rule_by_id.go index 18ac94b0d..713a93906 100644 --- a/pkg/management/delete_user_defined_alert_rule_by_id.go +++ b/pkg/management/delete_user_defined_alert_rule_by_id.go @@ -16,7 +16,7 @@ func (c *client) DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} } - if IsPlatformAlertRule(types.NamespacedName(*prId)) { + if c.IsPlatformAlertRule(types.NamespacedName(*prId)) { return &NotAllowedError{Message: "cannot delete alert rule from a platform-managed PrometheusRule"} } diff --git a/pkg/management/delete_user_defined_alert_rule_by_id_test.go b/pkg/management/delete_user_defined_alert_rule_by_id_test.go index 879d87307..f0f2f5731 100644 --- a/pkg/management/delete_user_defined_alert_rule_by_id_test.go +++ b/pkg/management/delete_user_defined_alert_rule_by_id_test.go @@ -30,10 +30,18 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { ctx = context.Background() mockPR = &testutils.MockPrometheusRuleInterface{} + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR }, + 
NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } mockMapper = &testutils.MockMapperClient{} @@ -311,7 +319,7 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { alertRuleId := "platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-alerts", }, nil } diff --git a/pkg/management/list_rules.go b/pkg/management/list_rules.go index 24d92a8c1..bd24a8d63 100644 --- a/pkg/management/list_rules.go +++ b/pkg/management/list_rules.go @@ -85,7 +85,7 @@ func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, pr monitoringv1 // Filter by source (platform or user-defined) if arOptions.Source != "" { prId := types.NamespacedName{Name: pr.Name, Namespace: pr.Namespace} - isPlatform := IsPlatformAlertRule(prId) + isPlatform := c.IsPlatformAlertRule(prId) if arOptions.Source == "platform" && !isPlatform { return false diff --git a/pkg/management/list_rules_test.go b/pkg/management/list_rules_test.go index 3003801b2..802863d4c 100644 --- a/pkg/management/list_rules_test.go +++ b/pkg/management/list_rules_test.go @@ -12,6 +12,7 @@ import ( "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" + "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) @@ -28,12 +29,28 @@ var _ = Describe("ListRules", func() { ctx = context.Background() mockPR = &testutils.MockPrometheusRuleInterface{} + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, + } + mockMapper = &testutils.MockMapperClient{ + GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { + return mapper.PrometheusAlertRuleId(rule.Alert) + }, + FindAlertRuleByIdFunc: func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { + // Mock successful lookup for all alert rules + return &mapper.PrometheusRuleId{}, nil + }, } - mockMapper = &testutils.MockMapperClient{} client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) }) @@ -337,7 +354,7 @@ var _ = Describe("ListRules", func() { platformRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: []monitoringv1.RuleGroup{ @@ -356,7 +373,7 @@ var _ = Describe("ListRules", func() { mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ "monitoring/test-alerts": prometheusRule, - "openshift-monitoring/openshift-platform-alerts": platformRule, + "platform-namespace-1/openshift-platform-alerts": platformRule, }) prOptions := management.PrometheusRuleOptions{} @@ -375,7 +392,7 @@ var _ = Describe("ListRules", func() { platformRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: 
[]monitoringv1.RuleGroup{ @@ -394,7 +411,7 @@ var _ = Describe("ListRules", func() { mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ "monitoring/test-alerts": prometheusRule, - "openshift-monitoring/openshift-platform-alerts": platformRule, + "platform-namespace-1/openshift-platform-alerts": platformRule, }) prOptions := management.PrometheusRuleOptions{} diff --git a/pkg/management/management.go b/pkg/management/management.go index 7135755b6..a42f2dcbe 100644 --- a/pkg/management/management.go +++ b/pkg/management/management.go @@ -1,8 +1,6 @@ package management import ( - "strings" - "k8s.io/apimachinery/pkg/types" "github.com/openshift/monitoring-plugin/pkg/k8s" @@ -14,6 +12,6 @@ type client struct { mapper mapper.Client } -func IsPlatformAlertRule(prId types.NamespacedName) bool { - return strings.HasPrefix(prId.Namespace, "openshift-") +func (c *client) IsPlatformAlertRule(prId types.NamespacedName) bool { + return c.k8sClient.NamespaceInformer().IsClusterMonitoringNamespace(prId.Namespace) } diff --git a/pkg/management/mapper/mapper.go b/pkg/management/mapper/mapper.go index 4941270b9..f2f9a325f 100644 --- a/pkg/management/mapper/mapper.go +++ b/pkg/management/mapper/mapper.go @@ -14,6 +14,7 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" "github.com/openshift/monitoring-plugin/pkg/k8s" ) @@ -101,8 +102,8 @@ func (m *mapper) WatchPrometheusRules(ctx context.Context) { OnUpdate: func(pr *monitoringv1.PrometheusRule) { m.AddPrometheusRule(pr) }, - OnDelete: func(pr *monitoringv1.PrometheusRule) { - m.DeletePrometheusRule(pr) + OnDelete: func(key cache.ObjectName) { + m.DeletePrometheusRule(key) }, } @@ -135,11 +136,11 @@ func (m *mapper) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { m.prometheusRules[promRuleId] = rules } -func (m *mapper) DeletePrometheusRule(pr *monitoringv1.PrometheusRule) { +func (m *mapper) DeletePrometheusRule(key cache.ObjectName) { m.mu.Lock() defer m.mu.Unlock() - delete(m.prometheusRules, PrometheusRuleId(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) + delete(m.prometheusRules, PrometheusRuleId(key)) } func (m *mapper) WatchAlertRelabelConfigs(ctx context.Context) { @@ -151,8 +152,8 @@ func (m *mapper) WatchAlertRelabelConfigs(ctx context.Context) { OnUpdate: func(arc *osmv1.AlertRelabelConfig) { m.AddAlertRelabelConfig(arc) }, - OnDelete: func(arc *osmv1.AlertRelabelConfig) { - m.DeleteAlertRelabelConfig(arc) + OnDelete: func(key cache.ObjectName) { + m.DeleteAlertRelabelConfig(key) }, } @@ -214,11 +215,11 @@ func parseAlertnameFromRelabelConfig(config osmv1.RelabelConfig) string { return "" } -func (m *mapper) DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { +func (m *mapper) DeleteAlertRelabelConfig(key cache.ObjectName) { m.mu.Lock() defer m.mu.Unlock() - arcId := AlertRelabelConfigId(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name}) + arcId := AlertRelabelConfigId(key) delete(m.alertRelabelConfigs, arcId) } diff --git a/pkg/management/mapper/mapper_test.go b/pkg/management/mapper/mapper_test.go index fff7158ca..ceae3c594 100644 --- a/pkg/management/mapper/mapper_test.go +++ b/pkg/management/mapper/mapper_test.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/cache" 
"github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" @@ -320,7 +321,7 @@ var _ = Describe("Mapper", func() { Expect(err).ToNot(HaveOccurred()) By("deleting the PrometheusRule") - mapperClient.DeletePrometheusRule(pr) + mapperClient.DeletePrometheusRule(cache.ObjectName(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) By("verifying the rule is no longer found") _, err = mapperClient.FindAlertRuleById(ruleId) @@ -338,7 +339,7 @@ var _ = Describe("Mapper", func() { By("deleting the non-existent PrometheusRule") Expect(func() { - mapperClient.DeletePrometheusRule(pr) + mapperClient.DeletePrometheusRule(cache.ObjectName(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) }).NotTo(Panic()) By("verifying mapper still works after delete attempt") @@ -635,7 +636,7 @@ var _ = Describe("Mapper", func() { Expect(specs).To(HaveLen(1)) By("deleting the AlertRelabelConfig") - mapperClient.DeleteAlertRelabelConfig(arc) + mapperClient.DeleteAlertRelabelConfig(cache.ObjectName(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name})) By("verifying it's no longer found") specs = mapperClient.GetAlertRelabelConfigSpec(alertRule) @@ -656,7 +657,7 @@ var _ = Describe("Mapper", func() { By("deleting the non-existent AlertRelabelConfig") Expect(func() { - mapperClient.DeleteAlertRelabelConfig(arc) + mapperClient.DeleteAlertRelabelConfig(cache.ObjectName(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name})) }).NotTo(Panic()) By("verifying mapper still works after delete attempt") diff --git a/pkg/management/mapper/types.go b/pkg/management/mapper/types.go index f662a4d84..8929ea1af 100644 --- a/pkg/management/mapper/types.go +++ b/pkg/management/mapper/types.go @@ -6,6 +6,7 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" ) // PrometheusRuleId is a unique identifier for a PrometheusRule resource in Kubernetes, represented by its NamespacedName. @@ -32,7 +33,7 @@ type Client interface { AddPrometheusRule(pr *monitoringv1.PrometheusRule) // DeletePrometheusRule removes a PrometheusRule from the mapper. - DeletePrometheusRule(pr *monitoringv1.PrometheusRule) + DeletePrometheusRule(key cache.ObjectName) // WatchAlertRelabelConfigs starts watching for changes to AlertRelabelConfigs. WatchAlertRelabelConfigs(ctx context.Context) @@ -41,7 +42,7 @@ type Client interface { AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) // DeleteAlertRelabelConfig removes an AlertRelabelConfig from the mapper. - DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) + DeleteAlertRelabelConfig(key cache.ObjectName) // GetAlertRelabelConfigSpec returns the RelabelConfigs that match the given alert rule's labels. 
GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig diff --git a/pkg/management/testutils/k8s_client_mock.go b/pkg/management/testutils/k8s_client_mock.go index 7849c5a0b..cd860d9cb 100644 --- a/pkg/management/testutils/k8s_client_mock.go +++ b/pkg/management/testutils/k8s_client_mock.go @@ -3,9 +3,10 @@ package testutils import ( "context" + "k8s.io/apimachinery/pkg/types" + osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/apimachinery/pkg/types" "github.com/openshift/monitoring-plugin/pkg/k8s" ) @@ -18,6 +19,7 @@ type MockClient struct { PrometheusRuleInformerFunc func() k8s.PrometheusRuleInformerInterface AlertRelabelConfigsFunc func() k8s.AlertRelabelConfigInterface AlertRelabelConfigInformerFunc func() k8s.AlertRelabelConfigInformerInterface + NamespaceInformerFunc func() k8s.NamespaceInformerInterface } // TestConnection mocks the TestConnection method @@ -68,6 +70,14 @@ func (m *MockClient) AlertRelabelConfigInformer() k8s.AlertRelabelConfigInformer return &MockAlertRelabelConfigInformerInterface{} } +// NamespaceInformer mocks the NamespaceInformer method +func (m *MockClient) NamespaceInformer() k8s.NamespaceInformerInterface { + if m.NamespaceInformerFunc != nil { + return m.NamespaceInformerFunc() + } + return &MockNamespaceInformerInterface{} +} + // MockPrometheusAlertsInterface is a mock implementation of k8s.PrometheusAlertsInterface type MockPrometheusAlertsInterface struct { GetAlertsFunc func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) @@ -216,7 +226,16 @@ func (m *MockPrometheusRuleInterface) AddRule(ctx context.Context, namespacedNam // MockPrometheusRuleInformerInterface is a mock implementation of k8s.PrometheusRuleInformerInterface type MockPrometheusRuleInformerInterface struct { - RunFunc func(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error + RunFunc func(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error + ListFunc func(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) + GetFunc func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) + + // Storage for test data + PrometheusRules map[string]*monitoringv1.PrometheusRule +} + +func (m *MockPrometheusRuleInformerInterface) SetPrometheusRules(rules map[string]*monitoringv1.PrometheusRule) { + m.PrometheusRules = rules } // Run mocks the Run method @@ -230,6 +249,39 @@ func (m *MockPrometheusRuleInformerInterface) Run(ctx context.Context, callbacks return ctx.Err() } +// List mocks the List method +func (m *MockPrometheusRuleInformerInterface) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, namespace) + } + + var rules []monitoringv1.PrometheusRule + if m.PrometheusRules != nil { + for _, rule := range m.PrometheusRules { + if namespace == "" || rule.Namespace == namespace { + rules = append(rules, *rule) + } + } + } + return rules, nil +} + +// Get mocks the Get method +func (m *MockPrometheusRuleInformerInterface) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.PrometheusRules != nil { + if rule, exists := m.PrometheusRules[key]; exists { + return rule, true, nil + } + } + + return nil, false, nil 
+} + // MockAlertRelabelConfigInterface is a mock implementation of k8s.AlertRelabelConfigInterface type MockAlertRelabelConfigInterface struct { ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) @@ -322,7 +374,16 @@ func (m *MockAlertRelabelConfigInterface) Delete(ctx context.Context, namespace // MockAlertRelabelConfigInformerInterface is a mock implementation of k8s.AlertRelabelConfigInformerInterface type MockAlertRelabelConfigInformerInterface struct { - RunFunc func(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error + RunFunc func(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error + ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) + GetFunc func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) + + // Storage for test data + AlertRelabelConfigs map[string]*osmv1.AlertRelabelConfig +} + +func (m *MockAlertRelabelConfigInformerInterface) SetAlertRelabelConfigs(configs map[string]*osmv1.AlertRelabelConfig) { + m.AlertRelabelConfigs = configs } // Run mocks the Run method @@ -335,3 +396,61 @@ func (m *MockAlertRelabelConfigInformerInterface) Run(ctx context.Context, callb <-ctx.Done() return ctx.Err() } + +// List mocks the List method +func (m *MockAlertRelabelConfigInformerInterface) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, namespace) + } + + var configs []osmv1.AlertRelabelConfig + if m.AlertRelabelConfigs != nil { + for _, config := range m.AlertRelabelConfigs { + if namespace == "" || config.Namespace == namespace { + configs = append(configs, *config) + } + } + } + return configs, nil +} + +// Get mocks the Get method +func (m *MockAlertRelabelConfigInformerInterface) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, namespace, name) + } + + key := namespace + "/" + name + if m.AlertRelabelConfigs != nil { + if config, exists := m.AlertRelabelConfigs[key]; exists { + return config, true, nil + } + } + + return nil, false, nil +} + +// MockNamespaceInformerInterface is a mock implementation of k8s.NamespaceInformerInterface +type MockNamespaceInformerInterface struct { + IsClusterMonitoringNamespaceFunc func(name string) bool + + // Storage for test data + MonitoringNamespaces map[string]bool +} + +func (m *MockNamespaceInformerInterface) SetMonitoringNamespaces(namespaces map[string]bool) { + m.MonitoringNamespaces = namespaces +} + +// IsClusterMonitoringNamespace mocks the IsClusterMonitoringNamespace method +func (m *MockNamespaceInformerInterface) IsClusterMonitoringNamespace(name string) bool { + if m.IsClusterMonitoringNamespaceFunc != nil { + return m.IsClusterMonitoringNamespaceFunc(name) + } + + if m.MonitoringNamespaces != nil { + return m.MonitoringNamespaces[name] + } + + return false +} diff --git a/pkg/management/testutils/mapper_mock.go b/pkg/management/testutils/mapper_mock.go index e353a3d55..79d1aa53b 100644 --- a/pkg/management/testutils/mapper_mock.go +++ b/pkg/management/testutils/mapper_mock.go @@ -5,6 +5,7 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/client-go/tools/cache" "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) @@ -17,10 +18,10 @@ type MockMapperClient struct { 
FindAlertRuleByIdFunc func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) WatchPrometheusRulesFunc func(ctx context.Context) AddPrometheusRuleFunc func(pr *monitoringv1.PrometheusRule) - DeletePrometheusRuleFunc func(pr *monitoringv1.PrometheusRule) + DeletePrometheusRuleFunc func(key cache.ObjectName) WatchAlertRelabelConfigsFunc func(ctx context.Context) AddAlertRelabelConfigFunc func(arc *osmv1.AlertRelabelConfig) - DeleteAlertRelabelConfigFunc func(arc *osmv1.AlertRelabelConfig) + DeleteAlertRelabelConfigFunc func(key cache.ObjectName) GetAlertRelabelConfigSpecFunc func(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig } @@ -50,9 +51,9 @@ func (m *MockMapperClient) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { } } -func (m *MockMapperClient) DeletePrometheusRule(pr *monitoringv1.PrometheusRule) { +func (m *MockMapperClient) DeletePrometheusRule(key cache.ObjectName) { if m.DeletePrometheusRuleFunc != nil { - m.DeletePrometheusRuleFunc(pr) + m.DeletePrometheusRuleFunc(key) } } @@ -68,9 +69,9 @@ func (m *MockMapperClient) AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) } } -func (m *MockMapperClient) DeleteAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { +func (m *MockMapperClient) DeleteAlertRelabelConfig(key cache.ObjectName) { if m.DeleteAlertRelabelConfigFunc != nil { - m.DeleteAlertRelabelConfigFunc(arc) + m.DeleteAlertRelabelConfigFunc(key) } } diff --git a/pkg/management/update_platform_alert_rule.go b/pkg/management/update_platform_alert_rule.go index 4270ce4e2..80248cc08 100644 --- a/pkg/management/update_platform_alert_rule.go +++ b/pkg/management/update_platform_alert_rule.go @@ -14,15 +14,13 @@ import ( "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) -const openshiftMonitoringNamespace = "openshift-monitoring" - func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error { prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) if err != nil { return err } - if !IsPlatformAlertRule(types.NamespacedName(*prId)) { + if !c.IsPlatformAlertRule(types.NamespacedName(*prId)) { return errors.New("cannot update non-platform alert rule from " + prId.Namespace + "/" + prId.Name) } @@ -36,7 +34,7 @@ func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId string return errors.New("no label changes detected; platform alert rules can only have labels updated") } - return c.applyLabelChangesViaAlertRelabelConfig(ctx, alertRuleId, originalRule.Alert, labelChanges) + return c.applyLabelChangesViaAlertRelabelConfig(ctx, prId.Namespace, alertRuleId, originalRule.Alert, labelChanges) } func (c *client) getOriginalPlatformRule(ctx context.Context, prId *mapper.PrometheusRuleId, alertRuleId string) (*monitoringv1.Rule, error) { @@ -100,12 +98,12 @@ func calculateLabelChanges(originalLabels, newLabels map[string]string) []labelC return changes } -func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, alertRuleId string, alertName string, changes []labelChange) error { +func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, namespace string, alertRuleId string, alertName string, changes []labelChange) error { arcName := fmt.Sprintf("alertmanagement-%s", strings.ToLower(strings.ReplaceAll(alertRuleId, "/", "-"))) - existingArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, openshiftMonitoringNamespace, arcName) + existingArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, 
namespace, arcName) if err != nil { - return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", openshiftMonitoringNamespace, arcName, err) + return fmt.Errorf("failed to get AlertRelabelConfig %s/%s: %w", namespace, arcName, err) } relabelConfigs := c.buildRelabelConfigs(alertName, changes) @@ -125,7 +123,7 @@ func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, ale arc = &osmv1.AlertRelabelConfig{ ObjectMeta: metav1.ObjectMeta{ Name: arcName, - Namespace: openshiftMonitoringNamespace, + Namespace: namespace, }, Spec: osmv1.AlertRelabelConfigSpec{ Configs: relabelConfigs, diff --git a/pkg/management/update_platform_alert_rule_test.go b/pkg/management/update_platform_alert_rule_test.go index a89eedc9a..93ee1b054 100644 --- a/pkg/management/update_platform_alert_rule_test.go +++ b/pkg/management/update_platform_alert_rule_test.go @@ -32,6 +32,11 @@ var _ = Describe("UpdatePlatformAlertRule", func() { mockPR = &testutils.MockPrometheusRuleInterface{} mockARC = &testutils.MockAlertRelabelConfigInterface{} + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR @@ -39,6 +44,9 @@ var _ = Describe("UpdatePlatformAlertRule", func() { AlertRelabelConfigsFunc: func() k8s.AlertRelabelConfigInterface { return mockARC }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } mockMapper = &testutils.MockMapperClient{} @@ -60,7 +68,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { prometheusRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: []monitoringv1.RuleGroup{ @@ -73,13 +81,13 @@ var _ = Describe("UpdatePlatformAlertRule", func() { } mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "openshift-monitoring/openshift-platform-alerts": prometheusRule, + "platform-namespace-1/openshift-platform-alerts": prometheusRule, }) alertRuleId := "test-platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-alerts", }, nil } @@ -105,12 +113,12 @@ var _ = Describe("UpdatePlatformAlertRule", func() { Expect(err).ToNot(HaveOccurred()) By("verifying AlertRelabelConfig was created") - arcs, err := mockARC.List(ctx, "openshift-monitoring") + arcs, err := mockARC.List(ctx, "platform-namespace-1") Expect(err).ToNot(HaveOccurred()) Expect(arcs).To(HaveLen(1)) arc := arcs[0] - Expect(arc.Namespace).To(Equal("openshift-monitoring")) + Expect(arc.Namespace).To(Equal("platform-namespace-1")) Expect(arc.Name).To(Equal("alertmanagement-test-platform-rule-id")) By("verifying relabel configs include label updates with alertname matching") @@ -149,7 +157,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { prometheusRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: []monitoringv1.RuleGroup{ @@ -164,7 +172,7 @@ var _ = 
Describe("UpdatePlatformAlertRule", func() { existingARC := &osmv1.AlertRelabelConfig{ ObjectMeta: metav1.ObjectMeta{ Name: "test-platform-rule-id-relabel", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: osmv1.AlertRelabelConfigSpec{ Configs: []osmv1.RelabelConfig{ @@ -178,16 +186,16 @@ var _ = Describe("UpdatePlatformAlertRule", func() { } mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "openshift-monitoring/openshift-platform-alerts": prometheusRule, + "platform-namespace-1/openshift-platform-alerts": prometheusRule, }) mockARC.SetAlertRelabelConfigs(map[string]*osmv1.AlertRelabelConfig{ - "openshift-monitoring/alertmanagement-test-platform-rule-id": existingARC, + "platform-namespace-1/alertmanagement-test-platform-rule-id": existingARC, }) alertRuleId := "test-platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-alerts", }, nil } @@ -211,7 +219,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { Expect(err).ToNot(HaveOccurred()) By("verifying existing AlertRelabelConfig was updated") - arc, found, err := mockARC.Get(ctx, "openshift-monitoring", "alertmanagement-test-platform-rule-id") + arc, found, err := mockARC.Get(ctx, "platform-namespace-1", "alertmanagement-test-platform-rule-id") Expect(found).To(BeTrue()) Expect(err).ToNot(HaveOccurred()) Expect(arc.Spec.Configs).To(HaveLen(1)) @@ -236,7 +244,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { prometheusRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: []monitoringv1.RuleGroup{ @@ -249,13 +257,13 @@ var _ = Describe("UpdatePlatformAlertRule", func() { } mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "openshift-monitoring/openshift-platform-alerts": prometheusRule, + "platform-namespace-1/openshift-platform-alerts": prometheusRule, }) alertRuleId := "test-platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-alerts", }, nil } @@ -279,7 +287,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { Expect(err).ToNot(HaveOccurred()) By("verifying AlertRelabelConfig includes label removal actions") - arcs, err := mockARC.List(ctx, "openshift-monitoring") + arcs, err := mockARC.List(ctx, "platform-namespace-1") Expect(err).ToNot(HaveOccurred()) Expect(arcs).To(HaveLen(1)) @@ -333,7 +341,7 @@ var _ = Describe("UpdatePlatformAlertRule", func() { prometheusRule := &monitoringv1.PrometheusRule{ ObjectMeta: metav1.ObjectMeta{ Name: "openshift-platform-alerts", - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", }, Spec: monitoringv1.PrometheusRuleSpec{ Groups: []monitoringv1.RuleGroup{ @@ -346,13 +354,13 @@ var _ = Describe("UpdatePlatformAlertRule", func() { } mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "openshift-monitoring/openshift-platform-alerts": prometheusRule, + "platform-namespace-1/openshift-platform-alerts": prometheusRule, }) alertRuleId := "test-platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id 
mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-alerts", }, nil } diff --git a/pkg/management/update_user_defined_alert_rule.go b/pkg/management/update_user_defined_alert_rule.go index ebfe1b7cb..a9ac7bc8d 100644 --- a/pkg/management/update_user_defined_alert_rule.go +++ b/pkg/management/update_user_defined_alert_rule.go @@ -16,7 +16,7 @@ func (c *client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId str return err } - if IsPlatformAlertRule(types.NamespacedName(*prId)) { + if c.IsPlatformAlertRule(types.NamespacedName(*prId)) { return fmt.Errorf("cannot update alert rule in a platform-managed PrometheusRule") } diff --git a/pkg/management/update_user_defined_alert_rule_test.go b/pkg/management/update_user_defined_alert_rule_test.go index 1b2460807..2380381b5 100644 --- a/pkg/management/update_user_defined_alert_rule_test.go +++ b/pkg/management/update_user_defined_alert_rule_test.go @@ -28,10 +28,18 @@ var _ = Describe("UpdateUserDefinedAlertRule", func() { ctx = context.Background() mockPR = &testutils.MockPrometheusRuleInterface{} + mockNSInformer := &testutils.MockNamespaceInformerInterface{} + mockNSInformer.SetMonitoringNamespaces(map[string]bool{ + "platform-namespace-1": true, + "platform-namespace-2": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNSInformer + }, } mockMapper = &testutils.MockMapperClient{} @@ -231,7 +239,7 @@ var _ = Describe("UpdateUserDefinedAlertRule", func() { alertRuleId := "platform-rule-id" mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { return &mapper.PrometheusRuleId{ - Namespace: "openshift-monitoring", + Namespace: "platform-namespace-1", Name: "openshift-platform-rules", }, nil } From f622f25d4b696f40be53272bccd023cdd6667463 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Vila=C3=A7a?= Date: Wed, 10 Dec 2025 12:13:51 +0000 Subject: [PATCH 3/4] Set source label to platform on OpenShift alerting rules (#3) Signed-off-by: machadovilaca --- pkg/management/get_rule_by_id.go | 11 ++++++++++- pkg/management/get_rule_by_id_test.go | 9 +++++++++ pkg/management/list_rules.go | 20 +++++++++++++++++++- pkg/management/list_rules_test.go | 1 + 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/pkg/management/get_rule_by_id.go b/pkg/management/get_rule_by_id.go index 524aeaeb9..c9af605c1 100644 --- a/pkg/management/get_rule_by_id.go +++ b/pkg/management/get_rule_by_id.go @@ -5,6 +5,7 @@ import ( "fmt" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "k8s.io/apimachinery/pkg/types" "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) @@ -37,7 +38,15 @@ func (c *client) GetRuleById(ctx context.Context, alertRuleId string) (monitorin } if rule != nil { - return c.updateRuleBasedOnRelabelConfig(rule) + ruleWithRelabel, err := c.updateRuleBasedOnRelabelConfig(rule) + if err != nil { + return monitoringv1.Rule{}, err + } + + isPlatformRule := c.IsPlatformAlertRule(types.NamespacedName(*prId)) + c.addPlatformSourceLabel(&ruleWithRelabel, isPlatformRule) + + return ruleWithRelabel, nil } return monitoringv1.Rule{}, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) diff --git 
a/pkg/management/get_rule_by_id_test.go b/pkg/management/get_rule_by_id_test.go index 27e61d94a..f467632b5 100644 --- a/pkg/management/get_rule_by_id_test.go +++ b/pkg/management/get_rule_by_id_test.go @@ -23,6 +23,7 @@ var _ = Describe("GetRuleById", func() { ctx context.Context mockK8s *testutils.MockClient mockPR *testutils.MockPrometheusRuleInterface + mockNS *testutils.MockNamespaceInformerInterface mockMapper *testutils.MockMapperClient client management.Client ) @@ -31,10 +32,17 @@ var _ = Describe("GetRuleById", func() { ctx = context.Background() mockPR = &testutils.MockPrometheusRuleInterface{} + mockNS = &testutils.MockNamespaceInformerInterface{} + mockNS.SetMonitoringNamespaces(map[string]bool{ + "monitoring": true, + }) mockK8s = &testutils.MockClient{ PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { return mockPR }, + NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { + return mockNS + }, } mockMapper = &testutils.MockMapperClient{} @@ -104,6 +112,7 @@ var _ = Describe("GetRuleById", func() { By("verifying the returned rule is correct") Expect(rule.Alert).To(Equal("TestAlert2")) Expect(rule.Expr.String()).To(Equal("cpu > 80")) + Expect(rule.Labels).To(HaveKeyWithValue("source", "platform")) Expect(rule.Annotations).To(HaveKeyWithValue("summary", "High CPU usage")) }) diff --git a/pkg/management/list_rules.go b/pkg/management/list_rules.go index bd24a8d63..2d5307dba 100644 --- a/pkg/management/list_rules.go +++ b/pkg/management/list_rules.go @@ -11,7 +11,11 @@ import ( "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) -const alertRuleIdLabel = "alert_rule_id" +const ( + alertRuleIdLabel = "alert_rule_id" + sourceLabel = "source" + platformSourceValue = "platform" +) func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, arOptions AlertRuleOptions) ([]monitoringv1.Rule, error) { if prOptions.Name != "" && prOptions.Namespace == "" { @@ -47,6 +51,8 @@ func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, func (c *client) extractAndFilterRules(pr monitoringv1.PrometheusRule, prOptions *PrometheusRuleOptions, arOptions *AlertRuleOptions) []monitoringv1.Rule { var rules []monitoringv1.Rule + prId := types.NamespacedName{Name: pr.Name, Namespace: pr.Namespace} + isPlatformRule := c.IsPlatformAlertRule(prId) for _, group := range pr.Spec.Groups { // Filter by group name if specified @@ -68,6 +74,7 @@ func (c *client) extractAndFilterRules(pr monitoringv1.PrometheusRule, prOptions // Parse and update the rule based on relabeling configurations r := c.parseRule(rule) if r != nil { + c.addPlatformSourceLabel(r, isPlatformRule) rules = append(rules, *r) } } @@ -76,6 +83,17 @@ func (c *client) extractAndFilterRules(pr monitoringv1.PrometheusRule, prOptions return rules } +func (c *client) addPlatformSourceLabel(rule *monitoringv1.Rule, isPlatformRule bool) { + if rule == nil || !isPlatformRule { + return + } + + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + rule.Labels[sourceLabel] = platformSourceValue +} + func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, pr monitoringv1.PrometheusRule, arOptions *AlertRuleOptions) bool { // Filter by alert name if arOptions.Name != "" && string(rule.Alert) != arOptions.Name { diff --git a/pkg/management/list_rules_test.go b/pkg/management/list_rules_test.go index 802863d4c..61bb1162b 100644 --- a/pkg/management/list_rules_test.go +++ b/pkg/management/list_rules_test.go @@ -386,6 +386,7 @@ var _ = Describe("ListRules", func() 
{ Expect(err).ToNot(HaveOccurred()) Expect(rules).To(HaveLen(1)) Expect(rules[0].Alert).To(Equal("PlatformAlert")) + Expect(rules[0].Labels).To(HaveKeyWithValue("source", "platform")) }) It("should filter by source user-defined", func() { From 7f4226284b637cb4e52a7eb69bf7d9e12baf1576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Vila=C3=A7a?= Date: Wed, 17 Dec 2025 13:02:15 +0000 Subject: [PATCH 4/4] Add persistent relabeled alerts rules (#5) Signed-off-by: machadovilaca --- Makefile | 4 + cmd/plugin-backend.go | 2 +- go.mod | 19 +- go.sum | 40 +- internal/managementrouter/alerts_get_test.go | 2 +- ...ser_defined_alert_rule_bulk_delete_test.go | 240 ++--- ...er_defined_alert_rule_delete_by_id_test.go | 199 ++-- pkg/alert_rule/alert_rule.go | 65 ++ pkg/k8s/alert_relabel_config.go | 58 +- pkg/k8s/alert_relabel_config_informer.go | 85 -- pkg/k8s/client.go | 47 +- .../{namespace_informer.go => namespace.go} | 55 +- pkg/k8s/prometheus_alerts.go | 2 +- pkg/k8s/prometheus_rule.go | 47 +- pkg/k8s/prometheus_rule_informer.go | 86 -- pkg/k8s/relabeled_rules.go | 422 +++++++++ pkg/k8s/types.go | 69 +- .../create_user_defined_alert_rule.go | 10 +- .../create_user_defined_alert_rule_test.go | 396 ++++---- .../delete_user_defined_alert_rule_by_id.go | 21 +- ...lete_user_defined_alert_rule_by_id_test.go | 770 +++++++--------- pkg/management/errors.go | 10 +- pkg/management/get_alerts.go | 39 +- pkg/management/get_alerts_test.go | 211 +++-- pkg/management/get_rule_by_id.go | 55 +- pkg/management/get_rule_by_id_test.go | 254 +++--- pkg/management/list_rules.go | 128 +-- pkg/management/list_rules_test.go | 527 ++++------- pkg/management/management.go | 4 +- pkg/management/management_suite_test.go | 6 + pkg/management/mapper/mapper.go | 287 ------ pkg/management/mapper/mapper_suite_test.go | 13 - pkg/management/mapper/mapper_test.go | 856 ------------------ pkg/management/mapper/new.go | 16 - pkg/management/mapper/types.go | 49 - pkg/management/new.go | 10 - pkg/management/relabel_config.go | 46 - pkg/management/relabel_config_test.go | 171 ---- pkg/management/testutils/k8s_client_mock.go | 177 +--- pkg/management/testutils/mapper_mock.go | 83 -- pkg/management/update_platform_alert_rule.go | 31 +- .../update_platform_alert_rule_test.go | 619 ++++++------- .../update_user_defined_alert_rule.go | 23 +- .../update_user_defined_alert_rule_test.go | 531 +++++++---- pkg/server.go | 22 +- test/e2e/alert_management_api_test.go | 334 +++++++ test/e2e/framework/framework.go | 95 ++ test/e2e/relabeled_rules_test.go | 318 +++++++ 48 files changed, 3310 insertions(+), 4244 deletions(-) create mode 100644 pkg/alert_rule/alert_rule.go delete mode 100644 pkg/k8s/alert_relabel_config_informer.go rename pkg/k8s/{namespace_informer.go => namespace.go} (61%) delete mode 100644 pkg/k8s/prometheus_rule_informer.go create mode 100644 pkg/k8s/relabeled_rules.go delete mode 100644 pkg/management/mapper/mapper.go delete mode 100644 pkg/management/mapper/mapper_suite_test.go delete mode 100644 pkg/management/mapper/mapper_test.go delete mode 100644 pkg/management/mapper/new.go delete mode 100644 pkg/management/mapper/types.go delete mode 100644 pkg/management/relabel_config.go delete mode 100644 pkg/management/relabel_config_test.go delete mode 100644 pkg/management/testutils/mapper_mock.go create mode 100644 test/e2e/alert_management_api_test.go create mode 100644 test/e2e/framework/framework.go create mode 100644 test/e2e/relabeled_rules_test.go diff --git a/Makefile b/Makefile index 9c6706886..20a641653 100644 --- 
a/Makefile +++ b/Makefile @@ -59,6 +59,10 @@ start-backend: test-backend: go test ./pkg/... ./internal/... -v +.PHONY: test-e2e +test-e2e: + PLUGIN_URL=http://localhost:9001 go test -v -timeout=150m -count=1 ./test/e2e + .PHONY: build-image build-image: ./scripts/build-image.sh diff --git a/cmd/plugin-backend.go b/cmd/plugin-backend.go index 0d1a3b165..c7b79d6da 100644 --- a/cmd/plugin-backend.go +++ b/cmd/plugin-backend.go @@ -17,7 +17,7 @@ var ( portArg = flag.Int("port", 0, "server port to listen on (default: 9443)\nports 9444 and 9445 reserved for other use") certArg = flag.String("cert", "", "cert file path to enable TLS (disabled by default)") keyArg = flag.String("key", "", "private key file path to enable TLS (disabled by default)") - featuresArg = flag.String("features", "", "enabled features, comma separated.\noptions: ['acm-alerting', 'incidents', 'dev-config', 'perses-dashboards', 'management-api']") + featuresArg = flag.String("features", "", "enabled features, comma separated.\noptions: ['acm-alerting', 'incidents', 'dev-config', 'perses-dashboards', 'alert-management-api']") staticPathArg = flag.String("static-path", "", "static files path to serve frontend (default: './web/dist')") configPathArg = flag.String("config-path", "", "config files path (default: './config')") pluginConfigArg = flag.String("plugin-config-path", "", "plugin yaml configuration") diff --git a/go.mod b/go.mod index 8cfe2772e..dbb42c311 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,8 @@ require ( github.com/openshift/library-go v0.0.0-20240905123346-5bdbfe35a6f5 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 + github.com/prometheus/common v0.67.4 + github.com/prometheus/prometheus v0.308.0 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.11.1 gopkg.in/yaml.v2 v2.4.0 @@ -24,6 +26,7 @@ require ( ) require ( + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -48,25 +51,27 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/google/gnostic-models v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect + github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_model v0.6.2 // indirect github.com/spf13/pflag v1.0.6 // indirect github.com/x448/float16 v0.8.4 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.44.0 // indirect - golang.org/x/oauth2 v0.31.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/term v0.35.0 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/net v0.46.0 // indirect + golang.org/x/oauth2 v0.32.0 // indirect + golang.org/x/sys v0.37.0 // indirect + 
golang.org/x/term v0.36.0 // indirect + golang.org/x/text v0.30.0 // indirect golang.org/x/time v0.13.0 // indirect - golang.org/x/tools v0.36.0 // indirect + golang.org/x/tools v0.37.0 // indirect google.golang.org/protobuf v1.36.10 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index 975b1a057..3a26917ce 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -57,14 +59,16 @@ github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7O github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= -github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8 h1:ZI8gCoCjGzPsum4L21jHdQs8shFBIQih1TM9Rd/c+EQ= +github.com/google/pprof v0.0.0-20250923004556-9e5a51aed1e8/go.mod h1:I6V7YzU0XDpsHqbsyrghnFZLO1gwK6NPTNvmetQIk9U= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyEE= github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853 h1:cLN4IBkmkYZNnk7EAJ0BHIethd+J6LqxFNw5mSiI2bM= +github.com/grafana/regexp v0.0.0-20250905093917-f7b3be9d1853/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= @@ -100,6 +104,12 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0 h github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.87.0/go.mod h1:WHiLZmOWVop/MoYvRD58LfnPeyE+dcITby/jQjg83Hw= github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0 h1:rrZriucuC8ZUOPr8Asvavb9pbzqXSsAeY79aH8xnXlc= github.com/prometheus-operator/prometheus-operator/pkg/client v0.87.0/go.mod h1:OMvC2XJGxPeEAKf5qB1u7DudV46HA8ePxYslRjxQcbk= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc= +github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI= 
+github.com/prometheus/prometheus v0.308.0 h1:kVh/5m1n6m4cSK9HYTDEbMxzuzCWyEdPdKSxFRxXj04= +github.com/prometheus/prometheus v0.308.0/go.mod h1:xXYKzScyqyFHihpS0UsXpC2F3RA/CygOs7wb4mpdusE= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -117,6 +127,8 @@ github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= +go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= @@ -132,10 +144,10 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= -golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= -golang.org/x/oauth2 v0.31.0 h1:8Fq0yVZLh4j4YA47vHKFTa9Ew5XIrCP8LC6UeNZnLxo= -golang.org/x/oauth2 v0.31.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4= +golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= +golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY= +golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -143,22 +155,22 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= -golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= +golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= +golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.36.0 h1:zMPR+aF8gfksFprF/Nc/rd1wRS1EI6nDBGyWAvDzx2Q= 
+golang.org/x/term v0.36.0/go.mod h1:Qu394IJq6V6dCBRgwqshf3mPF85AqzYEzofzRdZkWss= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= +golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= golang.org/x/time v0.13.0 h1:eUlYslOIt32DgYD6utsuUeHs4d7AsEYLuIAdg7FlYgI= golang.org/x/time v0.13.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= +golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/managementrouter/alerts_get_test.go b/internal/managementrouter/alerts_get_test.go index 3c612c878..a27091b06 100644 --- a/internal/managementrouter/alerts_get_test.go +++ b/internal/managementrouter/alerts_get_test.go @@ -34,7 +34,7 @@ var _ = Describe("GetAlerts", func() { }, } - mockManagement = management.NewWithCustomMapper(context.Background(), mockK8s, &testutils.MockMapperClient{}) + mockManagement = management.New(context.Background(), mockK8s) router = managementrouter.New(mockManagement) }) diff --git a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go index 1b3e7ecc3..53e29949a 100644 --- a/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go +++ b/internal/managementrouter/user_defined_alert_rule_bulk_delete_test.go @@ -6,94 +6,100 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "strings" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/openshift/monitoring-plugin/internal/managementrouter" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { var ( - router http.Handler - mockK8sRules *testutils.MockPrometheusRuleInterface - mockK8s *testutils.MockClient - mockMapper *testutils.MockMapperClient + router http.Handler + mockK8s *testutils.MockClient ) - BeforeEach(func() { - mockK8sRules = &testutils.MockPrometheusRuleInterface{} - - userPR := monitoringv1.PrometheusRule{} - userPR.Name = "user-pr" - userPR.Namespace = "default" - userPR.Spec.Groups = []monitoringv1.RuleGroup{ - { - Name: "g1", - Rules: []monitoringv1.Rule{{Alert: "u1"}, {Alert: "u2"}}, - }, - } + var ( + userRule1Name = "u1" + userRule1 = monitoringv1.Rule{Alert: userRule1Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule1Id = alertrule.GetAlertingRuleId(&userRule1) - platformPR := monitoringv1.PrometheusRule{} - platformPR.Name = "platform-pr" - platformPR.Namespace = "platform-namespace-1" - platformPR.Spec.Groups = []monitoringv1.RuleGroup{ - { - Name: "pg1", - Rules: []monitoringv1.Rule{{Alert: "platform1"}}, - }, - } + userRule2Name = "u2" + userRule2 = monitoringv1.Rule{Alert: userRule2Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule2Id = alertrule.GetAlertingRuleId(&userRule2) - mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "default/user-pr": &userPR, - "platform-namespace-1/platform-pr": &platformPR, - }) + platformRuleName = "platform" + platformRule = monitoringv1.Rule{Alert: platformRuleName, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "platform-namespace-1", k8s.PrometheusRuleLabelName: "platform-pr"}} + platformRuleId = alertrule.GetAlertingRuleId(&platformRule) + ) - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockK8sRules - }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer - }, + BeforeEach(func() { + mockK8s = &testutils.MockClient{} + mgmt := management.New(context.Background(), mockK8s) + router = managementrouter.New(mgmt) + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Rules: []monitoringv1.Rule{userRule1, userRule2, platformRule}, + }, + }, + }, + }, true, nil + }, + DeleteFunc: func(ctx context.Context, namespace string, name string) error { + return nil + }, + UpdateFunc: func(ctx 
context.Context, pr monitoringv1.PrometheusRule) error { + return nil + }, + } } - mockMapper = &testutils.MockMapperClient{ - GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(rule.Alert) - }, - FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - id := string(alertRuleId) - pr := mapper.PrometheusRuleId{ - Namespace: "default", - Name: "user-pr", - } - if id == "platform1" { - pr.Namespace = "platform-namespace-1" - pr.Name = "platform-pr" - } - return &pr, nil - }, + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case userRule1Id: + return userRule1, true + case userRule2Id: + return userRule2, true + case platformRuleId: + return platformRule, true + default: + return monitoringv1.Rule{}, false + } + }, + } } - mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) - router = managementrouter.New(mgmt) + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return strings.HasPrefix(name, "platform-namespace-") + }, + } + } }) Context("when deleting multiple rules", func() { It("returns deleted and failed for mixed ruleIds and updates rules", func() { - body := map[string]interface{}{"ruleIds": []string{"u1", "platform1", ""}} + body := map[string]any{"ruleIds": []string{userRule1Id, platformRuleId, ""}} buf, _ := json.Marshal(body) req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) w := httptest.NewRecorder() @@ -109,50 +115,29 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { } Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) Expect(resp.Rules).To(HaveLen(3)) + // u1 -> success - Expect(resp.Rules[0].Id).To(Equal("u1")) - Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) + Expect(resp.Rules[0].Id).To(Equal(userRule1Id)) + Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent), resp.Rules[0].Message) Expect(resp.Rules[0].Message).To(BeEmpty()) + // platform1 -> not allowed - Expect(resp.Rules[1].Id).To(Equal("platform1")) - Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusMethodNotAllowed)) + Expect(resp.Rules[1].Id).To(Equal(platformRuleId)) + Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusMethodNotAllowed), resp.Rules[1].Message) Expect(resp.Rules[1].Message).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) + // "" -> bad request (missing id) Expect(resp.Rules[2].Id).To(Equal("")) - Expect(resp.Rules[2].StatusCode).To(Equal(http.StatusBadRequest)) + Expect(resp.Rules[2].StatusCode).To(Equal(http.StatusBadRequest), resp.Rules[2].Message) Expect(resp.Rules[2].Message).To(ContainSubstring("missing ruleId")) - - prUser, _, err := mockK8sRules.Get(context.Background(), "default", "user-pr") - Expect(err).NotTo(HaveOccurred()) - userRuleNames := []string{} - for _, g := range prUser.Spec.Groups { - for _, r := range g.Rules { - userRuleNames = append(userRuleNames, r.Alert) - } - } - Expect(userRuleNames).NotTo(ContainElement("u1")) - Expect(userRuleNames).To(ContainElement("u2")) - - prPlatform, _, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") - Expect(err).NotTo(HaveOccurred()) - foundPlatform := 
false - for _, g := range prPlatform.Spec.Groups { - for _, r := range g.Rules { - if r.Alert == "platform1" { - foundPlatform = true - } - } - } - Expect(foundPlatform).To(BeTrue()) }) - It("succeeds for user rule and fails for platform rule (mixed case)", func() { - body := map[string]interface{}{"ruleIds": []string{"u1", "platform1"}} + It("returns all deleted when all user ruleIds succeed", func() { + body := map[string]any{"ruleIds": []string{userRule1Id, userRule2Id}} buf, _ := json.Marshal(body) req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) w := httptest.NewRecorder() router.ServeHTTP(w, req) - Expect(w.Code).To(Equal(http.StatusOK)) var resp struct { Rules []struct { @@ -163,69 +148,16 @@ var _ = Describe("BulkDeleteUserDefinedAlertRules", func() { } Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) Expect(resp.Rules).To(HaveLen(2)) - Expect(resp.Rules[0].Id).To(Equal("u1")) - Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) - Expect(resp.Rules[1].Id).To(Equal("platform1")) - Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusMethodNotAllowed)) - Expect(resp.Rules[1].Message).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) - - // Ensure only user rule was removed - prUser, _, err := mockK8sRules.Get(context.Background(), "default", "user-pr") - Expect(err).NotTo(HaveOccurred()) - userRuleNames := []string{} - for _, g := range prUser.Spec.Groups { - for _, r := range g.Rules { - userRuleNames = append(userRuleNames, r.Alert) - } - } - Expect(userRuleNames).NotTo(ContainElement("u1")) - Expect(userRuleNames).To(ContainElement("u2")) - - // Platform rule remains intact - prPlatform, _, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") - Expect(err).NotTo(HaveOccurred()) - foundPlatform := false - for _, g := range prPlatform.Spec.Groups { - for _, r := range g.Rules { - if r.Alert == "platform1" { - foundPlatform = true - } - } - } - Expect(foundPlatform).To(BeTrue()) - }) - It("returns all deleted when all user ruleIds succeed", func() { - body := map[string]interface{}{"ruleIds": []string{"u1", "u2"}} - buf, _ := json.Marshal(body) - req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules", bytes.NewReader(buf)) - w := httptest.NewRecorder() - router.ServeHTTP(w, req) + // platform1 -> success + Expect(resp.Rules[0].Id).To(Equal(userRule1Id)) + Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent), resp.Rules[0].Message) + Expect(resp.Rules[0].Message).To(BeEmpty()) - Expect(w.Code).To(Equal(http.StatusOK)) - var resp struct { - Rules []struct { - Id string `json:"id"` - StatusCode int `json:"status_code"` - Message string `json:"message"` - } `json:"rules"` - } - Expect(json.NewDecoder(w.Body).Decode(&resp)).To(Succeed()) - Expect(resp.Rules).To(HaveLen(2)) - Expect(resp.Rules[0].Id).To(Equal("u1")) - Expect(resp.Rules[0].StatusCode).To(Equal(http.StatusNoContent)) - Expect(resp.Rules[1].Id).To(Equal("u2")) - Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusNoContent)) - - // User PrometheusRule should be deleted after removing the last rule - _, found, err := mockK8sRules.Get(context.Background(), "default", "user-pr") - Expect(err).NotTo(HaveOccurred()) - Expect(found).To(BeFalse()) - - // Platform PrometheusRule remains present - _, found, err = mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") - Expect(err).NotTo(HaveOccurred()) - Expect(found).To(BeTrue()) + // platform2 
-> success + Expect(resp.Rules[1].Id).To(Equal(userRule2Id)) + Expect(resp.Rules[1].StatusCode).To(Equal(http.StatusNoContent), resp.Rules[1].Message) + Expect(resp.Rules[1].Message).To(BeEmpty()) }) }) diff --git a/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go index 9ddb0371c..6669951b7 100644 --- a/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go +++ b/internal/managementrouter/user_defined_alert_rule_delete_by_id_test.go @@ -2,78 +2,101 @@ package managementrouter_test import ( "context" - "fmt" "net/http" "net/http/httptest" + "strings" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/openshift/monitoring-plugin/internal/managementrouter" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("DeleteUserDefinedAlertRuleById", func() { var ( - router http.Handler - mockK8sRules *testutils.MockPrometheusRuleInterface - mockK8s *testutils.MockClient - mockMapper *testutils.MockMapperClient + router http.Handler + mockK8s *testutils.MockClient + ) + + var ( + userRule1Name = "u1" + userRule1 = monitoringv1.Rule{Alert: userRule1Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule1Id = alertrule.GetAlertingRuleId(&userRule1) + + userRule2Name = "u2" + userRule2 = monitoringv1.Rule{Alert: userRule2Name, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "default", k8s.PrometheusRuleLabelName: "user-pr"}} + userRule2Id = alertrule.GetAlertingRuleId(&userRule2) + + platformRuleName = "p1" + platformRule = monitoringv1.Rule{Alert: platformRuleName, Labels: map[string]string{k8s.PrometheusRuleLabelNamespace: "platform-namespace-1", k8s.PrometheusRuleLabelName: "platform-pr"}} + platformRuleId = alertrule.GetAlertingRuleId(&platformRule) ) BeforeEach(func() { - mockK8sRules = &testutils.MockPrometheusRuleInterface{} - - userPR := monitoringv1.PrometheusRule{} - userPR.Name = "user-pr" - userPR.Namespace = "default" - userPR.Spec.Groups = []monitoringv1.RuleGroup{ - { - Name: "g1", - Rules: []monitoringv1.Rule{{Alert: "u1"}, {Alert: "u2"}}, - }, + mockK8s = &testutils.MockClient{} + mgmt := management.New(context.Background(), mockK8s) + router = managementrouter.New(mgmt) + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Rules: []monitoringv1.Rule{userRule1, userRule2, platformRule}, + }, + }, + }, + }, true, nil + }, + DeleteFunc: func(ctx context.Context, namespace string, name string) error { + return nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + return nil + }, + } } - platformPR := monitoringv1.PrometheusRule{} - platformPR.Name = "platform-pr" - 
platformPR.Namespace = "platform-namespace-1" - platformPR.Spec.Groups = []monitoringv1.RuleGroup{ - { - Name: "pg1", - Rules: []monitoringv1.Rule{{Alert: "p1"}}, - }, + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case userRule1Id: + return userRule1, true + case userRule2Id: + return userRule2, true + case platformRuleId: + return platformRule, true + default: + return monitoringv1.Rule{}, false + } + }, + } } - mockK8sRules.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "default/user-pr": &userPR, - "platform-namespace-1/platform-pr": &platformPR, - }) - - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockK8sRules - }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer - }, + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return strings.HasPrefix(name, "platform-namespace-") + }, + } } }) Context("when ruleId is missing or blank", func() { It("returns 400 with missing ruleId message", func() { - mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) - router = managementrouter.New(mgmt) - req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/%20", nil) w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -83,54 +106,8 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { }) }) - Context("when deletion succeeds", func() { - It("deletes a user-defined rule and keeps the other intact", func() { - mockMapper = &testutils.MockMapperClient{ - GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(rule.Alert) - }, - FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - pr := mapper.PrometheusRuleId{ - Namespace: "default", - Name: "user-pr", - } - return &pr, nil - }, - } - - mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) - router = managementrouter.New(mgmt) - - req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/u1", nil) - w := httptest.NewRecorder() - router.ServeHTTP(w, req) - - Expect(w.Code).To(Equal(http.StatusNoContent)) - - pr, found, err := mockK8sRules.Get(context.Background(), "default", "user-pr") - Expect(found).To(BeTrue()) - Expect(err).NotTo(HaveOccurred()) - ruleNames := []string{} - for _, g := range pr.Spec.Groups { - for _, r := range g.Rules { - ruleNames = append(ruleNames, r.Alert) - } - } - Expect(ruleNames).NotTo(ContainElement("u1")) - Expect(ruleNames).To(ContainElement("u2")) - }) - }) - Context("when rule is not found", func() { It("returns 404 with expected message", func() { - mockMapper = &testutils.MockMapperClient{ - FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, fmt.Errorf("alert rule not found") - }, - } - mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) - router = managementrouter.New(mgmt) - req := httptest.NewRequest(http.MethodDelete, 
"/api/v1/alerting/rules/missing", nil) w := httptest.NewRecorder() router.ServeHTTP(w, req) @@ -140,42 +117,24 @@ var _ = Describe("DeleteUserDefinedAlertRuleById", func() { }) }) - Context("when platform rule", func() { - It("rejects platform rule deletion and PR remains unchanged", func() { - mockMapper = &testutils.MockMapperClient{ - GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(rule.Alert) - }, - FindAlertRuleByIdFunc: func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - pr := mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "platform-pr", - } - return &pr, nil - }, - } + Context("when deleting a user-defined rule", func() { + It("returns 204", func() { + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/"+userRule1Id, nil) + w := httptest.NewRecorder() + router.ServeHTTP(w, req) - mgmt := management.NewWithCustomMapper(context.Background(), mockK8s, mockMapper) - router = managementrouter.New(mgmt) + Expect(w.Code).To(Equal(http.StatusNoContent)) + }) + }) - req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/p1", nil) + Context("when deleting a platform rule", func() { + It("returns 405 with expected message", func() { + req := httptest.NewRequest(http.MethodDelete, "/api/v1/alerting/rules/"+platformRuleId, nil) w := httptest.NewRecorder() router.ServeHTTP(w, req) Expect(w.Code).To(Equal(http.StatusMethodNotAllowed)) Expect(w.Body.String()).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) - - pr, found, err := mockK8sRules.Get(context.Background(), "platform-namespace-1", "platform-pr") - Expect(found).To(BeTrue()) - Expect(err).NotTo(HaveOccurred()) - for _, g := range pr.Spec.Groups { - for _, r := range g.Rules { - if r.Alert == "p1" { - found = true - } - } - } - Expect(found).To(BeTrue()) }) }) }) diff --git a/pkg/alert_rule/alert_rule.go b/pkg/alert_rule/alert_rule.go new file mode 100644 index 000000000..7fea718d9 --- /dev/null +++ b/pkg/alert_rule/alert_rule.go @@ -0,0 +1,65 @@ +package alertrule + +import ( + "crypto/sha256" + "fmt" + "sort" + "strings" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" +) + +func GetAlertingRuleId(alertRule *monitoringv1.Rule) string { + var kind, name string + if alertRule.Alert != "" { + kind = "alert" + name = alertRule.Alert + } else if alertRule.Record != "" { + kind = "record" + name = alertRule.Record + } else { + return "" + } + + expr := alertRule.Expr.String() + forDuration := "" + if alertRule.For != nil { + forDuration = string(*alertRule.For) + } + + var sortedLabels []string + if alertRule.Labels != nil { + for key, value := range alertRule.Labels { + if strings.HasPrefix(key, "openshift_io_") || key == "alertname" { + // Skip system labels + continue + } + + sortedLabels = append(sortedLabels, fmt.Sprintf("%s=%s", key, value)) + } + sort.Strings(sortedLabels) + } + + var sortedAnnotations []string + if alertRule.Annotations != nil { + for key, value := range alertRule.Annotations { + sortedAnnotations = append(sortedAnnotations, fmt.Sprintf("%s=%s", key, value)) + } + sort.Strings(sortedAnnotations) + } + + // Build the hash input string + hashInput := strings.Join([]string{ + kind, + name, + expr, + forDuration, + strings.Join(sortedLabels, ","), + strings.Join(sortedAnnotations, ","), + }, "\n") + + // Generate SHA256 hash + hash := sha256.Sum256([]byte(hashInput)) + + return 
fmt.Sprintf("%s;%x", name, hash) +} diff --git a/pkg/k8s/alert_relabel_config.go b/pkg/k8s/alert_relabel_config.go index eca561a0e..2405e2e42 100644 --- a/pkg/k8s/alert_relabel_config.go +++ b/pkg/k8s/alert_relabel_config.go @@ -6,27 +6,69 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/tools/cache" ) type alertRelabelConfigManager struct { - clientset *osmv1client.Clientset - informer AlertRelabelConfigInformerInterface + clientset *osmv1client.Clientset + arcInformer cache.SharedIndexInformer } -func newAlertRelabelConfigManager(clientset *osmv1client.Clientset, informer AlertRelabelConfigInformerInterface) AlertRelabelConfigInterface { - return &alertRelabelConfigManager{ - clientset: clientset, - informer: informer, +func newAlertRelabelConfigManager(ctx context.Context, clientset *osmv1client.Clientset) (*alertRelabelConfigManager, error) { + arcInformer := cache.NewSharedIndexInformer( + alertRelabelConfigListWatchForAllNamespaces(clientset), + &osmv1.AlertRelabelConfig{}, + 0, + cache.Indexers{}, + ) + + arcm := &alertRelabelConfigManager{ + clientset: clientset, + arcInformer: arcInformer, } + + go arcm.arcInformer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("AlertRelabelConfig informer", ctx.Done(), + arcm.arcInformer.HasSynced, + ) + + return arcm, nil +} + +func alertRelabelConfigListWatchForAllNamespaces(clientset *osmv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "alertrelabelconfigs", "", fields.Everything()) } func (arcm *alertRelabelConfigManager) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { - return arcm.informer.List(ctx, namespace) + arcs := arcm.arcInformer.GetStore().List() + + alertRelabelConfigs := make([]osmv1.AlertRelabelConfig, 0, len(arcs)) + for _, item := range arcs { + arc, ok := item.(*osmv1.AlertRelabelConfig) + if !ok { + continue + } + alertRelabelConfigs = append(alertRelabelConfigs, *arc) + } + + return alertRelabelConfigs, nil } func (arcm *alertRelabelConfigManager) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { - return arcm.informer.Get(ctx, namespace, name) + arc, err := arcm.clientset.MonitoringV1().AlertRelabelConfigs(namespace).Get(ctx, name, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, err + } + + return arc, true, nil } func (arcm *alertRelabelConfigManager) Create(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { diff --git a/pkg/k8s/alert_relabel_config_informer.go b/pkg/k8s/alert_relabel_config_informer.go deleted file mode 100644 index da6732956..000000000 --- a/pkg/k8s/alert_relabel_config_informer.go +++ /dev/null @@ -1,85 +0,0 @@ -package k8s - -import ( - "context" - - osmv1 "github.com/openshift/api/monitoring/v1" - osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" - "k8s.io/apimachinery/pkg/fields" - "k8s.io/client-go/tools/cache" -) - -type alertRelabelConfigInformer struct { - informer cache.SharedIndexInformer -} - -func newAlertRelabelConfigInformer(clientset *osmv1client.Clientset) AlertRelabelConfigInformerInterface { - informer := cache.NewSharedIndexInformer( - 
alertRelabelConfigListWatchForAllNamespaces(clientset), - &osmv1.AlertRelabelConfig{}, - 0, - cache.Indexers{}, - ) - - return &alertRelabelConfigInformer{ - informer: informer, - } -} - -func alertRelabelConfigListWatchForAllNamespaces(clientset *osmv1client.Clientset) *cache.ListWatch { - return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "alertrelabelconfigs", "", fields.Everything()) -} - -func (arci *alertRelabelConfigInformer) Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error { - _, err := arci.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - arc, ok := obj.(*osmv1.AlertRelabelConfig) - if !ok { - return - } - callbacks.OnAdd(arc) - }, - UpdateFunc: func(oldObj interface{}, newObj interface{}) { - arc, ok := newObj.(*osmv1.AlertRelabelConfig) - if !ok { - return - } - callbacks.OnUpdate(arc) - }, - DeleteFunc: func(obj interface{}) { - k, err := cache.DeletionHandlingObjectToName(obj) - if err != nil { - return - } - callbacks.OnDelete(k) - }, - }) - - go arci.informer.Run(ctx.Done()) - - cache.WaitForNamedCacheSync("AlertRelabelConfig informer", ctx.Done(), - arci.informer.HasSynced, - ) - - return err -} - -func (arci *alertRelabelConfigInformer) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { - arcs := arci.informer.GetStore().List() - - alertRelabelConfigs := make([]osmv1.AlertRelabelConfig, 0, len(arcs)) - for _, arc := range arcs { - alertRelabelConfigs = append(alertRelabelConfigs, *arc.(*osmv1.AlertRelabelConfig)) - } - - return alertRelabelConfigs, nil -} - -func (arci *alertRelabelConfigInformer) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { - arc, exists, err := arci.informer.GetStore().GetByKey(namespace + "/" + name) - if err != nil { - return nil, exists, err - } - - return arc.(*osmv1.AlertRelabelConfig), exists, nil -} diff --git a/pkg/k8s/client.go b/pkg/k8s/client.go index 776eb6687..3db48fe1c 100644 --- a/pkg/k8s/client.go +++ b/pkg/k8s/client.go @@ -9,8 +9,11 @@ import ( osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "github.com/sirupsen/logrus" ) +var log = logrus.WithField("module", "k8s") + var _ Client = (*client)(nil) type client struct { @@ -19,15 +22,12 @@ type client struct { osmv1clientset *osmv1client.Clientset config *rest.Config - prometheusAlerts PrometheusAlertsInterface - - prometheusRuleManager PrometheusRuleInterface - prometheusRuleInformer PrometheusRuleInformerInterface - - alertRelabelConfigManager AlertRelabelConfigInterface - alertRelabelConfigInformer AlertRelabelConfigInformerInterface + prometheusAlerts *prometheusAlerts - namespaceInformer NamespaceInformerInterface + prometheusRuleManager *prometheusRuleManager + alertRelabelConfigManager *alertRelabelConfigManager + namespaceManager *namespaceManager + relabeledRulesManager *relabeledRulesManager } func newClient(ctx context.Context, config *rest.Config) (Client, error) { @@ -55,17 +55,22 @@ func newClient(ctx context.Context, config *rest.Config) (Client, error) { c.prometheusAlerts = newPrometheusAlerts(clientset, config) - c.prometheusRuleInformer = newPrometheusRuleInformer(monitoringv1clientset) - c.prometheusRuleManager = newPrometheusRuleManager(monitoringv1clientset, c.prometheusRuleInformer) + c.prometheusRuleManager = newPrometheusRuleManager(ctx, monitoringv1clientset) + + 
c.alertRelabelConfigManager, err = newAlertRelabelConfigManager(ctx, osmv1clientset) + if err != nil { + return nil, fmt.Errorf("failed to create alert relabel config manager: %w", err) + } - c.alertRelabelConfigInformer = newAlertRelabelConfigInformer(osmv1clientset) - c.alertRelabelConfigManager = newAlertRelabelConfigManager(osmv1clientset, c.alertRelabelConfigInformer) + c.namespaceManager, err = newNamespaceManager(ctx, clientset) + if err != nil { + return nil, fmt.Errorf("failed to create namespace manager: %w", err) + } - namespaceInformer, err := newNamespaceInformer(ctx, clientset) + c.relabeledRulesManager, err = newRelabeledRulesManager(ctx, c.namespaceManager, monitoringv1clientset, clientset) if err != nil { - return nil, fmt.Errorf("failed to create namespace informer: %w", err) + return nil, fmt.Errorf("failed to create relabeled rules config manager: %w", err) } - c.namespaceInformer = namespaceInformer return c, nil } @@ -86,18 +91,14 @@ func (c *client) PrometheusRules() PrometheusRuleInterface { return c.prometheusRuleManager } -func (c *client) PrometheusRuleInformer() PrometheusRuleInformerInterface { - return c.prometheusRuleInformer -} - func (c *client) AlertRelabelConfigs() AlertRelabelConfigInterface { return c.alertRelabelConfigManager } -func (c *client) AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface { - return c.alertRelabelConfigInformer +func (c *client) RelabeledRules() RelabeledRulesInterface { + return c.relabeledRulesManager } -func (c *client) NamespaceInformer() NamespaceInformerInterface { - return c.namespaceInformer +func (c *client) Namespace() NamespaceInterface { + return c.namespaceManager } diff --git a/pkg/k8s/namespace_informer.go b/pkg/k8s/namespace.go similarity index 61% rename from pkg/k8s/namespace_informer.go rename to pkg/k8s/namespace.go index 27cc61def..aba97a2a4 100644 --- a/pkg/k8s/namespace_informer.go +++ b/pkg/k8s/namespace.go @@ -2,6 +2,7 @@ package k8s import ( "context" + "fmt" "sync" corev1 "k8s.io/api/core/v1" @@ -16,7 +17,7 @@ const ( ClusterMonitoringLabel = "openshift.io/cluster-monitoring" ) -type namespaceInformer struct { +type namespaceManager struct { informer cache.SharedIndexInformer // monitoringNamespaces stores namespaces with openshift.io/cluster-monitoring=true @@ -24,7 +25,7 @@ type namespaceInformer struct { mu sync.RWMutex } -func newNamespaceInformer(ctx context.Context, clientset kubernetes.Interface) (NamespaceInformerInterface, error) { +func newNamespaceManager(ctx context.Context, clientset *kubernetes.Clientset) (*namespaceManager, error) { informer := cache.NewSharedIndexInformer( namespaceListWatch(clientset.CoreV1()), &corev1.Namespace{}, @@ -32,42 +33,46 @@ func newNamespaceInformer(ctx context.Context, clientset kubernetes.Interface) ( cache.Indexers{}, ) - ni := &namespaceInformer{ + nm := &namespaceManager{ informer: informer, monitoringNamespaces: make(map[string]bool), + mu: sync.RWMutex{}, } - _, err := ni.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + _, err := nm.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { ns, ok := obj.(*corev1.Namespace) if !ok { return } - ni.updateMonitoringNamespace(ns) + nm.updateMonitoringNamespace(ns) }, UpdateFunc: func(oldObj interface{}, newObj interface{}) { ns, ok := newObj.(*corev1.Namespace) if !ok { return } - ni.updateMonitoringNamespace(ns) + nm.updateMonitoringNamespace(ns) }, DeleteFunc: func(obj interface{}) { namespaceName, err := 
cache.DeletionHandlingMetaNamespaceKeyFunc(obj) if err != nil { return } - ni.removeMonitoringNamespace(namespaceName) + nm.removeMonitoringNamespace(namespaceName) }, }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to namespace informer: %w", err) + } - go ni.informer.Run(ctx.Done()) + go nm.informer.Run(ctx.Done()) cache.WaitForNamedCacheSync("Namespace informer", ctx.Done(), - ni.informer.HasSynced, + nm.informer.HasSynced, ) - return ni, err + return nm, nil } func namespaceListWatch(client corev1client.CoreV1Interface) *cache.ListWatch { @@ -81,25 +86,25 @@ func namespaceListWatch(client corev1client.CoreV1Interface) *cache.ListWatch { ) } -func (ni *namespaceInformer) IsClusterMonitoringNamespace(name string) bool { - ni.mu.RLock() - defer ni.mu.RUnlock() - return ni.monitoringNamespaces[name] -} - -func (ni *namespaceInformer) updateMonitoringNamespace(ns *corev1.Namespace) { - ni.mu.Lock() - defer ni.mu.Unlock() +func (nm *namespaceManager) updateMonitoringNamespace(ns *corev1.Namespace) { + nm.mu.Lock() + defer nm.mu.Unlock() if ns.Labels != nil && ns.Labels[ClusterMonitoringLabel] == "true" { - ni.monitoringNamespaces[ns.Name] = true + nm.monitoringNamespaces[ns.Name] = true } else { - delete(ni.monitoringNamespaces, ns.Name) + delete(nm.monitoringNamespaces, ns.Name) } } -func (ni *namespaceInformer) removeMonitoringNamespace(name string) { - ni.mu.Lock() - defer ni.mu.Unlock() - delete(ni.monitoringNamespaces, name) +func (nm *namespaceManager) removeMonitoringNamespace(name string) { + nm.mu.Lock() + defer nm.mu.Unlock() + delete(nm.monitoringNamespaces, name) +} + +func (nm *namespaceManager) IsClusterMonitoringNamespace(name string) bool { + nm.mu.RLock() + defer nm.mu.RUnlock() + return nm.monitoringNamespaces[name] } diff --git a/pkg/k8s/prometheus_alerts.go b/pkg/k8s/prometheus_alerts.go index e659c8a9f..878dd9021 100644 --- a/pkg/k8s/prometheus_alerts.go +++ b/pkg/k8s/prometheus_alerts.go @@ -60,7 +60,7 @@ type prometheusRoute struct { } `json:"spec"` } -func newPrometheusAlerts(clientset *kubernetes.Clientset, config *rest.Config) PrometheusAlertsInterface { +func newPrometheusAlerts(clientset *kubernetes.Clientset, config *rest.Config) *prometheusAlerts { return &prometheusAlerts{ clientset: clientset, config: config, diff --git a/pkg/k8s/prometheus_rule.go b/pkg/k8s/prometheus_rule.go index 877750ca1..48e7bae93 100644 --- a/pkg/k8s/prometheus_rule.go +++ b/pkg/k8s/prometheus_rule.go @@ -8,37 +8,66 @@ import ( monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/cache" ) type prometheusRuleManager struct { clientset *monitoringv1client.Clientset - informer PrometheusRuleInformerInterface + informer cache.SharedIndexInformer } -func newPrometheusRuleManager(clientset *monitoringv1client.Clientset, informer PrometheusRuleInformerInterface) PrometheusRuleInterface { +func newPrometheusRuleManager(ctx context.Context, clientset *monitoringv1client.Clientset) *prometheusRuleManager { + informer := cache.NewSharedIndexInformer( + prometheusRuleListWatchForAllNamespaces(clientset), + &monitoringv1.PrometheusRule{}, + 0, + cache.Indexers{}, + ) + + go informer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("PrometheusRule informer", ctx.Done(), + informer.HasSynced, + ) + return &prometheusRuleManager{ clientset: clientset, informer: 
informer, } } +func prometheusRuleListWatchForAllNamespaces(clientset *monitoringv1client.Clientset) *cache.ListWatch { + return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "prometheusrules", "", fields.Everything()) +} + func (prm *prometheusRuleManager) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { - prs, err := prm.clientset.MonitoringV1().PrometheusRules(namespace).List(ctx, metav1.ListOptions{}) - if err != nil { - return nil, err + prs := prm.informer.GetStore().List() + + prometheusRules := make([]monitoringv1.PrometheusRule, 0, len(prs)) + for _, item := range prs { + pr, ok := item.(*monitoringv1.PrometheusRule) + if !ok { + continue + } + prometheusRules = append(prometheusRules, *pr) } - return prs.Items, nil + return prometheusRules, nil } func (prm *prometheusRuleManager) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { - pr, exists, err := prm.informer.Get(ctx, namespace, name) + pr, err := prm.clientset.MonitoringV1().PrometheusRules(namespace).Get(ctx, name, metav1.GetOptions{}) if err != nil { - return nil, exists, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) + if errors.IsNotFound(err) { + return nil, false, nil + } + + return nil, false, err } - return pr, exists, nil + return pr, true, nil } func (prm *prometheusRuleManager) Update(ctx context.Context, pr monitoringv1.PrometheusRule) error { diff --git a/pkg/k8s/prometheus_rule_informer.go b/pkg/k8s/prometheus_rule_informer.go deleted file mode 100644 index ec68dfc52..000000000 --- a/pkg/k8s/prometheus_rule_informer.go +++ /dev/null @@ -1,86 +0,0 @@ -package k8s - -import ( - "context" - - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" - "k8s.io/apimachinery/pkg/fields" - "k8s.io/client-go/tools/cache" -) - -type prometheusRuleInformer struct { - informer cache.SharedIndexInformer -} - -func newPrometheusRuleInformer(clientset *monitoringv1client.Clientset) PrometheusRuleInformerInterface { - informer := cache.NewSharedIndexInformer( - prometheusRuleListWatchForAllNamespaces(clientset), - &monitoringv1.PrometheusRule{}, - 0, - cache.Indexers{}, - ) - - return &prometheusRuleInformer{ - informer: informer, - } -} - -func prometheusRuleListWatchForAllNamespaces(clientset *monitoringv1client.Clientset) *cache.ListWatch { - return cache.NewListWatchFromClient(clientset.MonitoringV1().RESTClient(), "prometheusrules", "", fields.Everything()) -} - -func (pri *prometheusRuleInformer) Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error { - _, err := pri.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - pr, ok := obj.(*monitoringv1.PrometheusRule) - if !ok { - return - } - callbacks.OnAdd(pr) - }, - UpdateFunc: func(oldObj interface{}, newObj interface{}) { - pr, ok := newObj.(*monitoringv1.PrometheusRule) - if !ok { - return - } - callbacks.OnUpdate(pr) - }, - DeleteFunc: func(obj interface{}) { - k, err := cache.DeletionHandlingObjectToName(obj) - if err != nil { - return - } - - callbacks.OnDelete(k) - }, - }) - - go pri.informer.Run(ctx.Done()) - - cache.WaitForNamedCacheSync("PrometheusRule informer", ctx.Done(), - pri.informer.HasSynced, - ) - - return err -} - -func (pri *prometheusRuleInformer) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { 
- prs := pri.informer.GetStore().List() - - prometheusRules := make([]monitoringv1.PrometheusRule, 0, len(prs)) - for _, pr := range prs { - prometheusRules = append(prometheusRules, *pr.(*monitoringv1.PrometheusRule)) - } - - return prometheusRules, nil -} - -func (pri *prometheusRuleInformer) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { - pr, exists, err := pri.informer.GetStore().GetByKey(namespace + "/" + name) - if err != nil { - return nil, exists, err - } - - return pr.(*monitoringv1.PrometheusRule), exists, nil -} diff --git a/pkg/k8s/relabeled_rules.go b/pkg/k8s/relabeled_rules.go new file mode 100644 index 000000000..c4d808100 --- /dev/null +++ b/pkg/k8s/relabeled_rules.go @@ -0,0 +1,422 @@ +package k8s + +import ( + "context" + "fmt" + "sync" + "time" + + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" + "gopkg.in/yaml.v2" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" +) + +const ( + resyncPeriod = 15 * time.Minute + queueBaseDelay = 50 * time.Millisecond + queueMaxDelay = 3 * time.Minute + + ClusterMonitoringNamespace = "openshift-monitoring" + + RelabeledRulesConfigMapName = "relabeled-rules-config" + RelabeledRulesConfigMapKey = "config.yaml" + + AlertRelabelConfigSecretName = "alert-relabel-configs" + AlertRelabelConfigSecretKey = "config.yaml" + + PrometheusRuleLabelNamespace = "openshift_io_prometheus_rule_namespace" + PrometheusRuleLabelName = "openshift_io_prometheus_rule_name" + AlertRuleLabelId = "openshift_io_alert_rule_id" + + AppKubernetesIoComponent = "app.kubernetes.io/component" + AppKubernetesIoManagedBy = "app.kubernetes.io/managed-by" + AppKubernetesIoComponentAlertManagementApi = "alert-management-api" + AppKubernetesIoComponentMonitoringPlugin = "monitoring-plugin" +) + +type relabeledRulesManager struct { + queue workqueue.TypedRateLimitingInterface[string] + + namespaceManager NamespaceInterface + prometheusRulesInformer cache.SharedIndexInformer + secretInformer cache.SharedIndexInformer + configMapInformer cache.SharedIndexInformer + clientset *kubernetes.Clientset + + // relabeledRules stores the relabeled rules + relabeledRules map[string]monitoringv1.Rule + relabelConfigs []*relabel.Config + mu sync.RWMutex +} + +func newRelabeledRulesManager(ctx context.Context, namespaceManager NamespaceInterface, monitoringv1clientset *monitoringv1client.Clientset, clientset *kubernetes.Clientset) (*relabeledRulesManager, error) { + prometheusRulesInformer := cache.NewSharedIndexInformer( + prometheusRuleListWatchForAllNamespaces(monitoringv1clientset), + &monitoringv1.PrometheusRule{}, + resyncPeriod, + cache.Indexers{}, + ) + + secretInformer := cache.NewSharedIndexInformer( + alertRelabelConfigSecretListWatch(clientset, ClusterMonitoringNamespace), + &corev1.Secret{}, + resyncPeriod, + cache.Indexers{}, + ) + + configMapInformer := cache.NewSharedIndexInformer( + configMapListWatch(clientset, ClusterMonitoringNamespace), + &corev1.ConfigMap{}, + resyncPeriod, + 
cache.Indexers{}, + ) + + queue := workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.NewTypedItemExponentialFailureRateLimiter[string](queueBaseDelay, queueMaxDelay), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "relabeled-rules"}, + ) + + rrm := &relabeledRulesManager{ + queue: queue, + namespaceManager: namespaceManager, + prometheusRulesInformer: prometheusRulesInformer, + secretInformer: secretInformer, + configMapInformer: configMapInformer, + clientset: clientset, + } + + _, err := rrm.prometheusRulesInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule added: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + promRule, ok := newObj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule updated: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + DeleteFunc: func(obj interface{}) { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + obj = tombstone.Obj + } + + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + return + } + log.Debugf("prometheus rule deleted: %s/%s", promRule.Namespace, promRule.Name) + rrm.queue.Add("prometheus-rule-sync") + }, + }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to prometheus rules informer: %w", err) + } + + _, err = rrm.secretInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + rrm.queue.Add("secret-sync") + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + rrm.queue.Add("secret-sync") + }, + DeleteFunc: func(obj interface{}) { + rrm.queue.Add("secret-sync") + }, + }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to secret informer: %w", err) + } + + _, err = rrm.configMapInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + rrm.queue.Add("config-map-sync") + }, + UpdateFunc: func(oldObj interface{}, newObj interface{}) { + rrm.queue.Add("config-map-sync") + }, + DeleteFunc: func(obj interface{}) { + rrm.queue.Add("config-map-sync") + }, + }) + if err != nil { + return nil, fmt.Errorf("failed to add event handler to config map informer: %w", err) + } + + go rrm.prometheusRulesInformer.Run(ctx.Done()) + go rrm.secretInformer.Run(ctx.Done()) + go rrm.configMapInformer.Run(ctx.Done()) + + cache.WaitForNamedCacheSync("RelabeledRulesConfig informer", ctx.Done(), + rrm.prometheusRulesInformer.HasSynced, + rrm.secretInformer.HasSynced, + rrm.configMapInformer.HasSynced, + ) + + go rrm.worker(ctx) + rrm.queue.Add("initial-sync") + + return rrm, nil +} + +func alertRelabelConfigSecretListWatch(clientset *kubernetes.Clientset, namespace string) *cache.ListWatch { + return cache.NewListWatchFromClient( + clientset.CoreV1().RESTClient(), + "secrets", + namespace, + fields.OneTermEqualSelector("metadata.name", AlertRelabelConfigSecretName), + ) +} + +func configMapListWatch(clientset *kubernetes.Clientset, namespace string) *cache.ListWatch { + return cache.NewListWatchFromClient( + clientset.CoreV1().RESTClient(), + "configmaps", + namespace, + fields.OneTermEqualSelector("metadata.name", RelabeledRulesConfigMapName), + ) +} + +func (rrm *relabeledRulesManager) worker(ctx context.Context) { + for rrm.processNextWorkItem(ctx) { + } +} + +func (rrm 
*relabeledRulesManager) processNextWorkItem(ctx context.Context) bool { + key, quit := rrm.queue.Get() + if quit { + return false + } + + defer rrm.queue.Done(key) + + if err := rrm.sync(ctx, key); err != nil { + log.Errorf("error syncing relabeled rules: %v", err) + rrm.queue.AddRateLimited(key) + return true + } + + rrm.queue.Forget(key) + + return true +} + +func (rrm *relabeledRulesManager) sync(ctx context.Context, key string) error { + if key == "config-map-sync" { + return rrm.reapplyConfigMap(ctx) + } + + relabelConfigs, err := rrm.loadRelabelConfigs() + if err != nil { + return fmt.Errorf("failed to load relabel configs: %w", err) + } + + rrm.mu.Lock() + rrm.relabelConfigs = relabelConfigs + rrm.mu.Unlock() + + alerts := rrm.collectAlerts(relabelConfigs) + + rrm.mu.Lock() + rrm.relabeledRules = alerts + rrm.mu.Unlock() + + return rrm.reapplyConfigMap(ctx) +} + +func (rrm *relabeledRulesManager) reapplyConfigMap(ctx context.Context) error { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + data, err := yaml.Marshal(rrm.relabeledRules) + if err != nil { + return fmt.Errorf("failed to marshal relabeled rules: %w", err) + } + + configMapData := map[string]string{ + RelabeledRulesConfigMapKey: string(data), + } + + configMapClient := rrm.clientset.CoreV1().ConfigMaps(ClusterMonitoringNamespace) + + existingConfigMap, err := configMapClient.Get(ctx, RelabeledRulesConfigMapName, metav1.GetOptions{}) + if err != nil { + if errors.IsNotFound(err) { + log.Infof("Creating ConfigMap %s with %d relabeled rules", RelabeledRulesConfigMapName, len(rrm.relabeledRules)) + newConfigMap := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: RelabeledRulesConfigMapName, + Namespace: ClusterMonitoringNamespace, + Labels: map[string]string{ + AppKubernetesIoManagedBy: AppKubernetesIoComponentMonitoringPlugin, + AppKubernetesIoComponent: AppKubernetesIoComponentAlertManagementApi, + }, + }, + Data: configMapData, + } + + if _, err := configMapClient.Create(ctx, newConfigMap, metav1.CreateOptions{}); err != nil { + return fmt.Errorf("failed to create config map: %w", err) + } + + log.Infof("Successfully created ConfigMap %s", RelabeledRulesConfigMapName) + return nil + } + + return fmt.Errorf("failed to get config map: %w", err) + } + + if existingConfigMap.Data[RelabeledRulesConfigMapKey] == configMapData[RelabeledRulesConfigMapKey] { + log.Debugf("ConfigMap %s data unchanged, skipping update", RelabeledRulesConfigMapName) + return nil + } + + log.Infof("Updating ConfigMap %s with %d relabeled rules", RelabeledRulesConfigMapName, len(rrm.relabeledRules)) + existingConfigMap.Data = configMapData + + if _, err := configMapClient.Update(ctx, existingConfigMap, metav1.UpdateOptions{}); err != nil { + return fmt.Errorf("failed to update config map: %w", err) + } + + log.Infof("Successfully updated ConfigMap %s", RelabeledRulesConfigMapName) + return nil +} + +func (rrm *relabeledRulesManager) loadRelabelConfigs() ([]*relabel.Config, error) { + storeKey := fmt.Sprintf("%s/%s", ClusterMonitoringNamespace, AlertRelabelConfigSecretName) + obj, exists, err := rrm.secretInformer.GetStore().GetByKey(storeKey) + if err != nil { + return nil, fmt.Errorf("failed to get secret from store: %w", err) + } + if !exists { + log.Infof("Alert relabel config secret %q not found", storeKey) + return nil, nil + } + + secret, ok := obj.(*corev1.Secret) + if !ok { + return nil, fmt.Errorf("unexpected object type in secret store: %T", obj) + } + + configData, ok := secret.Data[AlertRelabelConfigSecretKey] + if !ok { + return 
nil, fmt.Errorf("no config data found in secret %q", AlertRelabelConfigSecretName) + } + + var configs []*relabel.Config + if err := yaml.Unmarshal(configData, &configs); err != nil { + return nil, fmt.Errorf("failed to unmarshal relabel configs: %w", err) + } + + for _, config := range configs { + if config.NameValidationScheme == model.UnsetValidation { + config.NameValidationScheme = model.UTF8Validation + } + } + + log.Infof("Loaded %d relabel configs from secret %s", len(configs), storeKey) + return configs, nil +} + +func (rrm *relabeledRulesManager) collectAlerts(relabelConfigs []*relabel.Config) map[string]monitoringv1.Rule { + alerts := make(map[string]monitoringv1.Rule) + + for _, obj := range rrm.prometheusRulesInformer.GetStore().List() { + promRule, ok := obj.(*monitoringv1.PrometheusRule) + if !ok { + continue + } + + // Skip deleted rules + if promRule.DeletionTimestamp != nil { + continue + } + + for _, group := range promRule.Spec.Groups { + for _, rule := range group.Rules { + // Only process alerting rules (skip recording rules) + if rule.Alert == "" { + continue + } + + alertRuleId := alertrule.GetAlertingRuleId(&rule) + + if rule.Labels == nil { + rule.Labels = make(map[string]string) + } + + rule.Labels["alertname"] = rule.Alert + + if rrm.namespaceManager.IsClusterMonitoringNamespace(promRule.Namespace) { + // Relabel the alert labels + relabeledLabels, keep := relabel.Process(labels.FromMap(rule.Labels), relabelConfigs...) + if !keep { + // Alert was dropped by relabeling, skip it + log.Infof("Skipping dropped alert %s from %s/%s", rule.Alert, promRule.Namespace, promRule.Name) + continue + } + + // Update the alert labels + rule.Labels = relabeledLabels.Map() + } + + rule.Labels[AlertRuleLabelId] = alertRuleId + rule.Labels[PrometheusRuleLabelNamespace] = promRule.Namespace + rule.Labels[PrometheusRuleLabelName] = promRule.Name + + alerts[alertRuleId] = rule + } + } + } + + log.Debugf("Collected %d alerts", len(alerts)) + return alerts +} + +func (rrm *relabeledRulesManager) List(ctx context.Context) []monitoringv1.Rule { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + var result []monitoringv1.Rule + for _, rule := range rrm.relabeledRules { + result = append(result, rule) + } + + return result +} + +func (rrm *relabeledRulesManager) Get(ctx context.Context, id string) (monitoringv1.Rule, bool) { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + rule, ok := rrm.relabeledRules[id] + if !ok { + return monitoringv1.Rule{}, false + } + + return rule, true +} + +func (rrm *relabeledRulesManager) Config() []*relabel.Config { + rrm.mu.RLock() + defer rrm.mu.RUnlock() + + return append([]*relabel.Config{}, rrm.relabelConfigs...) 
+} diff --git a/pkg/k8s/types.go b/pkg/k8s/types.go index 550b5114c..6786b6193 100644 --- a/pkg/k8s/types.go +++ b/pkg/k8s/types.go @@ -5,8 +5,8 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/relabel" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/cache" ) // ClientOptions holds configuration options for creating a Kubernetes client @@ -27,17 +27,14 @@ type Client interface { // PrometheusRules returns the PrometheusRule interface PrometheusRules() PrometheusRuleInterface - // PrometheusRuleInformer returns the PrometheusRuleInformer interface - PrometheusRuleInformer() PrometheusRuleInformerInterface - // AlertRelabelConfigs returns the AlertRelabelConfig interface AlertRelabelConfigs() AlertRelabelConfigInterface - // AlertRelabelConfigInformer returns the AlertRelabelConfigInformer interface - AlertRelabelConfigInformer() AlertRelabelConfigInformerInterface + // RelabeledRules returns the RelabeledRules interface + RelabeledRules() RelabeledRulesInterface - // NamespaceInformer returns the NamespaceInformer interface - NamespaceInformer() NamespaceInformerInterface + // Namespace returns the Namespace interface + Namespace() NamespaceInterface } // PrometheusAlertsInterface defines operations for managing PrometheusAlerts @@ -64,30 +61,6 @@ type PrometheusRuleInterface interface { AddRule(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error } -// PrometheusRuleInformerInterface defines operations for PrometheusRules informers -type PrometheusRuleInformerInterface interface { - // Run starts the informer and sets up the provided callbacks for add, update, and delete events - Run(ctx context.Context, callbacks PrometheusRuleInformerCallback) error - - // List lists all PrometheusRules in the cluster - List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) - - // Get retrieves a PrometheusRule by namespace and name - Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) -} - -// PrometheusRuleInformerCallback holds the callback functions for informer events -type PrometheusRuleInformerCallback struct { - // OnAdd is called when a new PrometheusRule is added - OnAdd func(pr *monitoringv1.PrometheusRule) - - // OnUpdate is called when an existing PrometheusRule is updated - OnUpdate func(pr *monitoringv1.PrometheusRule) - - // OnDelete is called when a PrometheusRule is deleted - OnDelete func(key cache.ObjectName) -} - // AlertRelabelConfigInterface defines operations for managing AlertRelabelConfigs type AlertRelabelConfigInterface interface { // List lists all AlertRelabelConfigs in the cluster @@ -106,32 +79,20 @@ type AlertRelabelConfigInterface interface { Delete(ctx context.Context, namespace string, name string) error } -// AlertRelabelConfigInformerInterface defines operations for AlertRelabelConfig informers -type AlertRelabelConfigInformerInterface interface { - // Run starts the informer and sets up the provided callbacks for add, update, and delete events - Run(ctx context.Context, callbacks AlertRelabelConfigInformerCallback) error - - // List lists all AlertRelabelConfigs in the cluster - List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) - - // Get retrieves an AlertRelabelConfig by namespace and name - Get(ctx context.Context, namespace string, name string) 
(*osmv1.AlertRelabelConfig, bool, error) -} - -// AlertRelabelConfigInformerCallback holds the callback functions for informer events -type AlertRelabelConfigInformerCallback struct { - // OnAdd is called when a new AlertRelabelConfig is added - OnAdd func(arc *osmv1.AlertRelabelConfig) +// RelabeledRulesInterface defines operations for managing relabeled rules +type RelabeledRulesInterface interface { + // List retrieves the relabeled rules for a given PrometheusRule + List(ctx context.Context) []monitoringv1.Rule - // OnUpdate is called when an existing AlertRelabelConfig is updated - OnUpdate func(arc *osmv1.AlertRelabelConfig) + // Get retrieves the relabeled rule for a given id + Get(ctx context.Context, id string) (monitoringv1.Rule, bool) - // OnDelete is called when an AlertRelabelConfig is deleted - OnDelete func(key cache.ObjectName) + // Config returns the list of alert relabel configs + Config() []*relabel.Config } -// NamespaceInformerInterface defines operations for Namespace informers -type NamespaceInformerInterface interface { +// NamespaceInterface defines operations for Namespaces +type NamespaceInterface interface { // IsClusterMonitoringNamespace checks if a namespace has the openshift.io/cluster-monitoring=true label IsClusterMonitoringNamespace(name string) bool } diff --git a/pkg/management/create_user_defined_alert_rule.go b/pkg/management/create_user_defined_alert_rule.go index 403489bcc..17ca070ab 100644 --- a/pkg/management/create_user_defined_alert_rule.go +++ b/pkg/management/create_user_defined_alert_rule.go @@ -4,6 +4,7 @@ import ( "context" "errors" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" ) @@ -27,9 +28,8 @@ func (c *client) CreateUserDefinedAlertRule(ctx context.Context, alertRule monit } // Check if rule with the same ID already exists - ruleId := c.mapper.GetAlertingRuleId(&alertRule) - _, err := c.mapper.FindAlertRuleById(ruleId) - if err == nil { + _, found := c.k8sClient.RelabeledRules().Get(ctx, alertrule.GetAlertingRuleId(&alertRule)) + if found { return "", errors.New("alert rule with exact config already exists") } @@ -37,10 +37,10 @@ func (c *client) CreateUserDefinedAlertRule(ctx context.Context, alertRule monit prOptions.GroupName = DefaultGroupName } - err = c.k8sClient.PrometheusRules().AddRule(ctx, nn, prOptions.GroupName, alertRule) + err := c.k8sClient.PrometheusRules().AddRule(ctx, nn, prOptions.GroupName, alertRule) if err != nil { return "", err } - return string(c.mapper.GetAlertingRuleId(&alertRule)), nil + return alertrule.GetAlertingRuleId(&alertRule), nil } diff --git a/pkg/management/create_user_defined_alert_rule_test.go b/pkg/management/create_user_defined_alert_rule_test.go index 4f7253af5..bc6eeb100 100644 --- a/pkg/management/create_user_defined_alert_rule_test.go +++ b/pkg/management/create_user_defined_alert_rule_test.go @@ -10,311 +10,255 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("CreateUserDefinedAlertRule", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR 
*testutils.MockPrometheusRuleInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() - - mockPR = &testutils.MockPrometheusRuleInterface{} - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR + var ( + testRule = monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + For: (*monitoringv1.Duration)(stringPtr("5m")), + Labels: map[string]string{ + "severity": "warning", }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer + Annotations: map[string]string{ + "summary": "Test alert", }, } - mockMapper = &testutils.MockMapperClient{} + ) - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) }) - Context("when creating a user-defined alert rule", func() { - It("should successfully create with default group name", func() { - By("setting up test data") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - }, - Annotations: map[string]string{ - "summary": "Test alert", - }, - } - + Context("when PrometheusRule Name is not specified", func() { + It("returns an error", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-rule", Namespace: "test-namespace", } - ruleId := "test-rule-id" - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(ruleId) - } - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("not found") - } - - addRuleCalled := false - var capturedGroupName string - mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { - addRuleCalled = true - capturedGroupName = groupName - Expect(nn.Name).To(Equal("test-rule")) - Expect(nn.Namespace).To(Equal("test-namespace")) - Expect(rule.Alert).To(Equal("TestAlert")) - return nil - } - - By("creating the alert rule") - returnedId, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the result") - Expect(err).ToNot(HaveOccurred()) - Expect(returnedId).To(Equal(ruleId)) - Expect(addRuleCalled).To(BeTrue()) - Expect(capturedGroupName).To(Equal("user-defined-rules")) - }) - - It("should successfully create with custom group name", func() { - By("setting up test data") - alertRule := monitoringv1.Rule{ - Alert: "CustomGroupAlert", - Expr: intstr.FromString("memory_usage > 90"), - } - - prOptions := management.PrometheusRuleOptions{ - Name: "custom-rule", - Namespace: "custom-namespace", - GroupName: "custom-group", - } - - ruleId := "custom-rule-id" - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(ruleId) - } - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("not found") - } - - var capturedGroupName 
string - mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { - capturedGroupName = groupName - return nil - } - - By("creating the alert rule") - returnedId, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the result") - Expect(err).ToNot(HaveOccurred()) - Expect(returnedId).To(Equal(ruleId)) - Expect(capturedGroupName).To(Equal("custom-group")) + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("PrometheusRule Name and Namespace must be specified")) }) + }) - It("should return error when namespace is missing", func() { - By("setting up test data with missing namespace") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - + Context("when PrometheusRule Namespace is not specified", func() { + It("returns an error", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-rule", - Namespace: "", + Name: "test-rule", } - By("attempting to create the alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the error") + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("PrometheusRule Name and Namespace must be specified")) }) + }) - It("should return error when name is missing", func() { - By("setting up test data with missing name") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), + Context("when trying to add rule to platform-managed PrometheusRule", func() { + BeforeEach(func() { + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } } - prOptions := management.PrometheusRuleOptions{ - Name: "", - Namespace: "test-namespace", + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } } - - By("attempting to create the alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the error") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("PrometheusRule Name and Namespace must be specified")) }) - It("should return error when trying to add to platform-managed PrometheusRule", func() { - By("setting up test data with platform-managed PrometheusRule name") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - + It("returns an error", func() { prOptions := management.PrometheusRuleOptions{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", + Name: "platform-rule", + Namespace: "openshift-monitoring", } - // Don't set up mapper - we should fail before mapper check - - By("attempting to create the alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the error") + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("cannot add user-defined alert rule to a platform-managed PrometheusRule")) }) + }) - It("should return error when rule with same config 
already exists", func() { - By("setting up test data") - alertRule := monitoringv1.Rule{ - Alert: "DuplicateAlert", - Expr: intstr.FromString("up == 0"), - } + Context("when rule with same ID already exists", func() { + BeforeEach(func() { + ruleId := alertrule.GetAlertingRuleId(&testRule) - prOptions := management.PrometheusRuleOptions{ - Name: "test-rule", - Namespace: "test-namespace", + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } - ruleId := "duplicate-rule-id" - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(ruleId) - } - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - // Return success, indicating the rule already exists - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "test-rule", - }, nil + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == ruleId { + return testRule, true + } + return monitoringv1.Rule{}, false + }, + } } + }) - By("attempting to create the duplicate alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + It("returns an error", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "user-rule", + Namespace: "user-namespace", + } - By("verifying the error") + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("alert rule with exact config already exists")) }) + }) - It("should return error when AddRule fails", func() { - By("setting up test data") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), + Context("when AddRule fails", func() { + BeforeEach(func() { + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } - prOptions := management.PrometheusRuleOptions{ - Name: "test-rule", - Namespace: "test-namespace", + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } } - ruleId := "test-rule-id" - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(ruleId) - } - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("not found") + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + return errors.New("failed to add rule") + }, + } } + }) - expectedError := errors.New("failed to add rule to kubernetes") - mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { - return expectedError + It("returns the error", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "user-rule", + Namespace: 
"user-namespace", } - By("attempting to create the alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the error is propagated") + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) Expect(err).To(HaveOccurred()) - Expect(err).To(Equal(expectedError)) + Expect(err.Error()).To(ContainSubstring("failed to add rule")) }) }) - Context("when dealing with edge cases", func() { - It("should handle alert rule with no labels or annotations", func() { - By("setting up minimal alert rule") - alertRule := monitoringv1.Rule{ - Alert: "MinimalAlert", - Expr: intstr.FromString("up == 0"), + Context("when successfully creating a rule", func() { + BeforeEach(func() { + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } - prOptions := management.PrometheusRuleOptions{ - Name: "minimal-rule", - Namespace: "test-namespace", + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } } - ruleId := "minimal-rule-id" - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(ruleId) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + return nil + }, + } } - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("not found") + }) + + It("returns the rule ID", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "user-rule", + Namespace: "user-namespace", } - addRuleCalled := false - mockPR.AddRuleFunc = func(ctx context.Context, nn types.NamespacedName, groupName string, rule monitoringv1.Rule) error { - addRuleCalled = true - Expect(rule.Labels).To(BeNil()) - Expect(rule.Annotations).To(BeNil()) - return nil + ruleId, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(ruleId).NotTo(BeEmpty()) + Expect(ruleId).To(Equal(alertrule.GetAlertingRuleId(&testRule))) + }) + + It("uses default group name when not specified", func() { + var capturedGroupName string + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + capturedGroupName = groupName + return nil + }, + } } - By("creating the minimal alert rule") - returnedId, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) + prOptions := management.PrometheusRuleOptions{ + Name: "user-rule", + Namespace: "user-namespace", + } - By("verifying the result") - Expect(err).ToNot(HaveOccurred()) - Expect(returnedId).To(Equal(ruleId)) - Expect(addRuleCalled).To(BeTrue()) + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(capturedGroupName).To(Equal("user-defined-rules")) }) - It("should reject PrometheusRules in cluster monitoring namespaces", func() { - By("setting up test data with cluster monitoring 
namespace") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), + It("uses custom group name when specified", func() { + var capturedGroupName string + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + AddRuleFunc: func(ctx context.Context, namespacedName types.NamespacedName, groupName string, rule monitoringv1.Rule) error { + capturedGroupName = groupName + return nil + }, + } } prOptions := management.PrometheusRuleOptions{ - Name: "custom-rule", - Namespace: "platform-namespace-1", + Name: "user-rule", + Namespace: "user-namespace", + GroupName: "custom-group", } - By("attempting to create the alert rule") - _, err := client.CreateUserDefinedAlertRule(ctx, alertRule, prOptions) - - By("verifying the error") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("cannot add user-defined alert rule to a platform-managed PrometheusRule")) + _, err := client.CreateUserDefinedAlertRule(ctx, testRule, prOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(capturedGroupName).To(Equal("custom-group")) }) }) }) + +func stringPtr(s string) *string { + return &s +} diff --git a/pkg/management/delete_user_defined_alert_rule_by_id.go b/pkg/management/delete_user_defined_alert_rule_by_id.go index 713a93906..6431a915a 100644 --- a/pkg/management/delete_user_defined_alert_rule_by_id.go +++ b/pkg/management/delete_user_defined_alert_rule_by_id.go @@ -4,29 +4,32 @@ import ( "context" "fmt" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) func (c *client) DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId string) error { - prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) - if err != nil { + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} } - if c.IsPlatformAlertRule(types.NamespacedName(*prId)) { + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + + if c.IsPlatformAlertRule(types.NamespacedName{Namespace: namespace, Name: name}) { return &NotAllowedError{Message: "cannot delete alert rule from a platform-managed PrometheusRule"} } - pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) if err != nil { return err } if !found { - return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", namespace, name)} } updated := false @@ -63,7 +66,7 @@ func (c *client) DeleteUserDefinedAlertRuleById(ctx context.Context, alertRuleId return nil } - return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", pr.Namespace, pr.Name)} + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId, AdditionalInfo: "rule not found in the given PrometheusRule"} } func (c *client) filterRulesById(rules []monitoringv1.Rule, alertRuleId string, updated *bool) []monitoringv1.Rule { @@ -81,5 +84,5 @@ func (c *client) filterRulesById(rules []monitoringv1.Rule, alertRuleId string, } func (c *client) 
shouldDeleteRule(rule monitoringv1.Rule, alertRuleId string) bool { - return alertRuleId == string(c.mapper.GetAlertingRuleId(&rule)) + return alertRuleId == alertrule.GetAlertingRuleId(&rule) } diff --git a/pkg/management/delete_user_defined_alert_rule_by_id_test.go b/pkg/management/delete_user_defined_alert_rule_by_id_test.go index f0f2f5731..7b8d63e8c 100644 --- a/pkg/management/delete_user_defined_alert_rule_by_id_test.go +++ b/pkg/management/delete_user_defined_alert_rule_by_id_test.go @@ -3,533 +3,449 @@ package management_test import ( "context" "errors" - "fmt" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/intstr" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("DeleteUserDefinedAlertRuleById", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR *testutils.MockPrometheusRuleInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() + var ( + userRule1 = monitoringv1.Rule{ + Alert: "UserAlert1", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, + } + userRule1Id = alertrule.GetAlertingRuleId(&userRule1) - mockPR = &testutils.MockPrometheusRuleInterface{} - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR + userRule2 = monitoringv1.Rule{ + Alert: "UserAlert2", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer + } + + platformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", }, } - mockMapper = &testutils.MockMapperClient{} + platformRuleId = alertrule.GetAlertingRuleId(&platformRule) + ) - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) }) - Context("when deleting a user-defined alert rule", func() { - It("should delete rule from multi-rule PrometheusRule and update", func() { - By("setting up PrometheusRule with 3 rules in 2 groups") - rule1 := monitoringv1.Rule{ - Alert: "Alert1", - Expr: intstr.FromString("up == 0"), - } - rule2 := monitoringv1.Rule{ - Alert: "Alert2", - Expr: intstr.FromString("cpu_usage > 80"), - } - rule3 := monitoringv1.Rule{ - Alert: "Alert3", - Expr: intstr.FromString("memory_usage > 90"), - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "multi-rule", - Namespace: "test-namespace", - }, - Spec: 
monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1, rule2}, - }, - { - Name: "group2", - Rules: []monitoringv1.Rule{rule3}, - }, + Context("when rule is not found in RelabeledRules", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/multi-rule": prometheusRule, - }) - - alertRuleId := "alert2-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "multi-rule", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "Alert2" { - return mapper.PrometheusAlertRuleId(alertRuleId) } - return mapper.PrometheusAlertRuleId("other-id") } + }) - By("deleting the middle rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - Expect(err).ToNot(HaveOccurred()) + It("returns NotFoundError", func() { + err := client.DeleteUserDefinedAlertRuleById(ctx, "nonexistent-id") + Expect(err).To(HaveOccurred()) - By("verifying PrometheusRule was updated, not deleted") - updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-rule") - Expect(err).ToNot(HaveOccurred()) - Expect(found).To(BeTrue()) - Expect(updatedPR.Spec.Groups).To(HaveLen(2)) - Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("Alert1")) - Expect(updatedPR.Spec.Groups[1].Rules).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[1].Rules[0].Alert).To(Equal("Alert3")) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("AlertRule")) + Expect(notFoundErr.Id).To(Equal("nonexistent-id")) }) + }) - It("should delete entire PrometheusRule when deleting the last rule", func() { - By("setting up PrometheusRule with single rule") - rule := monitoringv1.Rule{ - Alert: "OnlyAlert", - Expr: intstr.FromString("up == 0"), - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "single-rule", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule}, - }, + Context("when trying to delete a platform rule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/single-rule": prometheusRule, - }) - - alertRuleId := "only-alert-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "single-rule", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(alertRuleId) + } } - 
deleteCalled := false - mockPR.DeleteFunc = func(ctx context.Context, namespace, name string) error { - deleteCalled = true - Expect(namespace).To(Equal("test-namespace")) - Expect(name).To(Equal("single-rule")) - return nil + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" + }, + } } + }) - By("deleting the only rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - Expect(err).ToNot(HaveOccurred()) + It("returns NotAllowedError", func() { + err := client.DeleteUserDefinedAlertRuleById(ctx, platformRuleId) + Expect(err).To(HaveOccurred()) - By("verifying PrometheusRule was deleted") - Expect(deleteCalled).To(BeTrue()) + var notAllowedErr *management.NotAllowedError + Expect(errors.As(err, ¬AllowedErr)).To(BeTrue()) + Expect(notAllowedErr.Message).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) }) + }) - It("should remove empty group when deleting its only rule", func() { - By("setting up PrometheusRule with 2 groups, one with single rule") - rule1 := monitoringv1.Rule{ - Alert: "Alert1", - Expr: intstr.FromString("up == 0"), - } - rule2 := monitoringv1.Rule{ - Alert: "Alert2", - Expr: intstr.FromString("cpu_usage > 80"), - } - rule3 := monitoringv1.Rule{ - Alert: "SingleRuleInGroup", - Expr: intstr.FromString("memory_usage > 90"), + Context("when PrometheusRule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false + }, + } } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "multi-group", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1, rule2}, - }, - { - Name: "group2", - Rules: []monitoringv1.Rule{rule3}, - }, + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false }, - }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/multi-group": prometheusRule, - }) - - alertRuleId := "single-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "multi-group", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "SingleRuleInGroup" { - return mapper.PrometheusAlertRuleId(alertRuleId) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil + }, } - return mapper.PrometheusAlertRuleId("other-id") } + }) - By("deleting the single rule from group2") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - Expect(err).ToNot(HaveOccurred()) + It("returns NotFoundError", func() { + err := client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) + 
Expect(err).To(HaveOccurred()) - By("verifying group2 was removed and group1 remains") - updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-group") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedPR.Spec.Groups).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[0].Name).To(Equal("group1")) - Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(2)) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("PrometheusRule")) }) + }) - It("should delete only the exact matching rule", func() { - By("setting up PrometheusRule with similar rules") - rule1 := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - }, - } - rule2 := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, + Context("when PrometheusRule Get returns an error", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false + }, + } } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "similar-rules", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1, rule2}, - }, + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false }, - }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/similar-rules": prometheusRule, - }) - - targetRuleId := "target-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "similar-rules", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - // Only rule1 matches the target ID - if alertRule.Alert == "TestAlert" && alertRule.Labels["severity"] == "warning" { - return mapper.PrometheusAlertRuleId(targetRuleId) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, } - return mapper.PrometheusAlertRuleId("other-id") } - - By("deleting the specific rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, targetRuleId) - Expect(err).ToNot(HaveOccurred()) - - By("verifying only the exact matching rule was deleted") - updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "similar-rules") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[0].Rules[0].Labels["severity"]).To(Equal("critical")) }) - }) - - Context("when handling errors", func() { - It("should return error when rule not found in mapper", func() { - By("configuring mapper to return error") - alertRuleId := "nonexistent-rule-id" - 
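
For orientation while reading the rest of these specs, here is a compact sketch of the flow they exercise end to end, written against the same mock surfaces (RelabeledRules().Get, Namespace().IsClusterMonitoringNamespace, PrometheusRules().Get/Update/Delete). It is an illustration only, not the patched implementation, and matching rules by the pkg/alert_rule ID helper is an assumption.

package example

import (
	"context"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

	alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule"
	"github.com/openshift/monitoring-plugin/pkg/k8s"
	"github.com/openshift/monitoring-plugin/pkg/management"
)

// deleteUserDefinedRuleSketch mirrors the behavior asserted in this file:
// unknown ID -> NotFoundError, platform namespace -> NotAllowedError, missing
// PrometheusRule -> NotFoundError, last rule -> Delete, otherwise Update with
// the matching rule removed and any emptied group pruned.
func deleteUserDefinedRuleSketch(ctx context.Context, c k8s.Client, id string) error {
	rule, found := c.RelabeledRules().Get(ctx, id)
	if !found {
		return &management.NotFoundError{Resource: "AlertRule", Id: id}
	}

	ns := rule.Labels[k8s.PrometheusRuleLabelNamespace]
	name := rule.Labels[k8s.PrometheusRuleLabelName]
	if c.Namespace().IsClusterMonitoringNamespace(ns) {
		return &management.NotAllowedError{Message: "cannot delete alert rule from a platform-managed PrometheusRule"}
	}

	pr, found, err := c.PrometheusRules().Get(ctx, ns, name)
	if err != nil {
		return err
	}
	if !found {
		return &management.NotFoundError{Resource: "PrometheusRule", Id: ns + "/" + name}
	}

	removed := false
	var groups []monitoringv1.RuleGroup
	for _, g := range pr.Spec.Groups {
		var kept []monitoringv1.Rule
		for i := range g.Rules {
			// Assumption: the rule is matched by the same content-derived ID the tests use.
			if !removed && alertrule.GetAlertingRuleId(&g.Rules[i]) == id {
				removed = true
				continue
			}
			kept = append(kept, g.Rules[i])
		}
		if len(kept) > 0 {
			g.Rules = kept
			groups = append(groups, g)
		}
	}
	if !removed {
		return &management.NotFoundError{Resource: "AlertRule", Id: id}
	}

	if len(groups) == 0 {
		// Removing the last remaining rule deletes the whole object.
		return c.PrometheusRules().Delete(ctx, ns, name)
	}
	pr.Spec.Groups = groups
	return c.PrometheusRules().Update(ctx, *pr)
}
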
mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("alert rule not found") - } - By("attempting to delete the rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - - By("verifying error is returned") + It("returns the error", func() { + err := client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("AlertRule with id nonexistent-rule-id not found")) + Expect(err.Error()).To(ContainSubstring("failed to get PrometheusRule")) }) + }) - It("should return error when trying to delete from platform-managed PrometheusRule", func() { - By("configuring mapper to return platform PrometheusRule") - alertRuleId := "platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-alerts", - }, nil + Context("when rule is not found in PrometheusRule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false + }, + } } - By("attempting to delete the rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - - By("verifying error is returned") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("cannot delete alert rule from a platform-managed PrometheusRule")) - }) - - It("should return error when PrometheusRule Get fails", func() { - By("configuring Get to return error") - alertRuleId := "test-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "test-rule", - }, nil + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } - mockPR.GetFunc = func(ctx context.Context, namespace, name string) (*monitoringv1.PrometheusRule, bool, error) { - return nil, false, errors.New("failed to get PrometheusRule") + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{userRule2}, + }, + }, + }, + }, true, nil + }, + } } + }) - By("attempting to delete the rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - - By("verifying error is returned") + It("returns NotFoundError", func() { + err := client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("failed to get PrometheusRule")) + + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("AlertRule")) + Expect(notFoundErr.Id).To(Equal(userRule1Id)) }) + }) - It("should return error when 
PrometheusRule Update fails", func() { - By("setting up PrometheusRule with 2 rules") - rule1 := monitoringv1.Rule{ - Alert: "Alert1", - Expr: intstr.FromString("up == 0"), - } - rule2 := monitoringv1.Rule{ - Alert: "Alert2", - Expr: intstr.FromString("cpu_usage > 80"), + Context("when deleting the only rule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false + }, + } } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-rule", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1, rule2}, - }, + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false }, - }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/test-rule": prometheusRule, - }) - - alertRuleId := "alert2-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "test-rule", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "Alert2" { - return mapper.PrometheusAlertRuleId(alertRuleId) + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{userRule1}, + }, + }, + }, + }, true, nil + }, + DeleteFunc: func(ctx context.Context, namespace string, name string) error { + return nil + }, } - return mapper.PrometheusAlertRuleId("other-id") } + }) - mockPR.UpdateFunc = func(ctx context.Context, pr monitoringv1.PrometheusRule) error { - return fmt.Errorf("kubernetes update error") + It("deletes the entire PrometheusRule", func() { + var deleteCalled bool + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{userRule1}, + }, + }, + }, + }, true, nil + }, + DeleteFunc: func(ctx context.Context, namespace string, name string) error { + deleteCalled = true + return nil + }, + } } - By("attempting to delete the rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - - By("verifying error is returned") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("failed to update PrometheusRule")) - Expect(err.Error()).To(ContainSubstring("kubernetes update error")) + err := 
client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) + Expect(err).NotTo(HaveOccurred()) + Expect(deleteCalled).To(BeTrue()) }) + }) - It("should return error when PrometheusRule Delete fails", func() { - By("setting up PrometheusRule with single rule") - rule := monitoringv1.Rule{ - Alert: "OnlyAlert", - Expr: intstr.FromString("up == 0"), - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "single-rule", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule}, - }, + Context("when deleting one of multiple rules", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false }, - }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/single-rule": prometheusRule, - }) - - alertRuleId := "only-alert-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "single-rule", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(alertRuleId) + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } + }) - mockPR.DeleteFunc = func(ctx context.Context, namespace, name string) error { - return fmt.Errorf("kubernetes delete error") + It("updates the PrometheusRule with remaining rules", func() { + var updateCalled bool + var updatedPR *monitoringv1.PrometheusRule + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{userRule1, userRule2}, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + updateCalled = true + updatedPR = &pr + return nil + }, + } } - By("attempting to delete the rule") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - - By("verifying error is returned") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("failed to delete PrometheusRule")) - Expect(err.Error()).To(ContainSubstring("kubernetes delete error")) + err := client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) + Expect(err).NotTo(HaveOccurred()) + Expect(updateCalled).To(BeTrue()) + Expect(updatedPR.Spec.Groups).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("UserAlert2")) }) }) - Context("when handling edge cases", func() { - It("should handle PrometheusRule with multiple groups correctly", func() { - By("setting up PrometheusRule with 3 groups") - rule1 := monitoringv1.Rule{ - Alert: "Alert1", - Expr: 
intstr.FromString("up == 0"), - } - rule2 := monitoringv1.Rule{ - Alert: "Alert2", - Expr: intstr.FromString("cpu_usage > 80"), - } - rule3 := monitoringv1.Rule{ - Alert: "Alert3", - Expr: intstr.FromString("memory_usage > 90"), + Context("when deleting all rules from a group", func() { + It("removes the empty group", func() { + anotherRule := monitoringv1.Rule{ + Alert: "AnotherAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", + }, } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "multi-group", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1}, - }, - { - Name: "group2", - Rules: []monitoringv1.Rule{rule2}, - }, - { - Name: "group3", - Rules: []monitoringv1.Rule{rule3}, - }, + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRule1Id { + return userRule1, true + } + return monitoringv1.Rule{}, false }, - }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/multi-group": prometheusRule, - }) - - alertRuleId := "alert2-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "test-namespace", - Name: "multi-group", - }, nil + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return false + }, + } } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "Alert2" { - return mapper.PrometheusAlertRuleId(alertRuleId) + + var updatedPR *monitoringv1.PrometheusRule + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group-to-be-empty", + Rules: []monitoringv1.Rule{userRule1}, + }, + { + Name: "group-with-rules", + Rules: []monitoringv1.Rule{anotherRule}, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, } - return mapper.PrometheusAlertRuleId("other-id") } - By("deleting rule from middle group") - err := client.DeleteUserDefinedAlertRuleById(ctx, alertRuleId) - Expect(err).ToNot(HaveOccurred()) - - By("verifying middle group was removed") - updatedPR, found, err := mockPR.Get(ctx, "test-namespace", "multi-group") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedPR.Spec.Groups).To(HaveLen(2)) - Expect(updatedPR.Spec.Groups[0].Name).To(Equal("group1")) - Expect(updatedPR.Spec.Groups[1].Name).To(Equal("group3")) + err := client.DeleteUserDefinedAlertRuleById(ctx, userRule1Id) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedPR.Spec.Groups).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[0].Name).To(Equal("group-with-rules")) }) }) }) diff --git 
a/pkg/management/errors.go b/pkg/management/errors.go index a175acdc8..66292fc4e 100644 --- a/pkg/management/errors.go +++ b/pkg/management/errors.go @@ -5,10 +5,18 @@ import "fmt" type NotFoundError struct { Resource string Id string + + AdditionalInfo string } func (r *NotFoundError) Error() string { - return fmt.Sprintf("%s with id %s not found", r.Resource, r.Id) + s := fmt.Sprintf("%s with id %s not found", r.Resource, r.Id) + + if r.AdditionalInfo != "" { + s += fmt.Sprintf(": %s", r.AdditionalInfo) + } + + return s } type NotAllowedError struct { diff --git a/pkg/management/get_alerts.go b/pkg/management/get_alerts.go index ec0c3976d..0aebeff7c 100644 --- a/pkg/management/get_alerts.go +++ b/pkg/management/get_alerts.go @@ -4,7 +4,8 @@ import ( "context" "fmt" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/relabel" "github.com/openshift/monitoring-plugin/pkg/k8s" ) @@ -15,39 +16,19 @@ func (c *client) GetAlerts(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s return nil, fmt.Errorf("failed to get prometheus alerts: %w", err) } + configs := c.k8sClient.RelabeledRules().Config() + var result []k8s.PrometheusAlert for _, alert := range alerts { - // Apply relabel configurations to the alert - updatedAlert, err := c.updateAlertBasedOnRelabelConfig(&alert) - if err != nil { - // Alert was dropped by relabel config, skip it + + relabels, keep := relabel.Process(labels.FromMap(alert.Labels), configs...) + if !keep { continue } - result = append(result, updatedAlert) - } - return result, nil -} - -func (c *client) updateAlertBasedOnRelabelConfig(alert *k8s.PrometheusAlert) (k8s.PrometheusAlert, error) { - // Create a temporary rule to match relabel configs - rule := &monitoringv1.Rule{ - Alert: alert.Labels["alertname"], - Labels: alert.Labels, + alert.Labels = relabels.Map() + result = append(result, alert) } - configs := c.mapper.GetAlertRelabelConfigSpec(rule) - - updatedLabels, err := applyRelabelConfigs(string(rule.Alert), alert.Labels, configs) - if err != nil { - return k8s.PrometheusAlert{}, err - } - - alert.Labels = updatedLabels - // Update severity if it was changed - if severity, exists := updatedLabels["severity"]; exists { - alert.Labels["severity"] = severity - } - - return *alert, nil + return result, nil } diff --git a/pkg/management/get_alerts_test.go b/pkg/management/get_alerts_test.go index 428303b37..a9f9732d1 100644 --- a/pkg/management/get_alerts_test.go +++ b/pkg/management/get_alerts_test.go @@ -3,12 +3,10 @@ package management_test import ( "context" "errors" - "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - osmv1 "github.com/openshift/api/monitoring/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/relabel" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" @@ -17,106 +15,141 @@ import ( var _ = Describe("GetAlerts", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockAlerts *testutils.MockPrometheusAlertsInterface - mockMapper *testutils.MockMapperClient - client management.Client - testTime time.Time + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) BeforeEach(func() { ctx = context.Background() - testTime = time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC) - - mockAlerts = &testutils.MockPrometheusAlertsInterface{} - mockK8s = &testutils.MockClient{ - PrometheusAlertsFunc: func() k8s.PrometheusAlertsInterface { - return mockAlerts - }, - } - mockMapper = &testutils.MockMapperClient{} - - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) }) - It("should return alerts unchanged when no relabel configs exist", func() { - mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ - {Labels: map[string]string{"alertname": "HighCPU", "severity": "warning"}, State: "firing", ActiveAt: testTime}, - {Labels: map[string]string{"alertname": "HighMemory", "severity": "critical"}, State: "pending", ActiveAt: testTime}, + Context("when PrometheusAlerts returns an error", func() { + BeforeEach(func() { + mockK8s.PrometheusAlertsFunc = func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetAlertsFunc: func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return nil, errors.New("failed to get alerts") + }, + } + } }) - mockMapper.GetAlertRelabelConfigSpecFunc = func(*monitoringv1.Rule) []osmv1.RelabelConfig { return nil } - - result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(HaveLen(2)) - Expect(result[0].Labels["alertname"]).To(Equal("HighCPU")) - Expect(result[1].Labels["alertname"]).To(Equal("HighMemory")) - }) - - It("should apply Replace relabel actions correctly", func() { - mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ - { - Labels: map[string]string{"alertname": "TestAlert", "severity": "warning", "team": "platform"}, - State: "firing", - }, + It("returns an error", func() { + req := k8s.GetAlertsRequest{} + _, err := client.GetAlerts(ctx, req) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get prometheus alerts")) }) - mockMapper.GetAlertRelabelConfigSpecFunc = func(rule *monitoringv1.Rule) []osmv1.RelabelConfig { - return []osmv1.RelabelConfig{ - {TargetLabel: "severity", Replacement: "critical", Action: "Replace"}, - {TargetLabel: "team", Replacement: "infrastructure", Action: "Replace"}, - {TargetLabel: "reviewed", Replacement: "true", Action: "Replace"}, - } - } - - result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(HaveLen(1)) - Expect(result[0].Labels).To(HaveKeyWithValue("severity", "critical")) - Expect(result[0].Labels).To(HaveKeyWithValue("team", "infrastructure")) - Expect(result[0].Labels).To(HaveKeyWithValue("reviewed", "true")) }) - It("should filter out alerts with Drop action", func() { - mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ - 
{Labels: map[string]string{"alertname": "KeepAlert", "severity": "warning"}, State: "firing", ActiveAt: testTime}, - {Labels: map[string]string{"alertname": "DropAlert", "severity": "info"}, State: "firing", ActiveAt: testTime}, - }) - mockMapper.GetAlertRelabelConfigSpecFunc = func(rule *monitoringv1.Rule) []osmv1.RelabelConfig { - if rule.Alert == "DropAlert" { - return []osmv1.RelabelConfig{{Action: "Drop"}} + Context("when PrometheusAlerts returns alerts", func() { + var ( + alert1 = k8s.PrometheusAlert{ + Labels: map[string]string{ + "alertname": "Alert1", + "severity": "warning", + "namespace": "default", + }, + State: "firing", } - return nil - } - - result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) + alert2 = k8s.PrometheusAlert{ + Labels: map[string]string{ + "alertname": "Alert2", + "severity": "critical", + "namespace": "kube-system", + }, + State: "pending", + } + ) + + Context("without relabel configs", func() { + BeforeEach(func() { + mockK8s.PrometheusAlertsFunc = func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetAlertsFunc: func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return []k8s.PrometheusAlert{alert1, alert2}, nil + }, + } + } + + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ConfigFunc: func() []*relabel.Config { + return []*relabel.Config{} + }, + } + } + }) + + It("returns all alerts without modification", func() { + req := k8s.GetAlertsRequest{} + alerts, err := client.GetAlerts(ctx, req) + Expect(err).NotTo(HaveOccurred()) + Expect(alerts).To(HaveLen(2)) + Expect(alerts[0].Labels["alertname"]).To(Equal("Alert1")) + Expect(alerts[1].Labels["alertname"]).To(Equal("Alert2")) + }) + }) - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(HaveLen(1)) - Expect(result[0].Labels["alertname"]).To(Equal("KeepAlert")) - }) + Context("with relabel configs that keep all alerts", func() { + BeforeEach(func() { + mockK8s.PrometheusAlertsFunc = func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetAlertsFunc: func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return []k8s.PrometheusAlert{alert1, alert2}, nil + }, + } + } + + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ConfigFunc: func() []*relabel.Config { + // Return empty config list to avoid validation issues in tests + // Relabel functionality is tested elsewhere (in k8s package) + return []*relabel.Config{} + }, + } + } + }) + + It("returns all alerts without modification when no relabel configs", func() { + req := k8s.GetAlertsRequest{} + alerts, err := client.GetAlerts(ctx, req) + Expect(err).NotTo(HaveOccurred()) + Expect(alerts).To(HaveLen(2)) + Expect(alerts[0].Labels["severity"]).To(Equal("warning")) + Expect(alerts[1].Labels["severity"]).To(Equal("critical")) + }) + }) - It("should propagate errors and handle edge cases", func() { - By("propagating errors from PrometheusAlerts interface") - mockAlerts.GetAlertsFunc = func(context.Context, k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { - return nil, errors.New("prometheus error") - } - _, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("prometheus error")) - - By("handling nil labels with Replace action") - mockAlerts.GetAlertsFunc = nil - 
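
The rewritten GetAlerts delegates dropping and relabeling to github.com/prometheus/prometheus/model/relabel, which is why the specs here only hand it empty config lists and leave relabel behavior to the k8s package tests. Below is a small, self-contained illustration of that package; the config and label values are invented for the example.

package main

import (
	"fmt"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/relabel"
)

func main() {
	// Drop anything labeled severity=info; everything else passes through unchanged.
	cfgs := []*relabel.Config{{
		SourceLabels: model.LabelNames{"severity"},
		Regex:        relabel.MustNewRegexp("info"),
		Action:       relabel.Drop,
	}}

	for _, lbls := range []map[string]string{
		{"alertname": "HighCPU", "severity": "warning"},
		{"alertname": "Noise", "severity": "info"},
	} {
		out, keep := relabel.Process(labels.FromMap(lbls), cfgs...)
		fmt.Println(lbls["alertname"], "keep:", keep, "labels:", out.Map())
	}
}
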
mockAlerts.SetActiveAlerts([]k8s.PrometheusAlert{ - {Labels: map[string]string{"alertname": "TestAlert", "severity": "warning"}, State: "firing", ActiveAt: testTime}, + Context("when no alerts are returned from Prometheus", func() { + BeforeEach(func() { + mockK8s.PrometheusAlertsFunc = func() k8s.PrometheusAlertsInterface { + return &testutils.MockPrometheusAlertsInterface{ + GetAlertsFunc: func(ctx context.Context, req k8s.GetAlertsRequest) ([]k8s.PrometheusAlert, error) { + return []k8s.PrometheusAlert{}, nil + }, + } + } + + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ConfigFunc: func() []*relabel.Config { + return []*relabel.Config{} + }, + } + } + }) + + It("returns an empty list", func() { + req := k8s.GetAlertsRequest{} + alerts, err := client.GetAlerts(ctx, req) + Expect(err).NotTo(HaveOccurred()) + Expect(alerts).To(HaveLen(0)) + }) }) - mockMapper.GetAlertRelabelConfigSpecFunc = func(*monitoringv1.Rule) []osmv1.RelabelConfig { - return []osmv1.RelabelConfig{{TargetLabel: "team", Replacement: "infra", Action: "Replace"}} - } - result, err := client.GetAlerts(ctx, k8s.GetAlertsRequest{}) - Expect(err).ToNot(HaveOccurred()) - Expect(result[0].Labels).To(HaveKeyWithValue("team", "infra")) }) }) diff --git a/pkg/management/get_rule_by_id.go b/pkg/management/get_rule_by_id.go index c9af605c1..e786ee464 100644 --- a/pkg/management/get_rule_by_id.go +++ b/pkg/management/get_rule_by_id.go @@ -2,64 +2,15 @@ package management import ( "context" - "fmt" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/apimachinery/pkg/types" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) func (c *client) GetRuleById(ctx context.Context, alertRuleId string) (monitoringv1.Rule, error) { - prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) - if err != nil { - return monitoringv1.Rule{}, err - } - - pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) - if err != nil { - return monitoringv1.Rule{}, err - } - + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) if !found { - return monitoringv1.Rule{}, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} - } - - var rule *monitoringv1.Rule - - for groupIdx := range pr.Spec.Groups { - for ruleIdx := range pr.Spec.Groups[groupIdx].Rules { - foundRule := &pr.Spec.Groups[groupIdx].Rules[ruleIdx] - if c.mapper.GetAlertingRuleId(foundRule) == mapper.PrometheusAlertRuleId(alertRuleId) { - rule = foundRule - break - } - } - } - - if rule != nil { - ruleWithRelabel, err := c.updateRuleBasedOnRelabelConfig(rule) - if err != nil { - return monitoringv1.Rule{}, err - } - - isPlatformRule := c.IsPlatformAlertRule(types.NamespacedName(*prId)) - c.addPlatformSourceLabel(&ruleWithRelabel, isPlatformRule) - - return ruleWithRelabel, nil - } - - return monitoringv1.Rule{}, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) -} - -func (c *client) updateRuleBasedOnRelabelConfig(rule *monitoringv1.Rule) (monitoringv1.Rule, error) { - configs := c.mapper.GetAlertRelabelConfigSpec(rule) - - updatedLabels, err := applyRelabelConfigs(string(rule.Alert), rule.Labels, configs) - if err != nil { - return monitoringv1.Rule{}, err + return monitoringv1.Rule{}, &NotFoundError{Resource: "AlertRule", Id: alertRuleId} } - rule.Labels = updatedLabels - return *rule, nil + 
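
With the mapper removed, GetRuleById becomes a direct lookup in the relabeled-rules cache, keyed by the content-derived ID from pkg/alert_rule; the specs below build their IDs the same way. A hypothetical caller, with made-up rule values:

package example

import (
	"context"
	"errors"
	"fmt"

	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
	"k8s.io/apimachinery/pkg/util/intstr"

	alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule"
	"github.com/openshift/monitoring-plugin/pkg/management"
)

func lookupExample(ctx context.Context, mgmt management.Client) error {
	// The ID is a function of the rule contents, not of any Kubernetes object name.
	id := alertrule.GetAlertingRuleId(&monitoringv1.Rule{
		Alert: "ExampleAlert", // made-up rule
		Expr:  intstr.FromString("up == 0"),
	})

	rule, err := mgmt.GetRuleById(ctx, id)
	if err != nil {
		var notFound *management.NotFoundError
		if errors.As(err, &notFound) {
			// The informer has not (yet) loaded a rule with this ID, or no such rule exists.
			fmt.Printf("%s %s not found\n", notFound.Resource, notFound.Id)
		}
		return err
	}

	fmt.Println("found rule:", rule.Alert)
	return nil
}
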
return rule, nil } diff --git a/pkg/management/get_rule_by_id_test.go b/pkg/management/get_rule_by_id_test.go index f467632b5..1c4b7822b 100644 --- a/pkg/management/get_rule_by_id_test.go +++ b/pkg/management/get_rule_by_id_test.go @@ -7,189 +7,153 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) -var ErrAlertRuleNotFound = errors.New("alert rule not found") - var _ = Describe("GetRuleById", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR *testutils.MockPrometheusRuleInterface - mockNS *testutils.MockNamespaceInformerInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() - - mockPR = &testutils.MockPrometheusRuleInterface{} - mockNS = &testutils.MockNamespaceInformerInterface{} - mockNS.SetMonitoringNamespaces(map[string]bool{ - "monitoring": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR - }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNS + var ( + testRule = monitoringv1.Rule{ + Alert: "TestAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "test-namespace", + k8s.PrometheusRuleLabelName: "test-rule", }, } - mockMapper = &testutils.MockMapperClient{} + testRuleId = alertrule.GetAlertingRuleId(&testRule) + ) - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) }) - Context("when retrieving an alert rule by ID", func() { - It("should successfully return the rule when it exists", func() { - By("setting up a PrometheusRule with multiple rules") - rule1 := monitoringv1.Rule{ - Alert: "TestAlert1", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, - } - rule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu > 80"), - Annotations: map[string]string{ - "summary": "High CPU usage", - }, - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-rules", - Namespace: "monitoring", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1}, - }, - { - Name: "group2", - Rules: []monitoringv1.Rule{rule2}, - }, + Context("when rule is found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == testRuleId { + return testRule, true + } + return monitoringv1.Rule{}, false }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "monitoring/test-rules": prometheusRule, - }) - - alertRuleId := 
"test-rule-id-2" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "monitoring", - Name: "test-rules", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "TestAlert2" { - return mapper.PrometheusAlertRuleId(alertRuleId) } - return mapper.PrometheusAlertRuleId("other-id") } - - By("retrieving the rule by ID") - rule, err := client.GetRuleById(ctx, alertRuleId) - Expect(err).ToNot(HaveOccurred()) - Expect(rule).ToNot(BeNil()) - - By("verifying the returned rule is correct") - Expect(rule.Alert).To(Equal("TestAlert2")) - Expect(rule.Expr.String()).To(Equal("cpu > 80")) - Expect(rule.Labels).To(HaveKeyWithValue("source", "platform")) - Expect(rule.Annotations).To(HaveKeyWithValue("summary", "High CPU usage")) }) - It("should return an error when the mapper cannot find the rule", func() { - alertRuleId := "nonexistent-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, ErrAlertRuleNotFound - } - - By("attempting to retrieve a nonexistent rule") - _, err := client.GetRuleById(ctx, alertRuleId) - - By("verifying an error is returned") - Expect(err).To(HaveOccurred()) - Expect(err).To(Equal(ErrAlertRuleNotFound)) + It("returns the rule", func() { + rule, err := client.GetRuleById(ctx, testRuleId) + Expect(err).NotTo(HaveOccurred()) + Expect(rule.Alert).To(Equal("TestAlert")) + Expect(rule.Labels["severity"]).To(Equal("warning")) }) + }) - It("should return an error when the PrometheusRule does not exist", func() { - alertRuleId := "test-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "monitoring", - Name: "nonexistent-rule", - }, nil + Context("when rule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false + }, + } } + }) - By("attempting to retrieve a rule from a nonexistent PrometheusRule") - _, err := client.GetRuleById(ctx, alertRuleId) - - By("verifying an error is returned") + It("returns NotFoundError", func() { + _, err := client.GetRuleById(ctx, "nonexistent-id") Expect(err).To(HaveOccurred()) + + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("AlertRule")) + Expect(notFoundErr.Id).To(Equal("nonexistent-id")) }) + }) - It("should return an error when the rule ID is not found in the PrometheusRule", func() { - By("setting up a PrometheusRule without the target rule") - rule1 := monitoringv1.Rule{ - Alert: "DifferentAlert", + Context("when multiple rules exist", func() { + var ( + rule1 = monitoringv1.Rule{ + Alert: "Alert1", Expr: intstr.FromString("up == 0"), } + rule1Id = alertrule.GetAlertingRuleId(&rule1) - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-rules", - Namespace: "monitoring", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1}, - }, + rule2 = monitoringv1.Rule{ + Alert: "Alert2", + Expr: intstr.FromString("down == 1"), + } + rule2Id = 
alertrule.GetAlertingRuleId(&rule2) + ) + + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + switch id { + case rule1Id: + return rule1, true + case rule2Id: + return rule2, true + default: + return monitoringv1.Rule{}, false + } }, - }, + } } + }) + + It("returns the correct rule based on ID", func() { + rule, err := client.GetRuleById(ctx, rule1Id) + Expect(err).NotTo(HaveOccurred()) + Expect(rule.Alert).To(Equal("Alert1")) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "monitoring/test-rules": prometheusRule, - }) + rule, err = client.GetRuleById(ctx, rule2Id) + Expect(err).NotTo(HaveOccurred()) + Expect(rule.Alert).To(Equal("Alert2")) + }) + }) - alertRuleId := "nonexistent-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "monitoring", - Name: "test-rules", - }, nil + Context("with recording rules", func() { + var ( + recordingRule = monitoringv1.Rule{ + Record: "job:request_latency_seconds:mean5m", + Expr: intstr.FromString("avg by (job) (request_latency_seconds)"), } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId("different-id") + recordingRuleId = alertrule.GetAlertingRuleId(&recordingRule) + ) + + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == recordingRuleId { + return recordingRule, true + } + return monitoringv1.Rule{}, false + }, + } } + }) - By("attempting to retrieve the rule") - _, err := client.GetRuleById(ctx, alertRuleId) - - By("verifying an error is returned") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("alert rule with id")) - Expect(err.Error()).To(ContainSubstring("not found")) + It("returns the recording rule", func() { + rule, err := client.GetRuleById(ctx, recordingRuleId) + Expect(err).NotTo(HaveOccurred()) + Expect(rule.Record).To(Equal("job:request_latency_seconds:mean5m")) }) }) }) diff --git a/pkg/management/list_rules.go b/pkg/management/list_rules.go index 2d5307dba..b78e70ad0 100644 --- a/pkg/management/list_rules.go +++ b/pkg/management/list_rules.go @@ -3,18 +3,9 @@ package management import ( "context" "errors" - "fmt" + "github.com/openshift/monitoring-plugin/pkg/k8s" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/apimachinery/pkg/types" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" -) - -const ( - alertRuleIdLabel = "alert_rule_id" - sourceLabel = "source" - platformSourceValue = "platform" ) func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, arOptions AlertRuleOptions) ([]monitoringv1.Rule, error) { @@ -22,95 +13,44 @@ func (c *client) ListRules(ctx context.Context, prOptions PrometheusRuleOptions, return nil, errors.New("PrometheusRule Namespace must be specified when Name is provided") } - // Name and Namespace specified - if prOptions.Name != "" && prOptions.Namespace != "" { - pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prOptions.Namespace, prOptions.Name) - if err != nil { - return nil, fmt.Errorf("failed to get PrometheusRule 
%s/%s: %w", prOptions.Namespace, prOptions.Name, err) - } - if !found { - return nil, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prOptions.Namespace, prOptions.Name)} - } - return c.extractAndFilterRules(*pr, &prOptions, &arOptions), nil - } - - // Name not specified - allPrometheusRules, err := c.k8sClient.PrometheusRules().List(ctx, prOptions.Namespace) - if err != nil { - return nil, fmt.Errorf("failed to list PrometheusRules: %w", err) - } - - var allRules []monitoringv1.Rule - for _, pr := range allPrometheusRules { - rules := c.extractAndFilterRules(pr, &prOptions, &arOptions) - allRules = append(allRules, rules...) - } - - return allRules, nil -} - -func (c *client) extractAndFilterRules(pr monitoringv1.PrometheusRule, prOptions *PrometheusRuleOptions, arOptions *AlertRuleOptions) []monitoringv1.Rule { - var rules []monitoringv1.Rule - prId := types.NamespacedName{Name: pr.Name, Namespace: pr.Namespace} - isPlatformRule := c.IsPlatformAlertRule(prId) - - for _, group := range pr.Spec.Groups { - // Filter by group name if specified - if prOptions.GroupName != "" && group.Name != prOptions.GroupName { - continue - } - - for _, rule := range group.Rules { - // Skip recording rules (only process alert rules) - if rule.Alert == "" { - continue - } + allRules := c.k8sClient.RelabeledRules().List(ctx) + var filteredRules []monitoringv1.Rule - // Apply alert rule filters - if !c.matchesAlertRuleFilters(rule, pr, arOptions) { + for _, rule := range allRules { + // Filter by PrometheusRule name and namespace if specified + if prOptions.Name != "" && prOptions.Namespace != "" { + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + if namespace != prOptions.Namespace || name != prOptions.Name { continue } - - // Parse and update the rule based on relabeling configurations - r := c.parseRule(rule) - if r != nil { - c.addPlatformSourceLabel(r, isPlatformRule) - rules = append(rules, *r) - } } - } - return rules -} + // Apply alert rule filters + if !c.matchesAlertRuleFilters(rule, arOptions) { + continue + } -func (c *client) addPlatformSourceLabel(rule *monitoringv1.Rule, isPlatformRule bool) { - if rule == nil || !isPlatformRule { - return + filteredRules = append(filteredRules, rule) } - if rule.Labels == nil { - rule.Labels = make(map[string]string) - } - rule.Labels[sourceLabel] = platformSourceValue + return filteredRules, nil } -func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, pr monitoringv1.PrometheusRule, arOptions *AlertRuleOptions) bool { +func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, arOptions AlertRuleOptions) bool { // Filter by alert name if arOptions.Name != "" && string(rule.Alert) != arOptions.Name { return false } - // Filter by source (platform or user-defined) - if arOptions.Source != "" { - prId := types.NamespacedName{Name: pr.Name, Namespace: pr.Namespace} - isPlatform := c.IsPlatformAlertRule(prId) - - if arOptions.Source == "platform" && !isPlatform { - return false - } - if arOptions.Source == "user-defined" && isPlatform { + // Filter by source (platform) + if arOptions.Source == "platform" { + source, exists := rule.Labels["openshift_io_alert_source"] + if !exists { return false } + + return source == "platform" } // Filter by labels @@ -125,27 +65,3 @@ func (c *client) matchesAlertRuleFilters(rule monitoringv1.Rule, pr monitoringv1 return true } - -func (c *client) parseRule(rule monitoringv1.Rule) *monitoringv1.Rule { - alertRuleId := 
c.mapper.GetAlertingRuleId(&rule) - if alertRuleId == "" { - return nil - } - - _, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) - if err != nil { - return nil - } - - rule, err = c.updateRuleBasedOnRelabelConfig(&rule) - if err != nil { - return nil - } - - if rule.Labels == nil { - rule.Labels = make(map[string]string) - } - rule.Labels[alertRuleIdLabel] = string(alertRuleId) - - return &rule -} diff --git a/pkg/management/list_rules_test.go b/pkg/management/list_rules_test.go index 61bb1162b..675c540f1 100644 --- a/pkg/management/list_rules_test.go +++ b/pkg/management/list_rules_test.go @@ -5,465 +5,278 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("ListRules", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR *testutils.MockPrometheusRuleInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() - - mockPR = &testutils.MockPrometheusRuleInterface{} - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR - }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer - }, - } - mockMapper = &testutils.MockMapperClient{ - GetAlertingRuleIdFunc: func(rule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId(rule.Alert) - }, - FindAlertRuleByIdFunc: func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - // Mock successful lookup for all alert rules - return &mapper.PrometheusRuleId{}, nil - }, - } - - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) - }) - - It("should list rules from a specific PrometheusRule", func() { - testRule := monitoringv1.Rule{ - Alert: "TestAlert", + var ( + rule1 = monitoringv1.Rule{ + Alert: "Alert1", Expr: intstr.FromString("up == 0"), - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-rule", - Namespace: "test-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "test-group", - Rules: []monitoringv1.Rule{testRule}, - }, - }, + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule1", }, } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "test-namespace/test-rule": prometheusRule, - }) - - options := management.PrometheusRuleOptions{ - Name: "test-rule", - Namespace: "test-namespace", - GroupName: "test-group", - } - - rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) - - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(HaveLen(1)) - Expect(rules[0].Alert).To(Equal("TestAlert")) - Expect(rules[0].Expr.String()).To(Equal("up 
== 0")) - }) - - It("should list rules from all namespaces", func() { - testRule1 := monitoringv1.Rule{ - Alert: "TestAlert1", + rule2 = monitoringv1.Rule{ + Alert: "Alert2", Expr: intstr.FromString("up == 0"), - } - - testRule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu_usage > 80"), - } - - prometheusRule1 := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rule1", - Namespace: "namespace1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{testRule1}, - }, - }, - }, - } - - prometheusRule2 := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rule2", - Namespace: "namespace2", + Labels: map[string]string{ + "severity": "critical", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule2", }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group2", - Rules: []monitoringv1.Rule{testRule2}, - }, - }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "namespace1/rule1": prometheusRule1, - "namespace2/rule2": prometheusRule2, - }) - - options := management.PrometheusRuleOptions{} - - rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) - - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(HaveLen(2)) - - alertNames := []string{rules[0].Alert, rules[1].Alert} - Expect(alertNames).To(ContainElement("TestAlert1")) - Expect(alertNames).To(ContainElement("TestAlert2")) - }) - - It("should list all rules from a specific namespace", func() { - // Setup test data in the same namespace but different PrometheusRules - testRule1 := monitoringv1.Rule{ - Alert: "NamespaceAlert1", - Expr: intstr.FromString("memory_usage > 90"), - } - - testRule2 := monitoringv1.Rule{ - Alert: "NamespaceAlert2", - Expr: intstr.FromString("disk_usage > 85"), - } - - testRule3 := monitoringv1.Rule{ - Alert: "OtherNamespaceAlert", - Expr: intstr.FromString("network_error_rate > 0.1"), } - // PrometheusRule in target namespace - prometheusRule1 := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rule1", - Namespace: "target-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{testRule1}, - }, - }, + rule3 = monitoringv1.Rule{ + Alert: "Alert3", + Expr: intstr.FromString("down == 1"), + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "namespace2", + k8s.PrometheusRuleLabelName: "rule3", }, } - // Another PrometheusRule in the same target namespace - prometheusRule2 := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rule2", - Namespace: "target-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group2", - Rules: []monitoringv1.Rule{testRule2}, - }, - }, + platformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", + "openshift_io_alert_source": "platform", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", }, } - // PrometheusRule in a different namespace (should not be included) - prometheusRule3 := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rule3", - Namespace: "other-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - 
Groups: []monitoringv1.RuleGroup{ - { - Name: "group3", - Rules: []monitoringv1.Rule{testRule3}, - }, - }, + customLabelRule = monitoringv1.Rule{ + Alert: "CustomLabelAlert", + Expr: intstr.FromString("custom == 1"), + Labels: map[string]string{ + "severity": "info", + "team": "backend", + "env": "production", + k8s.PrometheusRuleLabelNamespace: "namespace1", + k8s.PrometheusRuleLabelName: "rule1", }, } + ) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "target-namespace/rule1": prometheusRule1, - "target-namespace/rule2": prometheusRule2, - "other-namespace/rule3": prometheusRule3, - }) + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) - options := management.PrometheusRuleOptions{ - Namespace: "target-namespace", + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(ctx context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{rule1, rule2, rule3, platformRule, customLabelRule} + }, + } } + }) - rules, err := client.ListRules(ctx, options, management.AlertRuleOptions{}) - - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(HaveLen(2)) + Context("when PrometheusRule Name is provided without Namespace", func() { + It("returns an error", func() { + prOptions := management.PrometheusRuleOptions{ + Name: "rule1", + } + arOptions := management.AlertRuleOptions{} - alertNames := []string{rules[0].Alert, rules[1].Alert} - Expect(alertNames).To(ContainElement("NamespaceAlert1")) - Expect(alertNames).To(ContainElement("NamespaceAlert2")) - Expect(alertNames).ToNot(ContainElement("OtherNamespaceAlert")) + _, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("PrometheusRule Namespace must be specified when Name is provided")) + }) }) - Context("AlertRuleOptions filtering", func() { - var prometheusRule *monitoringv1.PrometheusRule - - BeforeEach(func() { - prometheusRule = &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-alerts", - Namespace: "monitoring", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "critical-alerts", - Rules: []monitoringv1.Rule{ - { - Alert: "HighCPUUsage", - Expr: intstr.FromString("cpu_usage > 90"), - Labels: map[string]string{ - "severity": "critical", - "component": "node", - }, - }, - { - Alert: "HighCPUUsage", - Expr: intstr.FromString("cpu_usage > 80"), - Labels: map[string]string{ - "severity": "warning", - "component": "node", - }, - }, - { - Alert: "DiskSpaceLow", - Expr: intstr.FromString("disk_usage > 95"), - Labels: map[string]string{ - "severity": "critical", - "component": "storage", - }, - }, - }, - }, - }, - }, - } + Context("when no filters are provided", func() { + It("returns all rules", func() { + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{} - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "monitoring/test-alerts": prometheusRule, - }) + rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(5)) }) + }) - It("should filter by alert name", func() { + Context("when filtering by PrometheusRule Name and Namespace", func() { + It("returns only rules from the specified PrometheusRule", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-alerts", - Namespace: "monitoring", - 
} - arOptions := management.AlertRuleOptions{ - Name: "HighCPUUsage", + Name: "rule1", + Namespace: "namespace1", } + arOptions := management.AlertRuleOptions{} rules, err := client.ListRules(ctx, prOptions, arOptions) - - Expect(err).ToNot(HaveOccurred()) + Expect(err).NotTo(HaveOccurred()) Expect(rules).To(HaveLen(2)) - Expect(rules[0].Alert).To(Equal("HighCPUUsage")) - Expect(rules[1].Alert).To(Equal("HighCPUUsage")) + Expect(rules[0].Alert).To(BeElementOf("Alert1", "CustomLabelAlert")) + Expect(rules[1].Alert).To(BeElementOf("Alert1", "CustomLabelAlert")) }) - It("should filter by label severity", func() { + It("returns empty list when no rules match", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-alerts", - Namespace: "monitoring", - } - arOptions := management.AlertRuleOptions{ - Labels: map[string]string{ - "severity": "critical", - }, + Name: "nonexistent", + Namespace: "namespace1", } + arOptions := management.AlertRuleOptions{} rules, err := client.ListRules(ctx, prOptions, arOptions) - - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(HaveLen(2)) - - alertNames := []string{rules[0].Alert, rules[1].Alert} - Expect(alertNames).To(ContainElement("HighCPUUsage")) - Expect(alertNames).To(ContainElement("DiskSpaceLow")) - - for _, rule := range rules { - Expect(rule.Labels["severity"]).To(Equal("critical")) - } + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(0)) }) + }) - It("should filter by multiple labels", func() { - prOptions := management.PrometheusRuleOptions{ - Name: "test-alerts", - Namespace: "monitoring", - } + Context("when filtering by alert name", func() { + It("returns only rules with matching alert name", func() { + prOptions := management.PrometheusRuleOptions{} arOptions := management.AlertRuleOptions{ - Labels: map[string]string{ - "severity": "critical", - "component": "storage", - }, + Name: "Alert1", } rules, err := client.ListRules(ctx, prOptions, arOptions) - - Expect(err).ToNot(HaveOccurred()) + Expect(err).NotTo(HaveOccurred()) Expect(rules).To(HaveLen(1)) - Expect(rules[0].Alert).To(Equal("DiskSpaceLow")) - Expect(rules[0].Labels["severity"]).To(Equal("critical")) - Expect(rules[0].Labels["component"]).To(Equal("storage")) + Expect(rules[0].Alert).To(Equal("Alert1")) }) - It("should filter by source platform", func() { - platformRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "platform-group", - Rules: []monitoringv1.Rule{ - { - Alert: "PlatformAlert", - Expr: intstr.FromString("platform_metric > 0"), - }, - }, - }, - }, - }, + It("returns empty list when alert name doesn't match", func() { + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{ + Name: "NonexistentAlert", } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "monitoring/test-alerts": prometheusRule, - "platform-namespace-1/openshift-platform-alerts": platformRule, - }) + rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(0)) + }) + }) + Context("when filtering by source=platform", func() { + It("returns only platform rules", func() { prOptions := management.PrometheusRuleOptions{} arOptions := management.AlertRuleOptions{ Source: "platform", } rules, err := client.ListRules(ctx, prOptions, arOptions) - - 
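
The reworked ListRules filters the relabeled-rule cache in memory, so callers compose PrometheusRuleOptions and AlertRuleOptions instead of reading PrometheusRule objects themselves. A hypothetical call follows; names and values are invented, Name and Namespace are applied as a pair, and only Source == "platform" acts as a source filter.

package example

import (
	"context"
	"fmt"

	"github.com/openshift/monitoring-plugin/pkg/management"
)

func listExamples(ctx context.Context, mgmt management.Client) error {
	// Every alerting rule carried by one user-defined PrometheusRule (hypothetical object).
	fromOneObject, err := mgmt.ListRules(ctx,
		management.PrometheusRuleOptions{Namespace: "my-namespace", Name: "my-rules"},
		management.AlertRuleOptions{},
	)
	if err != nil {
		return err
	}

	// Platform rules only, identified by the openshift_io_alert_source label.
	platformOnly, err := mgmt.ListRules(ctx,
		management.PrometheusRuleOptions{},
		management.AlertRuleOptions{Source: "platform"},
	)
	if err != nil {
		return err
	}

	fmt.Printf("%d rules in my-namespace/my-rules, %d platform rules\n", len(fromOneObject), len(platformOnly))
	return nil
}
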
Expect(err).ToNot(HaveOccurred()) + Expect(err).NotTo(HaveOccurred()) Expect(rules).To(HaveLen(1)) Expect(rules[0].Alert).To(Equal("PlatformAlert")) - Expect(rules[0].Labels).To(HaveKeyWithValue("source", "platform")) + Expect(rules[0].Labels["openshift_io_alert_source"]).To(Equal("platform")) }) + }) - It("should filter by source user-defined", func() { - platformRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "platform-group", - Rules: []monitoringv1.Rule{ - { - Alert: "PlatformAlert", - Expr: intstr.FromString("platform_metric > 0"), - }, - }, - }, - }, + Context("when filtering by labels", func() { + It("returns rules matching a single label", func() { + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{ + Labels: map[string]string{ + "severity": "warning", }, } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "monitoring/test-alerts": prometheusRule, - "platform-namespace-1/openshift-platform-alerts": platformRule, - }) + rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(2)) + }) + It("returns rules matching multiple labels", func() { prOptions := management.PrometheusRuleOptions{} arOptions := management.AlertRuleOptions{ - Source: "user-defined", + Labels: map[string]string{ + "team": "backend", + "env": "production", + }, } rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(1)) + Expect(rules[0].Alert).To(Equal("CustomLabelAlert")) + }) - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(HaveLen(3)) + It("returns empty list when labels don't match", func() { + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{ + Labels: map[string]string{ + "nonexistent": "value", + }, + } - alertNames := []string{rules[0].Alert, rules[1].Alert, rules[2].Alert} - Expect(alertNames).To(ContainElement("HighCPUUsage")) - Expect(alertNames).To(ContainElement("DiskSpaceLow")) - Expect(alertNames).ToNot(ContainElement("PlatformAlert")) + rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(0)) }) + }) - It("should combine multiple filters", func() { + Context("when combining multiple filters", func() { + It("returns rules matching all filters", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-alerts", - Namespace: "monitoring", + Name: "rule1", + Namespace: "namespace1", } arOptions := management.AlertRuleOptions{ - Name: "HighCPUUsage", Labels: map[string]string{ - "severity": "critical", + "severity": "warning", }, } rules, err := client.ListRules(ctx, prOptions, arOptions) - - Expect(err).ToNot(HaveOccurred()) + Expect(err).NotTo(HaveOccurred()) Expect(rules).To(HaveLen(1)) - Expect(rules[0].Alert).To(Equal("HighCPUUsage")) - Expect(rules[0].Labels["severity"]).To(Equal("critical")) + Expect(rules[0].Alert).To(Equal("Alert1")) }) - It("should return empty list when no rules match filters", func() { + It("returns empty list when some filters don't match", func() { prOptions := management.PrometheusRuleOptions{ - Name: "test-alerts", - Namespace: "monitoring", + Name: "rule1", + Namespace: "namespace1", } arOptions := management.AlertRuleOptions{ - Name: "NonExistentAlert", + Labels: 
map[string]string{ + "severity": "critical", + }, } rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(0)) + }) + }) - Expect(err).ToNot(HaveOccurred()) - Expect(rules).To(BeEmpty()) + Context("when RelabeledRules returns empty list", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + ListFunc: func(ctx context.Context) []monitoringv1.Rule { + return []monitoringv1.Rule{} + }, + } + } + }) + + It("returns empty list", func() { + prOptions := management.PrometheusRuleOptions{} + arOptions := management.AlertRuleOptions{} + + rules, err := client.ListRules(ctx, prOptions, arOptions) + Expect(err).NotTo(HaveOccurred()) + Expect(rules).To(HaveLen(0)) }) }) }) diff --git a/pkg/management/management.go b/pkg/management/management.go index a42f2dcbe..e310f4055 100644 --- a/pkg/management/management.go +++ b/pkg/management/management.go @@ -4,14 +4,12 @@ import ( "k8s.io/apimachinery/pkg/types" "github.com/openshift/monitoring-plugin/pkg/k8s" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) type client struct { k8sClient k8s.Client - mapper mapper.Client } func (c *client) IsPlatformAlertRule(prId types.NamespacedName) bool { - return c.k8sClient.NamespaceInformer().IsClusterMonitoringNamespace(prId.Namespace) + return c.k8sClient.Namespace().IsClusterMonitoringNamespace(prId.Namespace) } diff --git a/pkg/management/management_suite_test.go b/pkg/management/management_suite_test.go index 6cf1a3084..b2dd05b63 100644 --- a/pkg/management/management_suite_test.go +++ b/pkg/management/management_suite_test.go @@ -5,8 +5,14 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/prometheus/common/model" ) +var _ = BeforeSuite(func() { + // Set validation scheme globally for all tests that use relabel configs + model.NameValidationScheme = model.LegacyValidation +}) + func TestManagement(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Management Suite") diff --git a/pkg/management/mapper/mapper.go b/pkg/management/mapper/mapper.go deleted file mode 100644 index f2f9a325f..000000000 --- a/pkg/management/mapper/mapper.go +++ /dev/null @@ -1,287 +0,0 @@ -package mapper - -import ( - "context" - "crypto/sha256" - "fmt" - "log" - "regexp" - "slices" - "sort" - "strings" - "sync" - - osmv1 "github.com/openshift/api/monitoring/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/cache" - - "github.com/openshift/monitoring-plugin/pkg/k8s" -) - -type mapper struct { - k8sClient k8s.Client - mu sync.RWMutex - - prometheusRules map[PrometheusRuleId][]PrometheusAlertRuleId - alertRelabelConfigs map[AlertRelabelConfigId][]osmv1.RelabelConfig -} - -var _ Client = (*mapper)(nil) - -func (m *mapper) GetAlertingRuleId(alertRule *monitoringv1.Rule) PrometheusAlertRuleId { - var kind, name string - if alertRule.Alert != "" { - kind = "alert" - name = alertRule.Alert - } else if alertRule.Record != "" { - kind = "record" - name = alertRule.Record - } else { - return "" - } - - expr := alertRule.Expr.String() - forDuration := "" - if alertRule.For != nil { - forDuration = string(*alertRule.For) - } - - var sortedLabels []string - if alertRule.Labels != nil { - for key, value := range alertRule.Labels { - sortedLabels = append(sortedLabels, fmt.Sprintf("%s=%s", key, value)) - } - sort.Strings(sortedLabels) - 
} - - var sortedAnnotations []string - if alertRule.Annotations != nil { - for key, value := range alertRule.Annotations { - sortedAnnotations = append(sortedAnnotations, fmt.Sprintf("%s=%s", key, value)) - } - sort.Strings(sortedAnnotations) - } - - // Build the hash input string - hashInput := strings.Join([]string{ - kind, - name, - expr, - forDuration, - strings.Join(sortedLabels, ","), - strings.Join(sortedAnnotations, ","), - }, "\n") - - // Generate SHA256 hash - hash := sha256.Sum256([]byte(hashInput)) - - return PrometheusAlertRuleId(fmt.Sprintf("%s/%x", name, hash)) -} - -func (m *mapper) FindAlertRuleById(alertRuleId PrometheusAlertRuleId) (*PrometheusRuleId, error) { - m.mu.RLock() - defer m.mu.RUnlock() - - for id, rules := range m.prometheusRules { - if slices.Contains(rules, alertRuleId) { - return &id, nil - } - } - - // If the PrometheusRuleId is not found, return an error - return nil, fmt.Errorf("alert rule with id %s not found", alertRuleId) -} - -func (m *mapper) WatchPrometheusRules(ctx context.Context) { - go func() { - callbacks := k8s.PrometheusRuleInformerCallback{ - OnAdd: func(pr *monitoringv1.PrometheusRule) { - m.AddPrometheusRule(pr) - }, - OnUpdate: func(pr *monitoringv1.PrometheusRule) { - m.AddPrometheusRule(pr) - }, - OnDelete: func(key cache.ObjectName) { - m.DeletePrometheusRule(key) - }, - } - - err := m.k8sClient.PrometheusRuleInformer().Run(ctx, callbacks) - if err != nil { - log.Fatalf("Failed to run PrometheusRule informer: %v", err) - } - }() -} - -func (m *mapper) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { - m.mu.Lock() - defer m.mu.Unlock() - - promRuleId := PrometheusRuleId(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name}) - delete(m.prometheusRules, promRuleId) - - rules := make([]PrometheusAlertRuleId, 0) - for _, group := range pr.Spec.Groups { - for _, rule := range group.Rules { - if rule.Alert != "" { - ruleId := m.GetAlertingRuleId(&rule) - if ruleId != "" { - rules = append(rules, ruleId) - } - } - } - } - - m.prometheusRules[promRuleId] = rules -} - -func (m *mapper) DeletePrometheusRule(key cache.ObjectName) { - m.mu.Lock() - defer m.mu.Unlock() - - delete(m.prometheusRules, PrometheusRuleId(key)) -} - -func (m *mapper) WatchAlertRelabelConfigs(ctx context.Context) { - go func() { - callbacks := k8s.AlertRelabelConfigInformerCallback{ - OnAdd: func(arc *osmv1.AlertRelabelConfig) { - m.AddAlertRelabelConfig(arc) - }, - OnUpdate: func(arc *osmv1.AlertRelabelConfig) { - m.AddAlertRelabelConfig(arc) - }, - OnDelete: func(key cache.ObjectName) { - m.DeleteAlertRelabelConfig(key) - }, - } - - err := m.k8sClient.AlertRelabelConfigInformer().Run(ctx, callbacks) - if err != nil { - log.Fatalf("Failed to run AlertRelabelConfig informer: %v", err) - } - }() -} - -func (m *mapper) AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { - m.mu.Lock() - defer m.mu.Unlock() - - arcId := AlertRelabelConfigId(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name}) - - // Clean up old entries - delete(m.alertRelabelConfigs, arcId) - - configs := make([]osmv1.RelabelConfig, 0) - - for _, config := range arc.Spec.Configs { - if slices.Contains(config.SourceLabels, "alertname") { - alertname := parseAlertnameFromRelabelConfig(config) - if alertname != "" { - configs = append(configs, config) - } - } - } - - if len(configs) > 0 { - m.alertRelabelConfigs[arcId] = configs - } -} - -func parseAlertnameFromRelabelConfig(config osmv1.RelabelConfig) string { - separator := config.Separator - if separator == "" { - separator = ";" - } 
- - regex := config.Regex - if regex == "" { - return "" - } - - values := strings.Split(regex, separator) - if len(values) != len(config.SourceLabels) { - return "" - } - - // Find the alertname value from source labels - for i, labelName := range config.SourceLabels { - if string(labelName) == "alertname" { - return values[i] - } - } - - return "" -} - -func (m *mapper) DeleteAlertRelabelConfig(key cache.ObjectName) { - m.mu.Lock() - defer m.mu.Unlock() - - arcId := AlertRelabelConfigId(key) - delete(m.alertRelabelConfigs, arcId) -} - -func (m *mapper) GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig { - m.mu.RLock() - defer m.mu.RUnlock() - - if alertRule == nil { - return nil - } - - var matchingConfigs []osmv1.RelabelConfig - - // Iterate through all AlertRelabelConfigs - for _, configs := range m.alertRelabelConfigs { - for _, config := range configs { - if m.configMatchesAlert(config, alertRule) { - matchingConfigs = append(matchingConfigs, config) - } - } - } - - return matchingConfigs -} - -// configMatchesAlert checks if a RelabelConfig matches the given alert rule's labels -func (m *mapper) configMatchesAlert(config osmv1.RelabelConfig, alertRule *monitoringv1.Rule) bool { - separator := config.Separator - if separator == "" { - separator = ";" - } - - var labelValues []string - for _, labelName := range config.SourceLabels { - labelValue := "" - - if string(labelName) == "alertname" { - if alertRule.Alert != "" { - labelValue = alertRule.Alert - } - } else { - if alertRule.Labels != nil { - if val, exists := alertRule.Labels[string(labelName)]; exists { - labelValue = val - } - } - } - - labelValues = append(labelValues, labelValue) - } - - ruleLabels := strings.Join(labelValues, separator) - - regex := config.Regex - if regex == "" { - regex = "(.*)" - } - - matched, err := regexp.MatchString(regex, ruleLabels) - if err != nil { - return false - } - - return matched -} diff --git a/pkg/management/mapper/mapper_suite_test.go b/pkg/management/mapper/mapper_suite_test.go deleted file mode 100644 index ad8ae2bb4..000000000 --- a/pkg/management/mapper/mapper_suite_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package mapper_test - -import ( - "testing" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" -) - -func TestMapper(t *testing.T) { - RegisterFailHandler(Fail) - RunSpecs(t, "Mapper Suite") -} diff --git a/pkg/management/mapper/mapper_test.go b/pkg/management/mapper/mapper_test.go deleted file mode 100644 index ceae3c594..000000000 --- a/pkg/management/mapper/mapper_test.go +++ /dev/null @@ -1,856 +0,0 @@ -package mapper_test - -import ( - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - - osmv1 "github.com/openshift/api/monitoring/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/client-go/tools/cache" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" - "github.com/openshift/monitoring-plugin/pkg/management/testutils" -) - -var _ = Describe("Mapper", func() { - var ( - mockK8sClient *testutils.MockClient - mapperClient mapper.Client - ) - - BeforeEach(func() { - mockK8sClient = &testutils.MockClient{} - mapperClient = mapper.New(mockK8sClient) - }) - - createPrometheusRule := func(namespace, name string, alertRules []monitoringv1.Rule) *monitoringv1.PrometheusRule { - return &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: name, - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "test-group", - Rules: alertRules, - }, - }, - }, - } - } - - Describe("GetAlertingRuleId", func() { - Context("when generating IDs for alert rules", func() { - It("should generate a non-empty ID for a simple alert rule", func() { - By("creating a simple alert rule") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - - By("generating the rule ID") - ruleId := mapperClient.GetAlertingRuleId(&alertRule) - - By("verifying the result") - Expect(ruleId).NotTo(BeEmpty()) - Expect(string(ruleId)).To(HaveLen(len(alertRule.Alert) + 1 + 64)) // alertname + separator + SHA256 hash should be 64 characters - }) - - It("should generate different IDs for different alert rules", func() { - By("creating two different alert rules") - alertRule1 := monitoringv1.Rule{ - Alert: "TestAlert1", - Expr: intstr.FromString("up == 0"), - } - alertRule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu > 80"), - } - - By("generating rule IDs") - ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) - ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) - - By("verifying the results") - Expect(ruleId1).NotTo(BeEmpty()) - Expect(ruleId2).NotTo(BeEmpty()) - Expect(ruleId1).NotTo(Equal(ruleId2)) - }) - - It("should generate the same ID for identical alert rules", func() { - By("creating two identical alert rules") - alertRule1 := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - alertRule2 := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - - By("generating rule IDs") - ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) - ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) - - By("verifying the results") - Expect(ruleId1).NotTo(BeEmpty()) - Expect(ruleId2).NotTo(BeEmpty()) - Expect(ruleId1).To(Equal(ruleId2)) - }) - - It("should return empty string for rules without alert or record name", func() { - By("creating a rule without alert or record name") - alertRule := monitoringv1.Rule{ - Expr: intstr.FromString("up == 0"), - } - - By("generating the rule ID") - ruleId := mapperClient.GetAlertingRuleId(&alertRule) - - By("verifying the result") - Expect(ruleId).To(BeEmpty()) - }) - }) - }) - - Describe("FindAlertRuleById", func() { - Context("when the alert rule exists", func() { - It("should return the correct PrometheusRuleId", func() { - By("creating test alert rule") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - - 
By("creating PrometheusRule") - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) - - By("adding the PrometheusRule to the mapper") - mapperClient.AddPrometheusRule(pr) - - By("getting the generated rule ID") - ruleId := mapperClient.GetAlertingRuleId(&alertRule) - Expect(ruleId).NotTo(BeEmpty()) - - By("testing FindAlertRuleById") - foundPrometheusRuleId, err := mapperClient.FindAlertRuleById(ruleId) - - By("verifying results") - Expect(err).NotTo(HaveOccurred()) - expectedPrometheusRuleId := mapper.PrometheusRuleId(types.NamespacedName{ - Namespace: "test-namespace", - Name: "test-rule", - }) - Expect(*foundPrometheusRuleId).To(Equal(expectedPrometheusRuleId)) - }) - - It("should return the correct PrometheusRuleId when alert rule is one of multiple in the same PrometheusRule", func() { - By("creating multiple test alert rules") - alertRule1 := monitoringv1.Rule{ - Alert: "TestAlert1", - Expr: intstr.FromString("up == 0"), - } - alertRule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu > 80"), - } - - By("creating PrometheusRule with multiple rules") - pr := createPrometheusRule("multi-namespace", "multi-rule", []monitoringv1.Rule{alertRule1, alertRule2}) - - By("adding the PrometheusRule to the mapper") - mapperClient.AddPrometheusRule(pr) - - By("getting the generated rule IDs") - ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) - ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) - Expect(ruleId1).NotTo(BeEmpty()) - Expect(ruleId2).NotTo(BeEmpty()) - Expect(ruleId1).NotTo(Equal(ruleId2)) - - By("testing FindAlertRuleById for both rules") - expectedPrometheusRuleId := mapper.PrometheusRuleId(types.NamespacedName{ - Namespace: "multi-namespace", - Name: "multi-rule", - }) - - foundPrometheusRuleId1, err1 := mapperClient.FindAlertRuleById(ruleId1) - Expect(err1).NotTo(HaveOccurred()) - Expect(*foundPrometheusRuleId1).To(Equal(expectedPrometheusRuleId)) - - foundPrometheusRuleId2, err2 := mapperClient.FindAlertRuleById(ruleId2) - Expect(err2).NotTo(HaveOccurred()) - Expect(*foundPrometheusRuleId2).To(Equal(expectedPrometheusRuleId)) - }) - }) - - Context("when the alert rule does not exist", func() { - It("should return an error when no rules are mapped", func() { - By("setting up test data") - nonExistentRuleId := mapper.PrometheusAlertRuleId("non-existent-rule-id") - - By("testing the method") - _, err := mapperClient.FindAlertRuleById(nonExistentRuleId) - - By("verifying results") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("alert rule with id non-existent-rule-id not found")) - }) - - It("should return an error when rules are mapped but the target rule is not found", func() { - By("creating and adding a valid alert rule") - alertRule := monitoringv1.Rule{ - Alert: "ValidAlert", - Expr: intstr.FromString("up == 0"), - } - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) - mapperClient.AddPrometheusRule(pr) - - By("trying to find a non-existent rule ID") - nonExistentRuleId := mapper.PrometheusAlertRuleId("definitely-non-existent-rule-id") - - By("testing the method") - _, err := mapperClient.FindAlertRuleById(nonExistentRuleId) - - By("verifying results") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("alert rule with id definitely-non-existent-rule-id not found")) - }) - }) - }) - - Describe("AddPrometheusRule", func() { - Context("when adding PrometheusRules", func() { - It("should successfully add a 
PrometheusRule with alert rules", func() { - By("creating a PrometheusRule with alert rules") - alertRule1 := monitoringv1.Rule{ - Alert: "TestAlert1", - Expr: intstr.FromString("up == 0"), - } - alertRule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu > 80"), - } - - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule1, alertRule2}) - - By("adding the PrometheusRule") - mapperClient.AddPrometheusRule(pr) - - By("verifying the rules can be found") - ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) - foundPr1, err1 := mapperClient.FindAlertRuleById(ruleId1) - Expect(err1).ToNot(HaveOccurred()) - Expect(foundPr1.Namespace).To(Equal("test-namespace")) - Expect(foundPr1.Name).To(Equal("test-rule")) - - ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) - foundPr2, err2 := mapperClient.FindAlertRuleById(ruleId2) - Expect(err2).ToNot(HaveOccurred()) - Expect(foundPr2.Namespace).To(Equal("test-namespace")) - Expect(foundPr2.Name).To(Equal("test-rule")) - }) - - It("should update existing PrometheusRule when added again", func() { - By("creating and adding initial PrometheusRule") - alertRule1 := monitoringv1.Rule{ - Alert: "TestAlert1", - Expr: intstr.FromString("up == 0"), - } - pr1 := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule1}) - mapperClient.AddPrometheusRule(pr1) - - By("creating updated PrometheusRule with different alerts") - alertRule2 := monitoringv1.Rule{ - Alert: "TestAlert2", - Expr: intstr.FromString("cpu > 80"), - } - pr2 := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule2}) - mapperClient.AddPrometheusRule(pr2) - - By("verifying old rule is no longer found") - ruleId1 := mapperClient.GetAlertingRuleId(&alertRule1) - _, err1 := mapperClient.FindAlertRuleById(ruleId1) - Expect(err1).To(HaveOccurred()) - - By("verifying new rule is found") - ruleId2 := mapperClient.GetAlertingRuleId(&alertRule2) - foundPr, err2 := mapperClient.FindAlertRuleById(ruleId2) - Expect(err2).ToNot(HaveOccurred()) - Expect(foundPr.Namespace).To(Equal("test-namespace")) - }) - - It("should ignore recording rules (not alert rules)", func() { - By("creating a PrometheusRule with recording rule") - recordingRule := monitoringv1.Rule{ - Record: "test:recording:rule", - Expr: intstr.FromString("sum(up)"), - } - - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{recordingRule}) - - By("adding the PrometheusRule") - mapperClient.AddPrometheusRule(pr) - - By("verifying the recording rule is not found") - ruleId := mapperClient.GetAlertingRuleId(&recordingRule) - _, err := mapperClient.FindAlertRuleById(ruleId) - Expect(err).To(HaveOccurred()) - }) - }) - }) - - Describe("DeletePrometheusRule", func() { - Context("when deleting PrometheusRules", func() { - It("should successfully delete a PrometheusRule", func() { - By("creating and adding a PrometheusRule") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) - mapperClient.AddPrometheusRule(pr) - - By("verifying the rule exists") - ruleId := mapperClient.GetAlertingRuleId(&alertRule) - _, err := mapperClient.FindAlertRuleById(ruleId) - Expect(err).ToNot(HaveOccurred()) - - By("deleting the PrometheusRule") - mapperClient.DeletePrometheusRule(cache.ObjectName(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) - - By("verifying the rule is no 
longer found") - _, err = mapperClient.FindAlertRuleById(ruleId) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("not found")) - }) - - It("should handle deleting non-existent PrometheusRule gracefully", func() { - By("creating a PrometheusRule that was never added") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - } - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) - - By("deleting the non-existent PrometheusRule") - Expect(func() { - mapperClient.DeletePrometheusRule(cache.ObjectName(types.NamespacedName{Namespace: pr.Namespace, Name: pr.Name})) - }).NotTo(Panic()) - - By("verifying mapper still works after delete attempt") - // Add a different rule to verify the mapper is still functional - alertRule2 := monitoringv1.Rule{ - Alert: "AnotherAlert", - Expr: intstr.FromString("cpu > 80"), - } - pr2 := createPrometheusRule("test-namespace", "another-rule", []monitoringv1.Rule{alertRule2}) - mapperClient.AddPrometheusRule(pr2) - - ruleId := mapperClient.GetAlertingRuleId(&alertRule2) - foundPr, err := mapperClient.FindAlertRuleById(ruleId) - Expect(err).ToNot(HaveOccurred()) - Expect(foundPr.Name).To(Equal("another-rule")) - }) - }) - }) - - Describe("AddAlertRelabelConfig", func() { - Context("when adding AlertRelabelConfigs", func() { - It("should successfully add an AlertRelabelConfig", func() { - By("creating an AlertRelabelConfig") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname", "severity"}, - Separator: ";", - Regex: "TestAlert;critical", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - - By("adding the AlertRelabelConfig") - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying it can be retrieved") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - Labels: map[string]string{ - "severity": "critical", - }, - } - configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(configs).To(HaveLen(1)) - Expect(configs[0].SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) - Expect(configs[0].Regex).To(Equal("TestAlert;critical")) - }) - - It("should ignore configs without alertname in SourceLabels", func() { - By("creating an AlertRelabelConfig without alertname") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"severity", "namespace"}, - Separator: ";", - Regex: "critical;default", - TargetLabel: "priority", - Replacement: "high", - Action: "Replace", - }, - }, - }, - } - - By("adding the AlertRelabelConfig") - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying it returns empty for an alert") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - Labels: map[string]string{ - "severity": "critical", - "namespace": "default", - }, - } - specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(specs).To(BeEmpty()) - }) - - It("should update existing AlertRelabelConfig when added again", func() { - By("creating and adding initial AlertRelabelConfig") - arc1 := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: 
osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "Alert1", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc1) - - By("creating updated AlertRelabelConfig") - arc2 := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "Alert2", - TargetLabel: "severity", - Replacement: "critical", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc2) - - By("verifying the updated config is retrieved") - alertRule := &monitoringv1.Rule{ - Alert: "Alert2", - } - configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(configs).To(HaveLen(1)) - Expect(configs[0].Regex).To(Equal("Alert2")) - }) - - It("should handle multiple relabel configs in single AlertRelabelConfig", func() { - By("creating AlertRelabelConfig with multiple configs") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "Alert1", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "Alert2", - TargetLabel: "priority", - Replacement: "high", - Action: "Replace", - }, - }, - }, - } - - By("adding the AlertRelabelConfig") - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying Alert1 gets its matching config") - alertRule1 := &monitoringv1.Rule{ - Alert: "Alert1", - } - specs1 := mapperClient.GetAlertRelabelConfigSpec(alertRule1) - Expect(specs1).To(HaveLen(1)) - Expect(specs1[0].TargetLabel).To(Equal("severity")) - - By("verifying Alert2 gets its matching config") - alertRule2 := &monitoringv1.Rule{ - Alert: "Alert2", - } - specs2 := mapperClient.GetAlertRelabelConfigSpec(alertRule2) - Expect(specs2).To(HaveLen(1)) - Expect(specs2[0].TargetLabel).To(Equal("priority")) - }) - - It("should handle configs with empty regex", func() { - By("creating AlertRelabelConfig with empty regex") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - - By("adding the AlertRelabelConfig") - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying it's ignored (empty regex)") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - } - specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(specs).To(BeEmpty()) - }) - - It("should handle configs where regex values don't match source labels count", func() { - By("creating AlertRelabelConfig with mismatched regex/labels") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname", "severity"}, - Separator: ";", - Regex: "OnlyOneValue", - 
TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - - By("adding the AlertRelabelConfig") - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying it's ignored (mismatch)") - alertRule := &monitoringv1.Rule{ - Alert: "OnlyOneValue", - Labels: map[string]string{ - "severity": "critical", - }, - } - specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(specs).To(BeEmpty()) - }) - }) - }) - - Describe("DeleteAlertRelabelConfig", func() { - Context("when deleting AlertRelabelConfigs", func() { - It("should successfully delete an AlertRelabelConfig", func() { - By("creating and adding an AlertRelabelConfig") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "TestAlert", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc) - - By("verifying it exists") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - } - specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(specs).To(HaveLen(1)) - - By("deleting the AlertRelabelConfig") - mapperClient.DeleteAlertRelabelConfig(cache.ObjectName(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name})) - - By("verifying it's no longer found") - specs = mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(specs).To(BeEmpty()) - }) - - It("should handle deleting non-existent AlertRelabelConfig gracefully", func() { - By("creating an AlertRelabelConfig that was never added") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{}, - }, - } - - By("deleting the non-existent AlertRelabelConfig") - Expect(func() { - mapperClient.DeleteAlertRelabelConfig(cache.ObjectName(types.NamespacedName{Namespace: arc.Namespace, Name: arc.Name})) - }).NotTo(Panic()) - - By("verifying mapper still works after delete attempt") - // Add a different AlertRelabelConfig to verify the mapper is still functional - arc2 := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "another-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "TestAlert", - TargetLabel: "severity", - Replacement: "critical", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc2) - - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - } - configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - Expect(configs).To(HaveLen(1)) - Expect(configs[0].Regex).To(Equal("TestAlert")) - }) - }) - }) - - Describe("GetAlertRelabelConfigSpec", func() { - Context("when retrieving AlertRelabelConfig specs", func() { - It("should return specs for existing AlertRelabelConfig", func() { - By("creating and adding an AlertRelabelConfig") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname", "severity"}, - Separator: ";", - Regex: "TestAlert;critical", - TargetLabel: "priority", - Replacement: "high", - Action: "Replace", - 
}, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc) - - By("retrieving the configs") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - Labels: map[string]string{ - "severity": "critical", - }, - } - configs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - - By("verifying the configs") - Expect(configs).To(HaveLen(1)) - Expect(configs[0].TargetLabel).To(Equal("priority")) - Expect(configs[0].Replacement).To(Equal("high")) - Expect(configs[0].SourceLabels).To(ContainElements(osmv1.LabelName("alertname"), osmv1.LabelName("severity"))) - Expect(configs[0].Regex).To(Equal("TestAlert;critical")) - }) - - It("should return empty for alert that doesn't match any config", func() { - By("trying to get specs for an alert that doesn't match") - alertRule := &monitoringv1.Rule{ - Alert: "NonMatchingAlert", - Labels: map[string]string{ - "severity": "info", - }, - } - specs := mapperClient.GetAlertRelabelConfigSpec(alertRule) - - By("verifying empty is returned") - Expect(specs).To(BeEmpty()) - }) - - It("should return copies of specs (not original pointers)", func() { - By("creating and adding an AlertRelabelConfig") - arc := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "TestAlert", - TargetLabel: "severity", - Replacement: "warning", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc) - - By("retrieving configs twice") - alertRule := &monitoringv1.Rule{ - Alert: "TestAlert", - } - configs1 := mapperClient.GetAlertRelabelConfigSpec(alertRule) - configs2 := mapperClient.GetAlertRelabelConfigSpec(alertRule) - - By("verifying they are independent copies") - Expect(configs1).To(HaveLen(1)) - Expect(configs2).To(HaveLen(1)) - // Modify one and verify the other is unchanged - configs1[0].Replacement = "modified" - Expect(configs2[0].Replacement).To(Equal("warning")) - }) - }) - }) - - Describe("GetAlertRelabelConfigSpec with matching alerts", func() { - Context("when alert rule matches AlertRelabelConfig", func() { - It("should return matching configs from all AlertRelabelConfigs", func() { - By("creating and adding a PrometheusRule") - alertRule := monitoringv1.Rule{ - Alert: "TestAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, - } - pr := createPrometheusRule("test-namespace", "test-rule", []monitoringv1.Rule{alertRule}) - mapperClient.AddPrometheusRule(pr) - - By("creating and adding first AlertRelabelConfig") - arc1 := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc-1", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Separator: ";", - Regex: "TestAlert", - TargetLabel: "priority", - Replacement: "high", - Action: "Replace", - }, - }, - }, - } - mapperClient.AddAlertRelabelConfig(arc1) - - By("creating and adding second AlertRelabelConfig") - arc2 := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-arc-2", - Namespace: "test-namespace", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname", "severity"}, - Separator: ";", - Regex: "TestAlert;critical", - TargetLabel: "team", - Replacement: "platform", - Action: "Replace", - }, - }, - }, - } - 
mapperClient.AddAlertRelabelConfig(arc2) - - By("getting matching configs for the alert") - configs := mapperClient.GetAlertRelabelConfigSpec(&alertRule) - - By("verifying both configs are returned") - Expect(configs).To(HaveLen(2)) - // Verify first config - targetLabels := []string{configs[0].TargetLabel, configs[1].TargetLabel} - Expect(targetLabels).To(ContainElements("priority", "team")) - }) - }) - }) -}) diff --git a/pkg/management/mapper/new.go b/pkg/management/mapper/new.go deleted file mode 100644 index aa5a3708a..000000000 --- a/pkg/management/mapper/new.go +++ /dev/null @@ -1,16 +0,0 @@ -package mapper - -import ( - osmv1 "github.com/openshift/api/monitoring/v1" - - "github.com/openshift/monitoring-plugin/pkg/k8s" -) - -// New creates a new instance of the mapper client. -func New(k8sClient k8s.Client) Client { - return &mapper{ - k8sClient: k8sClient, - prometheusRules: make(map[PrometheusRuleId][]PrometheusAlertRuleId), - alertRelabelConfigs: make(map[AlertRelabelConfigId][]osmv1.RelabelConfig), - } -} diff --git a/pkg/management/mapper/types.go b/pkg/management/mapper/types.go deleted file mode 100644 index 8929ea1af..000000000 --- a/pkg/management/mapper/types.go +++ /dev/null @@ -1,49 +0,0 @@ -package mapper - -import ( - "context" - - osmv1 "github.com/openshift/api/monitoring/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/tools/cache" -) - -// PrometheusRuleId is a unique identifier for a PrometheusRule resource in Kubernetes, represented by its NamespacedName. -type PrometheusRuleId types.NamespacedName - -// AlertRelabelConfigId is a unique identifier for an AlertRelabelConfig resource in Kubernetes, represented by its NamespacedName. -type AlertRelabelConfigId types.NamespacedName - -// PrometheusAlertRuleId is a hash-based identifier for an alerting rule within a PrometheusRule, represented by a string. -type PrometheusAlertRuleId string - -// Client defines the interface for mapping between Prometheus alerting rules and their unique identifiers. -type Client interface { - // GetAlertingRuleId returns the unique identifier for a given alerting rule. - GetAlertingRuleId(alertRule *monitoringv1.Rule) PrometheusAlertRuleId - - // FindAlertRuleById returns the PrometheusRuleId for a given alerting rule ID. - FindAlertRuleById(alertRuleId PrometheusAlertRuleId) (*PrometheusRuleId, error) - - // WatchPrometheusRules starts watching for changes to PrometheusRules. - WatchPrometheusRules(ctx context.Context) - - // AddPrometheusRule adds or updates a PrometheusRule in the mapper. - AddPrometheusRule(pr *monitoringv1.PrometheusRule) - - // DeletePrometheusRule removes a PrometheusRule from the mapper. - DeletePrometheusRule(key cache.ObjectName) - - // WatchAlertRelabelConfigs starts watching for changes to AlertRelabelConfigs. - WatchAlertRelabelConfigs(ctx context.Context) - - // AddAlertRelabelConfig adds or updates an AlertRelabelConfig in the mapper. - AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) - - // DeleteAlertRelabelConfig removes an AlertRelabelConfig from the mapper. - DeleteAlertRelabelConfig(key cache.ObjectName) - - // GetAlertRelabelConfigSpec returns the RelabelConfigs that match the given alert rule's labels. 
- GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig -} diff --git a/pkg/management/new.go b/pkg/management/new.go index a4c827df2..f6e7ae2bc 100644 --- a/pkg/management/new.go +++ b/pkg/management/new.go @@ -4,21 +4,11 @@ import ( "context" "github.com/openshift/monitoring-plugin/pkg/k8s" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) // New creates a new management client func New(ctx context.Context, k8sClient k8s.Client) Client { - m := mapper.New(k8sClient) - m.WatchPrometheusRules(ctx) - m.WatchAlertRelabelConfigs(ctx) - - return NewWithCustomMapper(ctx, k8sClient, m) -} - -func NewWithCustomMapper(ctx context.Context, k8sClient k8s.Client, m mapper.Client) Client { return &client{ k8sClient: k8sClient, - mapper: m, } } diff --git a/pkg/management/relabel_config.go b/pkg/management/relabel_config.go deleted file mode 100644 index 552d37d56..000000000 --- a/pkg/management/relabel_config.go +++ /dev/null @@ -1,46 +0,0 @@ -package management - -import ( - "fmt" - - osmv1 "github.com/openshift/api/monitoring/v1" -) - -// applyRelabelConfigs applies relabel configurations to a set of labels. -// Returns the updated labels or an error if the alert/rule should be dropped. -func applyRelabelConfigs(name string, labels map[string]string, configs []osmv1.RelabelConfig) (map[string]string, error) { - if labels == nil { - labels = make(map[string]string) - } - - updatedLabels := make(map[string]string, len(labels)) - for k, v := range labels { - updatedLabels[k] = v - } - - for _, config := range configs { - // TODO: (machadovilaca) Implement all relabeling actions - // 'Replace', 'Keep', 'Drop', 'HashMod', 'LabelMap', 'LabelDrop', or 'LabelKeep' - - switch config.Action { - case "Drop": - return nil, fmt.Errorf("alert/rule %s has been dropped by relabeling configuration", name) - case "Replace": - updatedLabels[config.TargetLabel] = config.Replacement - case "Keep": - // Keep action is a no-op in this context since the alert/rule is already matched - case "HashMod": - // HashMod action is not implemented yet - case "LabelMap": - // LabelMap action is not implemented yet - case "LabelDrop": - // LabelDrop action is not implemented yet - case "LabelKeep": - // LabelKeep action is not implemented yet - default: - // Unsupported action, ignore - } - } - - return updatedLabels, nil -} diff --git a/pkg/management/relabel_config_test.go b/pkg/management/relabel_config_test.go deleted file mode 100644 index 1271fb202..000000000 --- a/pkg/management/relabel_config_test.go +++ /dev/null @@ -1,171 +0,0 @@ -package management - -import ( - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - osmv1 "github.com/openshift/api/monitoring/v1" -) - -var _ = Describe("applyRelabelConfigs", func() { - Context("when Drop action is applied", func() { - It("should return error", func() { - initialLabels := map[string]string{ - "severity": "critical", - } - configs := []osmv1.RelabelConfig{ - { - Action: "Drop", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).To(HaveOccurred()) - Expect(result).To(BeNil()) - }) - }) - - Context("when Replace action is applied", func() { - It("should update existing label", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{ - { - Action: "Replace", - TargetLabel: "severity", - Replacement: "critical", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "critical", - })) - }) - - It("should add new label", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{ - { - Action: "Replace", - TargetLabel: "team", - Replacement: "platform", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "warning", - "team": "platform", - })) - }) - - It("should work with nil labels", func() { - configs := []osmv1.RelabelConfig{ - { - Action: "Replace", - TargetLabel: "severity", - Replacement: "critical", - }, - } - - result, err := applyRelabelConfigs("TestAlert", nil, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "critical", - })) - }) - }) - - Context("when multiple Replace actions are applied", func() { - It("should apply all replacements", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{ - { - Action: "Replace", - TargetLabel: "severity", - Replacement: "critical", - }, - { - Action: "Replace", - TargetLabel: "team", - Replacement: "platform", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "critical", - "team": "platform", - })) - }) - }) - - Context("when Keep action is applied", func() { - It("should be a no-op", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{ - { - Action: "Keep", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "warning", - })) - }) - }) - - Context("when unknown action is applied", func() { - It("should be ignored", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{ - { - Action: "UnknownAction", - }, - } - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "warning", - })) - }) - }) - - Context("when no configs are provided", func() { - It("should return unchanged labels", func() { - initialLabels := map[string]string{ - "severity": "warning", - } - configs := []osmv1.RelabelConfig{} - - result, err := applyRelabelConfigs("TestAlert", initialLabels, configs) - - 
Expect(err).ToNot(HaveOccurred()) - Expect(result).To(Equal(map[string]string{ - "severity": "warning", - })) - }) - }) -}) diff --git a/pkg/management/testutils/k8s_client_mock.go b/pkg/management/testutils/k8s_client_mock.go index cd860d9cb..c0ab8c957 100644 --- a/pkg/management/testutils/k8s_client_mock.go +++ b/pkg/management/testutils/k8s_client_mock.go @@ -7,19 +7,19 @@ import ( osmv1 "github.com/openshift/api/monitoring/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/relabel" "github.com/openshift/monitoring-plugin/pkg/k8s" ) // MockClient is a mock implementation of k8s.Client interface type MockClient struct { - TestConnectionFunc func(ctx context.Context) error - PrometheusAlertsFunc func() k8s.PrometheusAlertsInterface - PrometheusRulesFunc func() k8s.PrometheusRuleInterface - PrometheusRuleInformerFunc func() k8s.PrometheusRuleInformerInterface - AlertRelabelConfigsFunc func() k8s.AlertRelabelConfigInterface - AlertRelabelConfigInformerFunc func() k8s.AlertRelabelConfigInformerInterface - NamespaceInformerFunc func() k8s.NamespaceInformerInterface + TestConnectionFunc func(ctx context.Context) error + PrometheusAlertsFunc func() k8s.PrometheusAlertsInterface + PrometheusRulesFunc func() k8s.PrometheusRuleInterface + AlertRelabelConfigsFunc func() k8s.AlertRelabelConfigInterface + RelabeledRulesFunc func() k8s.RelabeledRulesInterface + NamespaceFunc func() k8s.NamespaceInterface } // TestConnection mocks the TestConnection method @@ -46,14 +46,6 @@ func (m *MockClient) PrometheusRules() k8s.PrometheusRuleInterface { return &MockPrometheusRuleInterface{} } -// PrometheusRuleInformer mocks the PrometheusRuleInformer method -func (m *MockClient) PrometheusRuleInformer() k8s.PrometheusRuleInformerInterface { - if m.PrometheusRuleInformerFunc != nil { - return m.PrometheusRuleInformerFunc() - } - return &MockPrometheusRuleInformerInterface{} -} - // AlertRelabelConfigs mocks the AlertRelabelConfigs method func (m *MockClient) AlertRelabelConfigs() k8s.AlertRelabelConfigInterface { if m.AlertRelabelConfigsFunc != nil { @@ -62,20 +54,20 @@ func (m *MockClient) AlertRelabelConfigs() k8s.AlertRelabelConfigInterface { return &MockAlertRelabelConfigInterface{} } -// AlertRelabelConfigInformer mocks the AlertRelabelConfigInformer method -func (m *MockClient) AlertRelabelConfigInformer() k8s.AlertRelabelConfigInformerInterface { - if m.AlertRelabelConfigInformerFunc != nil { - return m.AlertRelabelConfigInformerFunc() +// RelabeledRules mocks the RelabeledRules method +func (m *MockClient) RelabeledRules() k8s.RelabeledRulesInterface { + if m.RelabeledRulesFunc != nil { + return m.RelabeledRulesFunc() } - return &MockAlertRelabelConfigInformerInterface{} + return &MockRelabeledRulesInterface{} } -// NamespaceInformer mocks the NamespaceInformer method -func (m *MockClient) NamespaceInformer() k8s.NamespaceInformerInterface { - if m.NamespaceInformerFunc != nil { - return m.NamespaceInformerFunc() +// Namespace mocks the Namespace method +func (m *MockClient) Namespace() k8s.NamespaceInterface { + if m.NamespaceFunc != nil { + return m.NamespaceFunc() } - return &MockNamespaceInformerInterface{} + return &MockNamespaceInterface{} } // MockPrometheusAlertsInterface is a mock implementation of k8s.PrometheusAlertsInterface @@ -224,64 +216,6 @@ func (m *MockPrometheusRuleInterface) AddRule(ctx context.Context, namespacedNam return nil } -// MockPrometheusRuleInformerInterface is a mock implementation of 
k8s.PrometheusRuleInformerInterface -type MockPrometheusRuleInformerInterface struct { - RunFunc func(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error - ListFunc func(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) - GetFunc func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) - - // Storage for test data - PrometheusRules map[string]*monitoringv1.PrometheusRule -} - -func (m *MockPrometheusRuleInformerInterface) SetPrometheusRules(rules map[string]*monitoringv1.PrometheusRule) { - m.PrometheusRules = rules -} - -// Run mocks the Run method -func (m *MockPrometheusRuleInformerInterface) Run(ctx context.Context, callbacks k8s.PrometheusRuleInformerCallback) error { - if m.RunFunc != nil { - return m.RunFunc(ctx, callbacks) - } - - // Default implementation - just wait for context to be cancelled - <-ctx.Done() - return ctx.Err() -} - -// List mocks the List method -func (m *MockPrometheusRuleInformerInterface) List(ctx context.Context, namespace string) ([]monitoringv1.PrometheusRule, error) { - if m.ListFunc != nil { - return m.ListFunc(ctx, namespace) - } - - var rules []monitoringv1.PrometheusRule - if m.PrometheusRules != nil { - for _, rule := range m.PrometheusRules { - if namespace == "" || rule.Namespace == namespace { - rules = append(rules, *rule) - } - } - } - return rules, nil -} - -// Get mocks the Get method -func (m *MockPrometheusRuleInformerInterface) Get(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { - if m.GetFunc != nil { - return m.GetFunc(ctx, namespace, name) - } - - key := namespace + "/" + name - if m.PrometheusRules != nil { - if rule, exists := m.PrometheusRules[key]; exists { - return rule, true, nil - } - } - - return nil, false, nil -} - // MockAlertRelabelConfigInterface is a mock implementation of k8s.AlertRelabelConfigInterface type MockAlertRelabelConfigInterface struct { ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) @@ -372,85 +306,50 @@ func (m *MockAlertRelabelConfigInterface) Delete(ctx context.Context, namespace return nil } -// MockAlertRelabelConfigInformerInterface is a mock implementation of k8s.AlertRelabelConfigInformerInterface -type MockAlertRelabelConfigInformerInterface struct { - RunFunc func(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error - ListFunc func(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) - GetFunc func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) - - // Storage for test data - AlertRelabelConfigs map[string]*osmv1.AlertRelabelConfig -} - -func (m *MockAlertRelabelConfigInformerInterface) SetAlertRelabelConfigs(configs map[string]*osmv1.AlertRelabelConfig) { - m.AlertRelabelConfigs = configs -} - -// Run mocks the Run method -func (m *MockAlertRelabelConfigInformerInterface) Run(ctx context.Context, callbacks k8s.AlertRelabelConfigInformerCallback) error { - if m.RunFunc != nil { - return m.RunFunc(ctx, callbacks) - } - - // Default implementation - just wait for context to be cancelled - <-ctx.Done() - return ctx.Err() +// MockRelabeledRulesInterface is a mock implementation of k8s.RelabeledRulesInterface +type MockRelabeledRulesInterface struct { + ListFunc func(ctx context.Context) []monitoringv1.Rule + GetFunc func(ctx context.Context, id string) (monitoringv1.Rule, bool) + ConfigFunc func() []*relabel.Config } -// List mocks the 
List method -func (m *MockAlertRelabelConfigInformerInterface) List(ctx context.Context, namespace string) ([]osmv1.AlertRelabelConfig, error) { +func (m *MockRelabeledRulesInterface) List(ctx context.Context) []monitoringv1.Rule { if m.ListFunc != nil { - return m.ListFunc(ctx, namespace) - } - - var configs []osmv1.AlertRelabelConfig - if m.AlertRelabelConfigs != nil { - for _, config := range m.AlertRelabelConfigs { - if namespace == "" || config.Namespace == namespace { - configs = append(configs, *config) - } - } + return m.ListFunc(ctx) } - return configs, nil + return []monitoringv1.Rule{} } -// Get mocks the Get method -func (m *MockAlertRelabelConfigInformerInterface) Get(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { +func (m *MockRelabeledRulesInterface) Get(ctx context.Context, id string) (monitoringv1.Rule, bool) { if m.GetFunc != nil { - return m.GetFunc(ctx, namespace, name) + return m.GetFunc(ctx, id) } + return monitoringv1.Rule{}, false +} - key := namespace + "/" + name - if m.AlertRelabelConfigs != nil { - if config, exists := m.AlertRelabelConfigs[key]; exists { - return config, true, nil - } +func (m *MockRelabeledRulesInterface) Config() []*relabel.Config { + if m.ConfigFunc != nil { + return m.ConfigFunc() } - - return nil, false, nil + return []*relabel.Config{} } -// MockNamespaceInformerInterface is a mock implementation of k8s.NamespaceInformerInterface -type MockNamespaceInformerInterface struct { +// MockNamespaceInterface is a mock implementation of k8s.NamespaceInterface +type MockNamespaceInterface struct { IsClusterMonitoringNamespaceFunc func(name string) bool // Storage for test data MonitoringNamespaces map[string]bool } -func (m *MockNamespaceInformerInterface) SetMonitoringNamespaces(namespaces map[string]bool) { +func (m *MockNamespaceInterface) SetMonitoringNamespaces(namespaces map[string]bool) { m.MonitoringNamespaces = namespaces } // IsClusterMonitoringNamespace mocks the IsClusterMonitoringNamespace method -func (m *MockNamespaceInformerInterface) IsClusterMonitoringNamespace(name string) bool { +func (m *MockNamespaceInterface) IsClusterMonitoringNamespace(name string) bool { if m.IsClusterMonitoringNamespaceFunc != nil { return m.IsClusterMonitoringNamespaceFunc(name) } - - if m.MonitoringNamespaces != nil { - return m.MonitoringNamespaces[name] - } - - return false + return m.MonitoringNamespaces[name] } diff --git a/pkg/management/testutils/mapper_mock.go b/pkg/management/testutils/mapper_mock.go deleted file mode 100644 index 79d1aa53b..000000000 --- a/pkg/management/testutils/mapper_mock.go +++ /dev/null @@ -1,83 +0,0 @@ -package testutils - -import ( - "context" - - osmv1 "github.com/openshift/api/monitoring/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "k8s.io/client-go/tools/cache" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" -) - -var _ mapper.Client = &MockMapperClient{} - -// MockMapperClient is a simple mock for the mapper.Client interface -type MockMapperClient struct { - GetAlertingRuleIdFunc func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId - FindAlertRuleByIdFunc func(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) - WatchPrometheusRulesFunc func(ctx context.Context) - AddPrometheusRuleFunc func(pr *monitoringv1.PrometheusRule) - DeletePrometheusRuleFunc func(key cache.ObjectName) - WatchAlertRelabelConfigsFunc func(ctx context.Context) - AddAlertRelabelConfigFunc 
func(arc *osmv1.AlertRelabelConfig) - DeleteAlertRelabelConfigFunc func(key cache.ObjectName) - GetAlertRelabelConfigSpecFunc func(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig -} - -func (m *MockMapperClient) GetAlertingRuleId(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if m.GetAlertingRuleIdFunc != nil { - return m.GetAlertingRuleIdFunc(alertRule) - } - return mapper.PrometheusAlertRuleId("mock-id") -} - -func (m *MockMapperClient) FindAlertRuleById(alertRuleId mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - if m.FindAlertRuleByIdFunc != nil { - return m.FindAlertRuleByIdFunc(alertRuleId) - } - return nil, nil -} - -func (m *MockMapperClient) WatchPrometheusRules(ctx context.Context) { - if m.WatchPrometheusRulesFunc != nil { - m.WatchPrometheusRulesFunc(ctx) - } -} - -func (m *MockMapperClient) AddPrometheusRule(pr *monitoringv1.PrometheusRule) { - if m.AddPrometheusRuleFunc != nil { - m.AddPrometheusRuleFunc(pr) - } -} - -func (m *MockMapperClient) DeletePrometheusRule(key cache.ObjectName) { - if m.DeletePrometheusRuleFunc != nil { - m.DeletePrometheusRuleFunc(key) - } -} - -func (m *MockMapperClient) WatchAlertRelabelConfigs(ctx context.Context) { - if m.WatchAlertRelabelConfigsFunc != nil { - m.WatchAlertRelabelConfigsFunc(ctx) - } -} - -func (m *MockMapperClient) AddAlertRelabelConfig(arc *osmv1.AlertRelabelConfig) { - if m.AddAlertRelabelConfigFunc != nil { - m.AddAlertRelabelConfigFunc(arc) - } -} - -func (m *MockMapperClient) DeleteAlertRelabelConfig(key cache.ObjectName) { - if m.DeleteAlertRelabelConfigFunc != nil { - m.DeleteAlertRelabelConfigFunc(key) - } -} - -func (m *MockMapperClient) GetAlertRelabelConfigSpec(alertRule *monitoringv1.Rule) []osmv1.RelabelConfig { - if m.GetAlertRelabelConfigSpecFunc != nil { - return m.GetAlertRelabelConfigSpecFunc(alertRule) - } - return nil -} diff --git a/pkg/management/update_platform_alert_rule.go b/pkg/management/update_platform_alert_rule.go index 80248cc08..c1852b41d 100644 --- a/pkg/management/update_platform_alert_rule.go +++ b/pkg/management/update_platform_alert_rule.go @@ -11,20 +11,23 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" + "github.com/openshift/monitoring-plugin/pkg/k8s" ) func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error { - prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) - if err != nil { - return err + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} } - if !c.IsPlatformAlertRule(types.NamespacedName(*prId)) { - return errors.New("cannot update non-platform alert rule from " + prId.Namespace + "/" + prId.Name) + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + + if !c.IsPlatformAlertRule(types.NamespacedName{Namespace: namespace, Name: name}) { + return errors.New("cannot update non-platform alert rule from " + namespace + "/" + name) } - originalRule, err := c.getOriginalPlatformRule(ctx, prId, alertRuleId) + originalRule, err := c.getOriginalPlatformRule(ctx, namespace, name, alertRuleId) if err != nil { return err } @@ -34,17 +37,17 @@ func (c *client) UpdatePlatformAlertRule(ctx context.Context, alertRuleId string return errors.New("no label changes detected; platform alert rules can only have labels 
updated") } - return c.applyLabelChangesViaAlertRelabelConfig(ctx, prId.Namespace, alertRuleId, originalRule.Alert, labelChanges) + return c.applyLabelChangesViaAlertRelabelConfig(ctx, namespace, alertRuleId, originalRule.Alert, labelChanges) } -func (c *client) getOriginalPlatformRule(ctx context.Context, prId *mapper.PrometheusRuleId, alertRuleId string) (*monitoringv1.Rule, error) { - pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) +func (c *client) getOriginalPlatformRule(ctx context.Context, namespace string, name string, alertRuleId string) (*monitoringv1.Rule, error) { + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) if err != nil { - return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", prId.Namespace, prId.Name, err) + return nil, fmt.Errorf("failed to get PrometheusRule %s/%s: %w", namespace, name, err) } if !found { - return nil, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + return nil, &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", namespace, name)} } for groupIdx := range pr.Spec.Groups { @@ -56,7 +59,7 @@ func (c *client) getOriginalPlatformRule(ctx context.Context, prId *mapper.Prome } } - return nil, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) + return nil, fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, namespace, name) } type labelChange struct { @@ -99,7 +102,7 @@ func calculateLabelChanges(originalLabels, newLabels map[string]string) []labelC } func (c *client) applyLabelChangesViaAlertRelabelConfig(ctx context.Context, namespace string, alertRuleId string, alertName string, changes []labelChange) error { - arcName := fmt.Sprintf("alertmanagement-%s", strings.ToLower(strings.ReplaceAll(alertRuleId, "/", "-"))) + arcName := fmt.Sprintf("alertmanagement-%s", strings.ToLower(strings.ReplaceAll(alertRuleId, ";", "-"))) existingArc, found, err := c.k8sClient.AlertRelabelConfigs().Get(ctx, namespace, arcName) if err != nil { diff --git a/pkg/management/update_platform_alert_rule_test.go b/pkg/management/update_platform_alert_rule_test.go index 93ee1b054..6bab6b5ce 100644 --- a/pkg/management/update_platform_alert_rule_test.go +++ b/pkg/management/update_platform_alert_rule_test.go @@ -3,6 +3,7 @@ package management_test import ( "context" "errors" + "strings" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -11,398 +12,374 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("UpdatePlatformAlertRule", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR *testutils.MockPrometheusRuleInterface - mockARC *testutils.MockAlertRelabelConfigInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() - - mockPR = &testutils.MockPrometheusRuleInterface{} - mockARC = &testutils.MockAlertRelabelConfigInterface{} - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR + var ( + // Original platform rule as stored in PrometheusRule (without k8s labels) + originalPlatformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", }, - AlertRelabelConfigsFunc: func() k8s.AlertRelabelConfigInterface { - return mockARC + } + originalPlatformRuleId = alertrule.GetAlertingRuleId(&originalPlatformRule) + + // Platform rule as seen by RelabeledRules (with k8s labels added) + platformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Expr: intstr.FromString("node_down == 1"), + Labels: map[string]string{ + "severity": "critical", + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + k8s.AlertRuleLabelId: originalPlatformRuleId, }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer + } + platformRuleId = alertrule.GetAlertingRuleId(&platformRule) + + userRule = monitoringv1.Rule{ + Alert: "UserAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", }, } - mockMapper = &testutils.MockMapperClient{} + userRuleId = alertrule.GetAlertingRuleId(&userRule) + ) - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) - }) + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) - Context("when updating a platform alert rule", func() { - It("should create an AlertRelabelConfig to update labels", func() { - By("setting up the existing platform rule") - existingRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - "team": "platform", + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" }, } + } + }) - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - 
Name: "platform-group", - Rules: []monitoringv1.Rule{existingRule}, - }, + Context("when rule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "platform-namespace-1/openshift-platform-alerts": prometheusRule, - }) - - alertRuleId := "test-platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-alerts", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "PlatformAlert" { - return mapper.PrometheusAlertRuleId(alertRuleId) } - return mapper.PrometheusAlertRuleId("other-id") - } - - By("updating labels through AlertRelabelConfig") - updatedRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - "team": "platform", - "owner": "sre", - }, } + }) - err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).ToNot(HaveOccurred()) - - By("verifying AlertRelabelConfig was created") - arcs, err := mockARC.List(ctx, "platform-namespace-1") - Expect(err).ToNot(HaveOccurred()) - Expect(arcs).To(HaveLen(1)) - - arc := arcs[0] - Expect(arc.Namespace).To(Equal("platform-namespace-1")) - Expect(arc.Name).To(Equal("alertmanagement-test-platform-rule-id")) - - By("verifying relabel configs include label updates with alertname matching") - Expect(arc.Spec.Configs).To(HaveLen(2)) + It("returns NotFoundError", func() { + updatedRule := platformRule + err := client.UpdatePlatformAlertRule(ctx, "nonexistent-id", updatedRule) + Expect(err).To(HaveOccurred()) - severityUpdate := false - ownerAdd := false - for _, config := range arc.Spec.Configs { - Expect(config.Action).To(Equal("Replace")) - Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) - Expect(config.Regex).To(ContainSubstring("PlatformAlert")) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("AlertRule")) + }) + }) - if config.TargetLabel == "severity" && config.Replacement == "critical" { - severityUpdate = true - Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("severity"))) - } - if config.TargetLabel == "owner" && config.Replacement == "sre" { - ownerAdd = true - Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("owner"))) + Context("when trying to update a non-platform rule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, } } - Expect(severityUpdate).To(BeTrue()) - Expect(ownerAdd).To(BeTrue()) }) - It("should update existing AlertRelabelConfig when one already exists", func() { - By("setting up the existing platform rule and AlertRelabelConfig") - existingRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - 
"severity": "warning", - }, - } + It("returns an error", func() { + updatedRule := userRule + err := client.UpdatePlatformAlertRule(ctx, userRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot update non-platform alert rule")) + }) + }) - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "platform-group", - Rules: []monitoringv1.Rule{existingRule}, - }, + Context("when PrometheusRule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false }, - }, + } } - existingARC := &osmv1.AlertRelabelConfig{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-platform-rule-id-relabel", - Namespace: "platform-namespace-1", - }, - Spec: osmv1.AlertRelabelConfigSpec{ - Configs: []osmv1.RelabelConfig{ - { - SourceLabels: []osmv1.LabelName{"alertname"}, - Regex: "PlatformAlert", - Action: "Keep", - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil }, - }, + } } + }) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "platform-namespace-1/openshift-platform-alerts": prometheusRule, - }) - mockARC.SetAlertRelabelConfigs(map[string]*osmv1.AlertRelabelConfig{ - "platform-namespace-1/alertmanagement-test-platform-rule-id": existingARC, - }) + It("returns NotFoundError", func() { + updatedRule := platformRule + err := client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).To(HaveOccurred()) - alertRuleId := "test-platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-alerts", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "PlatformAlert" { - return mapper.PrometheusAlertRuleId(alertRuleId) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("PrometheusRule")) + }) + }) + + Context("when PrometheusRule Get returns an error", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false + }, } - return mapper.PrometheusAlertRuleId("other-id") } - By("updating labels through existing AlertRelabelConfig") - updatedRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) 
(*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, + } } + }) - err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).ToNot(HaveOccurred()) - - By("verifying existing AlertRelabelConfig was updated") - arc, found, err := mockARC.Get(ctx, "platform-namespace-1", "alertmanagement-test-platform-rule-id") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(arc.Spec.Configs).To(HaveLen(1)) - Expect(arc.Spec.Configs[0].Action).To(Equal("Replace")) - Expect(arc.Spec.Configs[0].SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) - Expect(arc.Spec.Configs[0].TargetLabel).To(Equal("severity")) - Expect(arc.Spec.Configs[0].Replacement).To(Equal("critical")) + It("returns the error", func() { + updatedRule := platformRule + err := client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get PrometheusRule")) }) + }) - It("should handle label removal", func() { - By("setting up the existing platform rule with multiple labels") - existingRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - "team": "platform", - "owner": "sre", - }, + Context("when no label changes are detected", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false + }, + } } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "platform-group", - Rules: []monitoringv1.Rule{existingRule}, - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{originalPlatformRule}, + }, + }, + }, + }, true, nil }, - }, + } } + }) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "platform-namespace-1/openshift-platform-alerts": prometheusRule, - }) + It("returns an error", func() { + updatedRule := originalPlatformRule + err := client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("no label changes detected")) + }) + }) - alertRuleId := "test-platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-alerts", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "PlatformAlert" { - return mapper.PrometheusAlertRuleId(alertRuleId) + Context("when updating platform rule labels", func() { + BeforeEach(func() { + 
mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false + }, } - return mapper.PrometheusAlertRuleId("other-id") } - By("updating with fewer labels") - updatedRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{originalPlatformRule}, + }, + }, + }, + }, true, nil + }, + } } + }) - err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).ToNot(HaveOccurred()) - - By("verifying AlertRelabelConfig includes label removal actions") - arcs, err := mockARC.List(ctx, "platform-namespace-1") - Expect(err).ToNot(HaveOccurred()) - Expect(arcs).To(HaveLen(1)) + Context("when creating new AlertRelabelConfig", func() { + BeforeEach(func() { + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return nil, false, nil + }, + CreateFunc: func(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + return &arc, nil + }, + } + } + }) - arc := arcs[0] - Expect(arc.Spec.Configs).To(HaveLen(2)) + It("creates AlertRelabelConfig for label changes", func() { + var createdARC *osmv1.AlertRelabelConfig - labelRemovalCount := 0 - for _, config := range arc.Spec.Configs { - if config.Replacement == "" && (config.TargetLabel == "team" || config.TargetLabel == "owner") { - labelRemovalCount++ - Expect(config.Action).To(Equal("Replace")) - Expect(config.SourceLabels).To(ContainElement(osmv1.LabelName("alertname"))) + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return nil, false, nil + }, + CreateFunc: func(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + createdARC = &arc + return &arc, nil + }, + } } - } - Expect(labelRemovalCount).To(Equal(2)) - }) - It("should return error when trying to update non-platform rule", func() { - By("setting up a user-defined rule") - alertRuleId := "test-user-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "user-namespace", - Name: "user-rule", - }, nil - } - - updatedRule := monitoringv1.Rule{ - Alert: "UserAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, - } + updatedRule := originalPlatformRule + updatedRule.Labels = map[string]string{ + "severity": "warning", + "new_label": "new_value", + } - err := client.UpdatePlatformAlertRule(ctx, 
alertRuleId, updatedRule) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("cannot update non-platform alert rule")) + err := client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(createdARC).NotTo(BeNil()) + Expect(createdARC.Namespace).To(Equal("openshift-monitoring")) + Expect(strings.HasPrefix(createdARC.Name, "alertmanagement-")).To(BeTrue()) + Expect(createdARC.Spec.Configs).NotTo(BeEmpty()) + }) }) - It("should return error when no label changes detected", func() { - By("setting up the existing platform rule") - existingRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - }, - } - - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "openshift-platform-alerts", - Namespace: "platform-namespace-1", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "platform-group", - Rules: []monitoringv1.Rule{existingRule}, + Context("when updating existing AlertRelabelConfig", func() { + BeforeEach(func() { + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + existingARC := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alertmanagement-existing", + Namespace: "openshift-monitoring", }, - }, - }, - } - - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "platform-namespace-1/openshift-platform-alerts": prometheusRule, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return existingARC, true, nil + }, + UpdateFunc: func(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + return nil + }, + } + } }) - alertRuleId := "test-platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-alerts", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "PlatformAlert" { - return mapper.PrometheusAlertRuleId(alertRuleId) + It("updates existing AlertRelabelConfig", func() { + var updatedARC *osmv1.AlertRelabelConfig + + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + existingARC := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "alertmanagement-existing", + Namespace: "openshift-monitoring", + }, + } + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return existingARC, true, nil + }, + UpdateFunc: func(ctx context.Context, arc osmv1.AlertRelabelConfig) error { + updatedARC = &arc + return nil + }, + } } - return mapper.PrometheusAlertRuleId("other-id") - } - By("updating with same labels") - updatedRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "warning", - }, - } + updatedRule := originalPlatformRule + updatedRule.Labels = map[string]string{ + "severity": "info", + } - err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("no label changes detected")) + err := 
client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedARC).NotTo(BeNil()) + Expect(updatedARC.Spec.Configs).NotTo(BeEmpty()) + }) }) - It("should return error when alert rule not found", func() { - By("setting up mapper to return rule ID") - alertRuleId := "non-existent-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return nil, errors.New("alert rule not found") - } + Context("when dropping labels", func() { + It("creates relabel config to drop labels", func() { + var createdARC *osmv1.AlertRelabelConfig - updatedRule := monitoringv1.Rule{ - Alert: "PlatformAlert", - Expr: intstr.FromString("up == 0"), - Labels: map[string]string{ - "severity": "critical", - }, - } + mockK8s.AlertRelabelConfigsFunc = func() k8s.AlertRelabelConfigInterface { + return &testutils.MockAlertRelabelConfigInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*osmv1.AlertRelabelConfig, bool, error) { + return nil, false, nil + }, + CreateFunc: func(ctx context.Context, arc osmv1.AlertRelabelConfig) (*osmv1.AlertRelabelConfig, error) { + createdARC = &arc + return &arc, nil + }, + } + } - err := client.UpdatePlatformAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("alert rule not found")) + updatedRule := originalPlatformRule + // Remove severity label (keep alertname as it's special) + updatedRule.Labels = map[string]string{} + + err := client.UpdatePlatformAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(createdARC).NotTo(BeNil()) + Expect(createdARC.Spec.Configs).NotTo(BeEmpty()) + }) }) }) }) diff --git a/pkg/management/update_user_defined_alert_rule.go b/pkg/management/update_user_defined_alert_rule.go index a9ac7bc8d..c29b841db 100644 --- a/pkg/management/update_user_defined_alert_rule.go +++ b/pkg/management/update_user_defined_alert_rule.go @@ -4,29 +4,32 @@ import ( "context" "fmt" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" + "github.com/openshift/monitoring-plugin/pkg/k8s" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "k8s.io/apimachinery/pkg/types" - - "github.com/openshift/monitoring-plugin/pkg/management/mapper" ) func (c *client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId string, alertRule monitoringv1.Rule) error { - prId, err := c.mapper.FindAlertRuleById(mapper.PrometheusAlertRuleId(alertRuleId)) - if err != nil { - return err + rule, found := c.k8sClient.RelabeledRules().Get(ctx, alertRuleId) + if !found { + return &NotFoundError{Resource: "AlertRule", Id: alertRuleId} } - if c.IsPlatformAlertRule(types.NamespacedName(*prId)) { + namespace := rule.Labels[k8s.PrometheusRuleLabelNamespace] + name := rule.Labels[k8s.PrometheusRuleLabelName] + + if c.IsPlatformAlertRule(types.NamespacedName{Namespace: namespace, Name: name}) { return fmt.Errorf("cannot update alert rule in a platform-managed PrometheusRule") } - pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, prId.Namespace, prId.Name) + pr, found, err := c.k8sClient.PrometheusRules().Get(ctx, namespace, name) if err != nil { return err } if !found { - return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", prId.Namespace, prId.Name)} + return &NotFoundError{Resource: "PrometheusRule", Id: fmt.Sprintf("%s/%s", namespace, name)} } updated := false @@ -45,7 +48,7 @@ func (c 
*client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId str } if !updated { - return fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, prId.Namespace, prId.Name) + return fmt.Errorf("alert rule with id %s not found in PrometheusRule %s/%s", alertRuleId, namespace, name) } err = c.k8sClient.PrometheusRules().Update(ctx, *pr) @@ -57,5 +60,5 @@ func (c *client) UpdateUserDefinedAlertRule(ctx context.Context, alertRuleId str } func (c *client) shouldUpdateRule(rule monitoringv1.Rule, alertRuleId string) bool { - return alertRuleId == string(c.mapper.GetAlertingRuleId(&rule)) + return alertRuleId == alertrule.GetAlertingRuleId(&rule) } diff --git a/pkg/management/update_user_defined_alert_rule_test.go b/pkg/management/update_user_defined_alert_rule_test.go index 2380381b5..bce2fd8ce 100644 --- a/pkg/management/update_user_defined_alert_rule_test.go +++ b/pkg/management/update_user_defined_alert_rule_test.go @@ -2,6 +2,8 @@ package management_test import ( "context" + "errors" + "fmt" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -9,250 +11,417 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + alertrule "github.com/openshift/monitoring-plugin/pkg/alert_rule" "github.com/openshift/monitoring-plugin/pkg/k8s" "github.com/openshift/monitoring-plugin/pkg/management" - "github.com/openshift/monitoring-plugin/pkg/management/mapper" "github.com/openshift/monitoring-plugin/pkg/management/testutils" ) var _ = Describe("UpdateUserDefinedAlertRule", func() { var ( - ctx context.Context - mockK8s *testutils.MockClient - mockPR *testutils.MockPrometheusRuleInterface - mockMapper *testutils.MockMapperClient - client management.Client + ctx context.Context + mockK8s *testutils.MockClient + client management.Client ) - BeforeEach(func() { - ctx = context.Background() - - mockPR = &testutils.MockPrometheusRuleInterface{} - mockNSInformer := &testutils.MockNamespaceInformerInterface{} - mockNSInformer.SetMonitoringNamespaces(map[string]bool{ - "platform-namespace-1": true, - "platform-namespace-2": true, - }) - mockK8s = &testutils.MockClient{ - PrometheusRulesFunc: func() k8s.PrometheusRuleInterface { - return mockPR + var ( + // Original user rule as stored in PrometheusRule (without k8s labels) + originalUserRule = monitoringv1.Rule{ + Alert: "UserAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", }, - NamespaceInformerFunc: func() k8s.NamespaceInformerInterface { - return mockNSInformer + } + originalUserRuleId = alertrule.GetAlertingRuleId(&originalUserRule) + + // User rule as seen by RelabeledRules (with k8s labels added) + userRule = monitoringv1.Rule{ + Alert: "UserAlert", + Expr: intstr.FromString("up == 0"), + Labels: map[string]string{ + "severity": "warning", + k8s.PrometheusRuleLabelNamespace: "user-namespace", + k8s.PrometheusRuleLabelName: "user-rule", }, } - mockMapper = &testutils.MockMapperClient{} + userRuleId = originalUserRuleId - client = management.NewWithCustomMapper(ctx, mockK8s, mockMapper) - }) + platformRule = monitoringv1.Rule{ + Alert: "PlatformAlert", + Labels: map[string]string{ + k8s.PrometheusRuleLabelNamespace: "openshift-monitoring", + k8s.PrometheusRuleLabelName: "platform-rule", + }, + } + platformRuleId = alertrule.GetAlertingRuleId(&platformRule) + ) - Context("when updating a user-defined alert rule", func() { - It("should successfully update an existing alert rule", func() { - By("setting up the existing rule") - 
existingRule := monitoringv1.Rule{ - Alert: "OldAlert", - Expr: intstr.FromString("up == 0"), - } + BeforeEach(func() { + ctx = context.Background() + mockK8s = &testutils.MockClient{} + client = management.New(ctx, mockK8s) - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "user-rule", - Namespace: "user-namespace", + mockK8s.NamespaceFunc = func() k8s.NamespaceInterface { + return &testutils.MockNamespaceInterface{ + IsClusterMonitoringNamespaceFunc: func(name string) bool { + return name == "openshift-monitoring" }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "test-group", - Rules: []monitoringv1.Rule{existingRule}, - }, + } + } + }) + + Context("when rule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + return monitoringv1.Rule{}, false }, - }, + } } + }) + + It("returns NotFoundError", func() { + updatedRule := userRule + err := client.UpdateUserDefinedAlertRule(ctx, "nonexistent-id", updatedRule) + Expect(err).To(HaveOccurred()) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "user-namespace/user-rule": prometheusRule, - }) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("AlertRule")) + }) + }) - alertRuleId := "test-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "user-namespace", - Name: "user-rule", - }, nil + Context("when trying to update a platform rule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == platformRuleId { + return platformRule, true + } + return monitoringv1.Rule{}, false + }, + } } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "OldAlert" { - return mapper.PrometheusAlertRuleId(alertRuleId) + }) + + It("returns an error", func() { + updatedRule := platformRule + err := client.UpdateUserDefinedAlertRule(ctx, platformRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("cannot update alert rule in a platform-managed PrometheusRule")) + }) + }) + + Context("when PrometheusRule is not found", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, } - return mapper.PrometheusAlertRuleId("other-id") } - By("updating with new values") - updatedRule := monitoringv1.Rule{ - Alert: "UpdatedAlert", - Expr: intstr.FromString("up == 1"), - Annotations: map[string]string{ - "summary": "Updated summary", - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, nil + }, + } } + }) - err := 
client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).ToNot(HaveOccurred()) + It("returns NotFoundError", func() { + updatedRule := userRule + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).To(HaveOccurred()) - By("verifying the update succeeded") - updatedPR, found, err := mockPR.Get(ctx, "user-namespace", "user-rule") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedPR.Spec.Groups).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("UpdatedAlert")) - Expect(updatedPR.Spec.Groups[0].Rules[0].Expr.String()).To(Equal("up == 1")) - Expect(updatedPR.Spec.Groups[0].Rules[0].Annotations["summary"]).To(Equal("Updated summary")) + var notFoundErr *management.NotFoundError + Expect(errors.As(err, ¬FoundErr)).To(BeTrue()) + Expect(notFoundErr.Resource).To(Equal("PrometheusRule")) }) + }) - It("should update the correct rule when multiple rules exist", func() { - By("setting up multiple rules across different groups") - rule1 := monitoringv1.Rule{ - Alert: "Alert1", - Expr: intstr.FromString("up == 0"), + Context("when PrometheusRule Get returns an error", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, + } } - rule2 := monitoringv1.Rule{ - Alert: "Alert2", - Expr: intstr.FromString("cpu_usage > 80"), + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return nil, false, errors.New("failed to get PrometheusRule") + }, + } } + }) + + It("returns the error", func() { + updatedRule := userRule + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to get PrometheusRule")) + }) + }) - rule3 := monitoringv1.Rule{ - Alert: "Alert3", - Expr: intstr.FromString("memory_usage > 90"), + Context("when rule is not found in PrometheusRule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, + } } - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "multi-rule", - Namespace: "user-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "group1", - Rules: []monitoringv1.Rule{rule1, rule2}, - }, - { - Name: "group2", - Rules: []monitoringv1.Rule{rule3}, - }, + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + // Return PrometheusRule but without the rule we're looking for + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: 
"test-group", + Rules: []monitoringv1.Rule{}, + }, + }, + }, + }, true, nil }, - }, + } } + }) - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "user-namespace/multi-rule": prometheusRule, - }) + It("returns an error", func() { + updatedRule := userRule + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("alert rule with id %s not found", userRuleId))) + }) + }) - alertRuleId := "alert2-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "user-namespace", - Name: "multi-rule", - }, nil - } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - if alertRule.Alert == "Alert2" { - return mapper.PrometheusAlertRuleId(alertRuleId) + Context("when PrometheusRule Update fails", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return &testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, } - return mapper.PrometheusAlertRuleId("other-id") } - By("updating only the second rule") - updatedRule := monitoringv1.Rule{ - Alert: "Alert2Updated", - Expr: intstr.FromString("cpu_usage > 90"), + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{originalUserRule}, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + return errors.New("failed to update PrometheusRule") + }, + } } + }) - err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) - Expect(err).ToNot(HaveOccurred()) - - By("verifying only the targeted rule was updated") - updatedPR, found, err := mockPR.Get(ctx, "user-namespace", "multi-rule") - Expect(found).To(BeTrue()) - Expect(err).ToNot(HaveOccurred()) - Expect(updatedPR.Spec.Groups).To(HaveLen(2)) - - Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(2)) - Expect(updatedPR.Spec.Groups[0].Rules[0].Alert).To(Equal("Alert1")) - Expect(updatedPR.Spec.Groups[0].Rules[1].Alert).To(Equal("Alert2Updated")) - Expect(updatedPR.Spec.Groups[0].Rules[1].Expr.String()).To(Equal("cpu_usage > 90")) - - Expect(updatedPR.Spec.Groups[1].Rules).To(HaveLen(1)) - Expect(updatedPR.Spec.Groups[1].Rules[0].Alert).To(Equal("Alert3")) + It("returns the error", func() { + updatedRule := originalUserRule + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("failed to update PrometheusRule")) }) + }) - It("should return error when alert rule ID is not found", func() { - existingRule := monitoringv1.Rule{ - Alert: "ExistingAlert", - Expr: intstr.FromString("up == 0"), + Context("when successfully updating a rule", func() { + BeforeEach(func() { + mockK8s.RelabeledRulesFunc = func() k8s.RelabeledRulesInterface { + return 
&testutils.MockRelabeledRulesInterface{ + GetFunc: func(ctx context.Context, id string) (monitoringv1.Rule, bool) { + if id == userRuleId { + return userRule, true + } + return monitoringv1.Rule{}, false + }, + } } + }) - prometheusRule := &monitoringv1.PrometheusRule{ - ObjectMeta: metav1.ObjectMeta{ - Name: "user-rule", - Namespace: "user-namespace", - }, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: []monitoringv1.RuleGroup{ - { - Name: "test-group", - Rules: []monitoringv1.Rule{existingRule}, - }, + It("updates the rule in the PrometheusRule", func() { + var updatedPR *monitoringv1.PrometheusRule + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{originalUserRule}, + }, + }, + }, + }, true, nil }, - }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, + } } - mockPR.SetPrometheusRules(map[string]*monitoringv1.PrometheusRule{ - "user-namespace/user-rule": prometheusRule, - }) - - alertRuleId := "non-existent-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "user-namespace", - Name: "user-rule", - }, nil + updatedRule := originalUserRule + // Create a deep copy of the Labels map to avoid modifying the original + updatedRule.Labels = make(map[string]string) + for k, v := range originalUserRule.Labels { + updatedRule.Labels[k] = v } - mockMapper.GetAlertingRuleIdFunc = func(alertRule *monitoringv1.Rule) mapper.PrometheusAlertRuleId { - return mapper.PrometheusAlertRuleId("different-id") + updatedRule.Labels["severity"] = "critical" + updatedRule.Expr = intstr.FromString("up == 1") + + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedPR).NotTo(BeNil()) + Expect(updatedPR.Spec.Groups[0].Rules[0].Labels["severity"]).To(Equal("critical")) + Expect(updatedPR.Spec.Groups[0].Rules[0].Expr.String()).To(Equal("up == 1")) + }) + + It("updates only the matching rule when multiple rules exist", func() { + anotherRule := monitoringv1.Rule{ + Alert: "AnotherAlert", + Expr: intstr.FromString("down == 1"), } - updatedRule := monitoringv1.Rule{ - Alert: "UpdatedAlert", - Expr: intstr.FromString("up == 1"), + var updatedPR *monitoringv1.PrometheusRule + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Rules: []monitoringv1.Rule{originalUserRule, anotherRule}, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, + } } - err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) + updatedRule := originalUserRule + // Create a 
deep copy of the Labels map to avoid modifying the original + updatedRule.Labels = make(map[string]string) + for k, v := range originalUserRule.Labels { + updatedRule.Labels[k] = v + } + updatedRule.Labels["severity"] = "info" - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("not found")) + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedPR).NotTo(BeNil()) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(2)) + Expect(updatedPR.Spec.Groups[0].Rules[0].Labels["severity"]).To(Equal("info")) + Expect(updatedPR.Spec.Groups[0].Rules[1].Alert).To(Equal("AnotherAlert")) }) - It("should return error when trying to update a platform-managed alert rule", func() { - alertRuleId := "platform-rule-id" - mockMapper.FindAlertRuleByIdFunc = func(id mapper.PrometheusAlertRuleId) (*mapper.PrometheusRuleId, error) { - return &mapper.PrometheusRuleId{ - Namespace: "platform-namespace-1", - Name: "openshift-platform-rules", - }, nil + It("updates rule in the correct group when multiple groups exist", func() { + var updatedPR *monitoringv1.PrometheusRule + + mockK8s.PrometheusRulesFunc = func() k8s.PrometheusRuleInterface { + return &testutils.MockPrometheusRuleInterface{ + GetFunc: func(ctx context.Context, namespace string, name string) (*monitoringv1.PrometheusRule, bool, error) { + return &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: name, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "group1", + Rules: []monitoringv1.Rule{}, + }, + { + Name: "group2", + Rules: []monitoringv1.Rule{originalUserRule}, + }, + }, + }, + }, true, nil + }, + UpdateFunc: func(ctx context.Context, pr monitoringv1.PrometheusRule) error { + updatedPR = &pr + return nil + }, + } } - updatedRule := monitoringv1.Rule{ - Alert: "UpdatedAlert", - Expr: intstr.FromString("up == 1"), + updatedRule := originalUserRule + // Create a deep copy of the Labels map to avoid modifying the original + updatedRule.Labels = make(map[string]string) + for k, v := range originalUserRule.Labels { + updatedRule.Labels[k] = v } + updatedRule.Labels["new_label"] = "new_value" - err := client.UpdateUserDefinedAlertRule(ctx, alertRuleId, updatedRule) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("platform-managed")) + err := client.UpdateUserDefinedAlertRule(ctx, userRuleId, updatedRule) + Expect(err).NotTo(HaveOccurred()) + Expect(updatedPR).NotTo(BeNil()) + Expect(updatedPR.Spec.Groups).To(HaveLen(2)) + Expect(updatedPR.Spec.Groups[0].Rules).To(HaveLen(0)) + Expect(updatedPR.Spec.Groups[1].Rules).To(HaveLen(1)) + Expect(updatedPR.Spec.Groups[1].Rules[0].Labels["new_label"]).To(Equal("new_value")) }) }) }) diff --git a/pkg/server.go b/pkg/server.go index 271ac4003..129d800e3 100644 --- a/pkg/server.go +++ b/pkg/server.go @@ -61,11 +61,11 @@ type PluginConfig struct { type Feature string const ( - AcmAlerting Feature = "acm-alerting" - Incidents Feature = "incidents" - DevConfig Feature = "dev-config" - PersesDashboards Feature = "perses-dashboards" - ManagementAPI Feature = "management-api" + AcmAlerting Feature = "acm-alerting" + Incidents Feature = "incidents" + DevConfig Feature = "dev-config" + PersesDashboards Feature = "perses-dashboards" + AlertManagementAPI Feature = "alert-management-api" ) func (pluginConfig *PluginConfig) MarshalJSON() ([]byte, error) { @@ -109,7 +109,7 @@ func (s *PluginServer) Shutdown(ctx 
context.Context) error { func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { acmMode := cfg.Features[AcmAlerting] - managementMode := cfg.Features[ManagementAPI] + alertManagementAPIMode := cfg.Features[AlertManagementAPI] acmLocationsLength := len(cfg.AlertmanagerUrl) + len(cfg.ThanosQuerierUrl) @@ -135,7 +135,7 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { // Comment the following line for local development: var k8sclient *dynamic.DynamicClient - if acmMode || managementMode { + if acmMode || alertManagementAPIMode { k8sconfig, err = rest.InClusterConfig() if err != nil { return nil, fmt.Errorf("cannot get in cluster config: %w", err) @@ -151,18 +151,18 @@ func createHTTPServer(ctx context.Context, cfg *Config) (*http.Server, error) { // Initialize management client if management API feature is enabled var managementClient management.Client - if managementMode { + if alertManagementAPIMode { k8sClient, err := k8s.NewClient(ctx, k8sconfig) if err != nil { - return nil, fmt.Errorf("failed to create k8s client for management API: %w", err) + return nil, fmt.Errorf("failed to create k8s client for alert management API: %w", err) } if err := k8sClient.TestConnection(ctx); err != nil { - return nil, fmt.Errorf("failed to connect to kubernetes cluster for management API: %w", err) + return nil, fmt.Errorf("failed to connect to kubernetes cluster for alert management API: %w", err) } managementClient = management.New(ctx, k8sClient) - log.Info("Management API enabled") + log.Info("alert management API enabled") } router, pluginConfig := setupRoutes(cfg, managementClient) diff --git a/test/e2e/alert_management_api_test.go b/test/e2e/alert_management_api_test.go new file mode 100644 index 000000000..0e5091393 --- /dev/null +++ b/test/e2e/alert_management_api_test.go @@ -0,0 +1,334 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "testing" + "time" + + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "gopkg.in/yaml.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/openshift/monitoring-plugin/internal/managementrouter" + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestBulkDeleteUserDefinedAlertRules(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-bulk-delete", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + forDuration := monitoringv1.Duration("5m") + + testRule1 := monitoringv1.Rule{ + Alert: "TestBulkDeleteAlert1", + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "description": "Test alert 1 for bulk delete testing", + }, + } + + testRule2 := monitoringv1.Rule{ + Alert: "TestBulkDeleteAlert2", + Expr: intstr.FromString("up == 1"), + For: &forDuration, + Labels: map[string]string{ + "severity": "info", + }, + Annotations: map[string]string{ + "description": "Test alert 2 for bulk delete testing", + }, + } + + testRule3 := monitoringv1.Rule{ + Alert: "TestBulkDeleteAlert3", + Expr: intstr.FromString("up == 2"), + For: &forDuration, + Labels: 
map[string]string{ + "severity": "critical", + }, + Annotations: map[string]string{ + "description": "Test alert 3 for bulk delete testing", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, testRule1, testRule2, testRule3) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + var ruleIdsToDelete []string + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cm, err := f.Clientset.CoreV1().ConfigMaps(k8s.ClusterMonitoringNamespace).Get( + ctx, + k8s.RelabeledRulesConfigMapName, + metav1.GetOptions{}, + ) + if err != nil { + t.Logf("Failed to get ConfigMap: %v", err) + return false, nil + } + + configData, ok := cm.Data[k8s.RelabeledRulesConfigMapKey] + if !ok { + t.Logf("ConfigMap has no %s key", k8s.RelabeledRulesConfigMapKey) + return false, nil + } + + var rules map[string]monitoringv1.Rule + if err := yaml.Unmarshal([]byte(configData), &rules); err != nil { + t.Logf("Failed to unmarshal config data: %v", err) + return false, nil + } + + foundRuleIds := []string{} + for ruleId, rule := range rules { + if rule.Alert == "TestBulkDeleteAlert1" || rule.Alert == "TestBulkDeleteAlert2" { + foundRuleIds = append(foundRuleIds, ruleId) + } + } + + if len(foundRuleIds) == 2 { + ruleIdsToDelete = foundRuleIds + t.Logf("Found rule IDs to delete: %v", ruleIdsToDelete) + return true, nil + } + + t.Logf("Found %d/2 test alerts in ConfigMap", len(foundRuleIds)) + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for alerts to appear in ConfigMap: %v", err) + } + + reqBody := managementrouter.BulkDeleteUserDefinedAlertRulesRequest{ + RuleIds: ruleIdsToDelete, + } + + reqJSON, err := json.Marshal(reqBody) + if err != nil { + t.Fatalf("Failed to marshal request body: %v", err) + } + + bulkDeleteURL := fmt.Sprintf("%s/api/v1/alerting/rules", f.PluginURL) + req, err := http.NewRequestWithContext(ctx, http.MethodDelete, bulkDeleteURL, bytes.NewBuffer(reqJSON)) + if err != nil { + t.Fatalf("Failed to create HTTP request: %v", err) + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + t.Fatalf("Failed to make bulk delete request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Expected status code %d, got %d. 
Response body: %s", http.StatusOK, resp.StatusCode, string(body)) + } + + var bulkDeleteResp managementrouter.BulkDeleteUserDefinedAlertRulesResponse + if err := json.NewDecoder(resp.Body).Decode(&bulkDeleteResp); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + if len(bulkDeleteResp.Rules) != 2 { + t.Fatalf("Expected 2 rules in response, got %d", len(bulkDeleteResp.Rules)) + } + + for _, result := range bulkDeleteResp.Rules { + if result.StatusCode != http.StatusNoContent { + t.Errorf("Rule %s deletion failed with status %d: %s", result.Id, result.StatusCode, result.Message) + } else { + t.Logf("Rule %s deleted successfully", result.Id) + } + } + + promRule, err := f.Monitoringv1clientset.MonitoringV1().PrometheusRules(testNamespace).Get( + ctx, + "test-prometheus-rule", + metav1.GetOptions{}, + ) + if err != nil { + t.Fatalf("Failed to get PrometheusRule after deletion: %v", err) + } + + if len(promRule.Spec.Groups) != 1 { + t.Fatalf("Expected 1 rule group, got %d", len(promRule.Spec.Groups)) + } + + ruleGroup := promRule.Spec.Groups[0] + if len(ruleGroup.Rules) != 1 { + t.Fatalf("Expected 1 rule remaining, got %d: %+v", len(ruleGroup.Rules), ruleGroup.Rules) + } + + remainingRule := ruleGroup.Rules[0] + if remainingRule.Alert != "TestBulkDeleteAlert3" { + t.Errorf("Expected remaining rule to be TestBulkDeleteAlert3, got %s", remainingRule.Alert) + } + + if remainingRule.Labels["severity"] != "critical" { + t.Errorf("Expected severity=critical, got %s", remainingRule.Labels["severity"]) + } + + t.Log("Bulk delete test completed successfully - only TestBulkDeleteAlert3 remains") +} + +func TestDeleteUserDefinedAlertRuleById(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-delete-by-id", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + forDuration := monitoringv1.Duration("5m") + + testRule1 := monitoringv1.Rule{ + Alert: "TestDeleteByIdAlert1", + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "description": "Test alert 1 for delete by id testing", + }, + } + + testRule2 := monitoringv1.Rule{ + Alert: "TestDeleteByIdAlert2", + Expr: intstr.FromString("up == 1"), + For: &forDuration, + Labels: map[string]string{ + "severity": "info", + }, + Annotations: map[string]string{ + "description": "Test alert 2 for delete by id testing", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, testRule1, testRule2) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + var ruleIdToDelete string + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cm, err := f.Clientset.CoreV1().ConfigMaps(k8s.ClusterMonitoringNamespace).Get( + ctx, + k8s.RelabeledRulesConfigMapName, + metav1.GetOptions{}, + ) + if err != nil { + t.Logf("Failed to get ConfigMap: %v", err) + return false, nil + } + + configData, ok := cm.Data[k8s.RelabeledRulesConfigMapKey] + if !ok { + t.Logf("ConfigMap has no %s key", k8s.RelabeledRulesConfigMapKey) + return false, nil + } + + var rules map[string]monitoringv1.Rule + if err := yaml.Unmarshal([]byte(configData), &rules); err != nil { + t.Logf("Failed to unmarshal config data: %v", err) + return false, nil + } + + for ruleId, rule := range 
rules { + if rule.Alert == "TestDeleteByIdAlert1" { + ruleIdToDelete = ruleId + t.Logf("Found rule ID to delete: %s", ruleIdToDelete) + return true, nil + } + } + + t.Logf("Test alert not found yet in ConfigMap") + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for alerts to appear in ConfigMap: %v", err) + } + + deleteURL := fmt.Sprintf("%s/api/v1/alerting/rules/%s", f.PluginURL, ruleIdToDelete) + req, err := http.NewRequestWithContext(ctx, http.MethodDelete, deleteURL, nil) + if err != nil { + t.Fatalf("Failed to create HTTP request: %v", err) + } + + client := &http.Client{Timeout: 10 * time.Second} + resp, err := client.Do(req) + if err != nil { + t.Fatalf("Failed to make delete request: %v", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusNoContent { + body, _ := io.ReadAll(resp.Body) + t.Fatalf("Expected status code %d, got %d. Response body: %s", http.StatusNoContent, resp.StatusCode, string(body)) + } + + t.Logf("Rule %s deleted successfully", ruleIdToDelete) + + promRule, err := f.Monitoringv1clientset.MonitoringV1().PrometheusRules(testNamespace).Get( + ctx, + "test-prometheus-rule", + metav1.GetOptions{}, + ) + if err != nil { + t.Fatalf("Failed to get PrometheusRule after deletion: %v", err) + } + + if len(promRule.Spec.Groups) != 1 { + t.Fatalf("Expected 1 rule group, got %d", len(promRule.Spec.Groups)) + } + + ruleGroup := promRule.Spec.Groups[0] + if len(ruleGroup.Rules) != 1 { + t.Fatalf("Expected 1 rule remaining, got %d: %+v", len(ruleGroup.Rules), ruleGroup.Rules) + } + + remainingRule := ruleGroup.Rules[0] + if remainingRule.Alert != "TestDeleteByIdAlert2" { + t.Errorf("Expected remaining rule to be TestDeleteByIdAlert2, got %s", remainingRule.Alert) + } + + if remainingRule.Labels["severity"] != "info" { + t.Errorf("Expected severity=info, got %s", remainingRule.Labels["severity"]) + } + + t.Log("Delete by ID test completed successfully - only TestDeleteByIdAlert2 remains") +} diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go new file mode 100644 index 000000000..1adb98742 --- /dev/null +++ b/test/e2e/framework/framework.go @@ -0,0 +1,95 @@ +package framework + +import ( + "context" + "fmt" + "os" + "strconv" + "time" + + osmv1client "github.com/openshift/client-go/monitoring/clientset/versioned" + "github.com/openshift/monitoring-plugin/pkg/k8s" + monitoringv1client "github.com/prometheus-operator/prometheus-operator/pkg/client/versioned" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" +) + +var f *Framework + +type Framework struct { + Clientset *kubernetes.Clientset + Monitoringv1clientset *monitoringv1client.Clientset + Osmv1clientset *osmv1client.Clientset + + PluginURL string +} + +type CleanupFunc func() error + +func New() (*Framework, error) { + if f != nil { + return f, nil + } + + kubeConfigPath := os.Getenv("KUBECONFIG") + if kubeConfigPath == "" { + return nil, fmt.Errorf("KUBECONFIG environment variable not set") + } + + pluginURL := os.Getenv("PLUGIN_URL") + if pluginURL == "" { + return nil, fmt.Errorf("PLUGIN_URL environment variable not set, skipping management API e2e test") + } + + config, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath) + if err != nil { + return nil, fmt.Errorf("failed to build config: %w", err) + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create clientset: %w", err) + } + + 
monitoringv1clientset, err := monitoringv1client.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create monitoringv1 clientset: %w", err) + } + + osmv1clientset, err := osmv1client.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to create osmv1 clientset: %w", err) + } + + f = &Framework{ + Clientset: clientset, + Monitoringv1clientset: monitoringv1clientset, + Osmv1clientset: osmv1clientset, + PluginURL: pluginURL, + } + + return f, nil +} + +func (f *Framework) CreateNamespace(ctx context.Context, name string, isClusterMonitoringNamespace bool) (string, CleanupFunc, error) { + testNamespace := fmt.Sprintf("%s-%d", name, time.Now().Unix()) + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNamespace, + Labels: map[string]string{ + k8s.ClusterMonitoringLabel: strconv.FormatBool(isClusterMonitoringNamespace), + }, + }, + } + + _, err := f.Clientset.CoreV1().Namespaces().Create(ctx, namespace, metav1.CreateOptions{}) + if err != nil { + return "", nil, fmt.Errorf("failed to create test namespace: %w", err) + } + + return testNamespace, func() error { + return f.Clientset.CoreV1().Namespaces().Delete(ctx, testNamespace, metav1.DeleteOptions{}) + }, nil +} diff --git a/test/e2e/relabeled_rules_test.go b/test/e2e/relabeled_rules_test.go new file mode 100644 index 000000000..e62c168dd --- /dev/null +++ b/test/e2e/relabeled_rules_test.go @@ -0,0 +1,318 @@ +package e2e + +import ( + "context" + "fmt" + "testing" + "time" + + osmv1 "github.com/openshift/api/monitoring/v1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "gopkg.in/yaml.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + + "github.com/openshift/monitoring-plugin/pkg/k8s" + "github.com/openshift/monitoring-plugin/test/e2e/framework" +) + +func TestRelabeledRulesConfigMapExists(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + cm, err := f.Clientset.CoreV1().ConfigMaps(k8s.ClusterMonitoringNamespace).Get( + ctx, + k8s.RelabeledRulesConfigMapName, + metav1.GetOptions{}, + ) + if err != nil { + t.Fatalf("Failed to get ConfigMap %s/%s: %v", k8s.ClusterMonitoringNamespace, k8s.RelabeledRulesConfigMapName, err) + } + + if cm.Labels == nil { + t.Fatal("ConfigMap has no labels") + } + + if cm.Labels[k8s.AppKubernetesIoManagedBy] != k8s.AppKubernetesIoComponentMonitoringPlugin { + t.Errorf("ConfigMap has wrong managed-by label. Expected %s, got %s", k8s.AppKubernetesIoComponentMonitoringPlugin, cm.Labels[k8s.AppKubernetesIoManagedBy]) + } + + if cm.Labels[k8s.AppKubernetesIoComponent] != k8s.AppKubernetesIoComponentAlertManagementApi { + t.Errorf("ConfigMap has wrong component label. 
Expected %s, got %s", k8s.AppKubernetesIoComponentAlertManagementApi, cm.Labels[k8s.AppKubernetesIoComponent]) + } +} + +func TestPrometheusRuleAppearsInConfigMap(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-prometheus-rule", false) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + testAlertName := "TestAlert" + forDuration := monitoringv1.Duration("5m") + testRule := monitoringv1.Rule{ + Alert: testAlertName, + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + }, + Annotations: map[string]string{ + "description": "Test alert for e2e testing", + "summary": "Test alert", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, testRule) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cm, err := f.Clientset.CoreV1().ConfigMaps(k8s.ClusterMonitoringNamespace).Get( + ctx, + k8s.RelabeledRulesConfigMapName, + metav1.GetOptions{}, + ) + if err != nil { + t.Logf("Failed to get ConfigMap: %v", err) + return false, nil + } + + configData, ok := cm.Data[k8s.RelabeledRulesConfigMapKey] + if !ok { + t.Logf("ConfigMap has no %s key", k8s.RelabeledRulesConfigMapKey) + return false, nil + } + + var rules map[string]monitoringv1.Rule + if err := yaml.Unmarshal([]byte(configData), &rules); err != nil { + t.Logf("Failed to unmarshal config data: %v", err) + return false, nil + } + + for _, rule := range rules { + if rule.Alert == testAlertName { + expectedLabels := map[string]string{ + k8s.PrometheusRuleLabelNamespace: testNamespace, + k8s.PrometheusRuleLabelName: "test-prometheus-rule", + } + + if err := compareRuleLabels(t, testAlertName, rule.Labels, expectedLabels); err != nil { + return false, err + } + + if _, ok := rule.Labels[k8s.AlertRuleLabelId]; !ok { + t.Errorf("Alert %s missing openshift_io_alert_rule_id label", testAlertName) + return false, fmt.Errorf("alert missing openshift_io_alert_rule_id label") + } + + t.Logf("Found alert %s in ConfigMap with all expected labels", testAlertName) + return true, nil + } + } + + t.Logf("Alert %s not found in ConfigMap yet (found %d rules)", testAlertName, len(rules)) + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for alert to appear in ConfigMap: %v", err) + } +} + +func TestRelabelAlert(t *testing.T) { + f, err := framework.New() + if err != nil { + t.Fatalf("Failed to create framework: %v", err) + } + + ctx := context.Background() + + testNamespace, cleanup, err := f.CreateNamespace(ctx, "test-relabel-alert", true) + if err != nil { + t.Fatalf("Failed to create test namespace: %v", err) + } + defer cleanup() + + forDuration := monitoringv1.Duration("5m") + + criticalRule := monitoringv1.Rule{ + Alert: "TestRelabelAlert", + Expr: intstr.FromString("up == 0"), + For: &forDuration, + Labels: map[string]string{ + "severity": "critical", + "team": "web", + }, + Annotations: map[string]string{ + "description": "Critical alert for relabel testing", + "summary": "Critical test alert", + }, + } + + warningRule := monitoringv1.Rule{ + Alert: "TestRelabelAlert", + Expr: intstr.FromString("up == 1"), + For: &forDuration, + Labels: map[string]string{ + "severity": "warning", + "team": "web", + }, + 
Annotations: map[string]string{ + "description": "Warning alert for relabel testing", + "summary": "Warning test alert", + }, + } + + _, err = createPrometheusRule(ctx, f, testNamespace, criticalRule, warningRule) + if err != nil { + t.Fatalf("Failed to create PrometheusRule: %v", err) + } + + relabelConfigName := "change-critical-team" + arc := &osmv1.AlertRelabelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: relabelConfigName, + Namespace: k8s.ClusterMonitoringNamespace, + }, + Spec: osmv1.AlertRelabelConfigSpec{ + Configs: []osmv1.RelabelConfig{ + { + SourceLabels: []osmv1.LabelName{"alertname", "severity"}, + Regex: "TestRelabelAlert;critical", + Separator: ";", + TargetLabel: "team", + Replacement: "ops", + Action: "Replace", + }, + }, + }, + } + + _, err = f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).Create( + ctx, + arc, + metav1.CreateOptions{}, + ) + if err != nil { + t.Fatalf("Failed to create AlertRelabelConfig: %v", err) + } + defer func() { + err = f.Osmv1clientset.MonitoringV1().AlertRelabelConfigs(k8s.ClusterMonitoringNamespace).Delete(ctx, relabelConfigName, metav1.DeleteOptions{}) + if err != nil { + t.Fatalf("Failed to delete AlertRelabelConfig: %v", err) + } + }() + + err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 2*time.Minute, true, func(ctx context.Context) (bool, error) { + cm, err := f.Clientset.CoreV1().ConfigMaps(k8s.ClusterMonitoringNamespace).Get( + ctx, + k8s.RelabeledRulesConfigMapName, + metav1.GetOptions{}, + ) + if err != nil { + t.Logf("Failed to get ConfigMap: %v", err) + return false, nil + } + + configData, ok := cm.Data[k8s.RelabeledRulesConfigMapKey] + if !ok { + t.Logf("ConfigMap has no %s key", k8s.RelabeledRulesConfigMapKey) + return false, nil + } + + var rules map[string]monitoringv1.Rule + if err := yaml.Unmarshal([]byte(configData), &rules); err != nil { + t.Logf("Failed to unmarshal config data: %v", err) + return false, nil + } + + foundCriticalWithOps := false + foundWarningWithWeb := false + + for _, rule := range rules { + if rule.Alert == "TestRelabelAlert" { + if rule.Labels["team"] == "ops" && rule.Labels["severity"] == "critical" { + t.Logf("Found critical alert with team=ops (relabeling successful)") + foundCriticalWithOps = true + } + + if rule.Labels["team"] == "web" && rule.Labels["severity"] == "warning" { + t.Logf("Found warning alert with team=web") + foundWarningWithWeb = true + } + } + } + + if foundCriticalWithOps { + t.Logf("Relabeling verified: critical alert has team=ops, warning alert has team=web") + return true, nil + } + + t.Logf("Waiting for relabeling to take effect (critical with ops=%v, warning with web=%v)", foundCriticalWithOps, foundWarningWithWeb) + return false, nil + }) + + if err != nil { + t.Fatalf("Timeout waiting for relabeling to take effect: %v", err) + } +} + +func createPrometheusRule(ctx context.Context, f *framework.Framework, namespace string, rules ...monitoringv1.Rule) (*monitoringv1.PrometheusRule, error) { + interval := monitoringv1.Duration("30s") + prometheusRule := &monitoringv1.PrometheusRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-prometheus-rule", + Namespace: namespace, + }, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: []monitoringv1.RuleGroup{ + { + Name: "test-group", + Interval: &interval, + Rules: rules, + }, + }, + }, + } + + return f.Monitoringv1clientset.MonitoringV1().PrometheusRules(namespace).Create( + ctx, + prometheusRule, + metav1.CreateOptions{}, + ) +} + +func compareRuleLabels(t *testing.T, alertName string, 
foundLabels map[string]string, wantedLabels map[string]string) error { + if foundLabels == nil { + t.Errorf("Alert %s has no labels", alertName) + return fmt.Errorf("alert has no labels") + } + + for key, wantValue := range wantedLabels { + if gotValue, ok := foundLabels[key]; !ok { + t.Errorf("Alert %s missing %s label", alertName, key) + return fmt.Errorf("alert missing %s label", key) + } else if gotValue != wantValue { + t.Errorf("Alert %s has wrong %s label. Expected %s, got %s", + alertName, key, wantValue, gotValue) + return fmt.Errorf("alert has wrong %s label", key) + } + } + + return nil +}
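
For reference, a minimal client sketch of the two DELETE endpoints these e2e tests exercise, assuming the plugin route is reachable at PLUGIN_URL (the same environment variable the e2e framework reads). The plugin URL, rule IDs, and error handling are placeholders; the request/response types are the managementrouter ones already used by TestBulkDeleteUserDefinedAlertRules and TestDeleteUserDefinedAlertRuleById above, not a definitive client implementation.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"time"

	"github.com/openshift/monitoring-plugin/internal/managementrouter"
)

func main() {
	pluginURL := os.Getenv("PLUGIN_URL") // assumption: same env var as the e2e framework
	client := &http.Client{Timeout: 10 * time.Second}

	// Bulk delete: DELETE /api/v1/alerting/rules with a JSON body listing rule IDs.
	body, err := json.Marshal(managementrouter.BulkDeleteUserDefinedAlertRulesRequest{
		RuleIds: []string{"rule-id-1", "rule-id-2"}, // placeholder IDs
	})
	if err != nil {
		panic(err)
	}
	req, err := http.NewRequest(http.MethodDelete, pluginURL+"/api/v1/alerting/rules", bytes.NewBuffer(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The bulk endpoint returns 200 with a per-rule status for each requested ID.
	var out managementrouter.BulkDeleteUserDefinedAlertRulesResponse
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	for _, r := range out.Rules {
		// 204 per rule means that rule was deleted; otherwise Message explains the failure.
		fmt.Printf("rule %s -> %d %s\n", r.Id, r.StatusCode, r.Message)
	}

	// Single delete: DELETE /api/v1/alerting/rules/{id}; success is a 204 with no body.
	delReq, err := http.NewRequest(http.MethodDelete, pluginURL+"/api/v1/alerting/rules/rule-id-3", nil)
	if err != nil {
		panic(err)
	}
	delResp, err := client.Do(delReq)
	if err != nil {
		panic(err)
	}
	delResp.Body.Close()
	fmt.Println("single delete status:", delResp.StatusCode)
}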