
Commit 0894a6a

add SNO control plane high cpu usage alert
Create a separate SNO alert for control plane high CPU usage. This alert is also aware of workload partitioning and adjusts its threshold when workload partitioning is enabled.
1 parent 04eba64 commit 0894a6a

File tree

6 files changed: +495 -3 lines changed
assets/alerts/cpu-utilization-sno.yaml

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cpu-utilization
  namespace: openshift-kube-apiserver
spec:
  groups:
    - name: control-plane-cpu-utilization
      rules:
        - alert: HighOverallControlPlaneCPU
          annotations:
            summary: >-
              CPU utilization across control plane pods is more than 60% of total CPU. High CPU usage usually means that something is going wrong.
            runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
            description: >-
              This level of CPU utilization of the control plane is probably not a problem under most circumstances, but high levels of utilization may indicate
              problems with the cluster or the control plane pods. To manage this alert or to modify its threshold in case of false positives, see the following link:
              https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
          expr: |
            sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 60
          for: 10m
          labels:
            namespace: openshift-kube-apiserver
            severity: warning
        - alert: ExtremelyHighIndividualControlPlaneCPU
          annotations:
            summary: >-
              CPU utilization across control plane pods is more than 90% of total CPU. High CPU usage usually means that something is going wrong.
            runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-kube-apiserver-operator/ExtremelyHighIndividualControlPlaneCPU.md
            description: >-
              This level of CPU utilization of the control plane is probably not a problem under most circumstances, but high levels of utilization may indicate
              problems with the cluster or the control plane pods. When workload partitioning is enabled, the threshold is calculated against the CPU set
              reserved for the control plane. Extreme CPU pressure can cause slow serialization and poor performance from the kube-apiserver and etcd.
              When this happens, there is a risk of clients seeing non-responsive API requests which are issued again,
              causing even more CPU pressure.
              It can also cause failing liveness probes due to slow etcd responsiveness on the backend.
              If one kube-apiserver fails under this condition, chances are you will experience a cascade as the remaining
              kube-apiservers are also under-provisioned.
              To fix this, increase the CPU and memory on your control plane nodes.
              To manage this alert or to modify its threshold in case of false positives, see the following link:
              https://docs.openshift.com/container-platform/latest/monitoring/managing-alerts.html
          expr: |
            sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 90
          for: 1h
          labels:
            namespace: openshift-kube-apiserver
            severity: critical
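
Note that ${CPU-COUNT} is a template placeholder, not valid PromQL; the controller in the next file substitutes it with the number of control plane cores before the rule is applied. A minimal sketch of that substitution on a single expression, assuming an 8-core node; the helper name renderExpr is hypothetical (the controller itself uses bytes.ReplaceAll over the whole asset file):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// renderExpr replaces the ${CPU-COUNT} placeholder the same way the
// controller does before applying the PrometheusRule.
func renderExpr(template string, cores int) string {
	return strings.ReplaceAll(template, "${CPU-COUNT}", strconv.Itoa(cores))
}

func main() {
	expr := `sum(rate(container_cpu_usage_seconds_total{namespace=~"openshift-.*",image!=""}[4m])) / ${CPU-COUNT} * 100 > 60`
	// With 8 cores, the warning fires when openshift-* pods use more than
	// 4.8 cores (60% of 8) averaged over a 4-minute window.
	fmt.Println(renderExpr(expr, 8))
}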
pkg/operator/highcpuusagealertcontroller (new package)

Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
package highcpuusagealertcontroller

import (
	"bytes"
	"context"
	"strconv"
	"time"

	configv1 "github.com/openshift/api/config/v1"
	configv1informers "github.com/openshift/client-go/config/informers/externalversions/config/v1"
	configlistersv1 "github.com/openshift/client-go/config/listers/config/v1"
	"github.com/openshift/cluster-kube-apiserver-operator/bindata"
	"github.com/openshift/library-go/pkg/controller/factory"
	"github.com/openshift/library-go/pkg/operator/events"
	"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
	"github.com/openshift/library-go/pkg/operator/resource/resourceread"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/client-go/dynamic"
	"k8s.io/client-go/dynamic/dynamicinformer"
	"k8s.io/utils/cpuset"
)

// default number of control plane cores, taken from the docs
const defaultCoresNum = 8

var performanceGroup = schema.GroupVersionResource{Group: "performance.openshift.io", Version: "v2", Resource: "performanceprofiles"}

type highCPUUsageAlertController struct {
	client               dynamic.Interface
	infraLister          configlistersv1.InfrastructureLister
	clusterVersionLister configlistersv1.ClusterVersionLister
}

func NewHighCPUUsageAlertController(
	configInformer configv1informers.Interface,
	dynamicInformersForTargetNamespace dynamicinformer.DynamicSharedInformerFactory,
	client dynamic.Interface,
	recorder events.Recorder,
) factory.Controller {
	c := &highCPUUsageAlertController{
		client:               client,
		infraLister:          configInformer.Infrastructures().Lister(),
		clusterVersionLister: configInformer.ClusterVersions().Lister(),
	}

	prometheusAlertInformerForTargetNamespace := dynamicInformersForTargetNamespace.ForResource(schema.GroupVersionResource{
		Group:    "monitoring.coreos.com",
		Version:  "v1",
		Resource: "prometheusrules",
	})

	return factory.New().
		WithInformers(configInformer.Infrastructures().Informer(), configInformer.ClusterVersions().Informer(), prometheusAlertInformerForTargetNamespace.Informer()).
		WithSync(c.sync).ResyncEvery(10*time.Minute).
		ToController("highCPUUsageAlertController", recorder.WithComponentSuffix("high-cpu-usage-alert-controller"))
}

func (c *highCPUUsageAlertController) sync(ctx context.Context, syncCtx factory.SyncContext) error {
	infra, err := c.infraLister.Get("cluster")
	if err != nil {
		return err
	}

	var alertRaw []byte

	if infra.Status.InfrastructureTopology != configv1.SingleReplicaTopologyMode {
		// creation of the alert was moved here because the static resource
		// controller was constantly deleting the alert and fighting with this
		// controller
		alertRaw, err = bindata.Asset("assets/alerts/cpu-utilization.yaml")
		if err != nil {
			return err
		}
	} else {
		clusterVersion, err := c.clusterVersionLister.Get("version")
		if err != nil {
			return err
		}

		alertRaw, err = snoAlert(ctx, c.client, clusterVersion.Status.Capabilities.EnabledCapabilities, infra.Status.CPUPartitioning)
		if err != nil {
			return err
		}
	}

	alertObj, err := resourceread.ReadGenericWithUnstructured(alertRaw)
	if err != nil {
		return err
	}

	_, _, err = resourceapply.ApplyPrometheusRule(ctx, c.client, syncCtx.Recorder(), alertObj.(*unstructured.Unstructured))
	return err
}

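The topology branch in sync reduces to a simple asset choice. A minimal sketch of that selection; the helper name chooseAlertAsset is hypothetical and not part of the commit, while the asset paths are the ones used above:

package main

import (
	"fmt"

	configv1 "github.com/openshift/api/config/v1"
)

// chooseAlertAsset mirrors the branch in sync(): single-replica (SNO)
// infrastructure topology gets the templated SNO alert; everything else
// gets the static cpu-utilization alert.
func chooseAlertAsset(topology configv1.TopologyMode) string {
	if topology == configv1.SingleReplicaTopologyMode {
		return "assets/alerts/cpu-utilization-sno.yaml"
	}
	return "assets/alerts/cpu-utilization.yaml"
}

func main() {
	fmt.Println(chooseAlertAsset(configv1.SingleReplicaTopologyMode)) // assets/alerts/cpu-utilization-sno.yaml
}
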
func snoAlert(ctx context.Context, client dynamic.Interface, enabledCapabilities []configv1.ClusterVersionCapability, cpuMode configv1.CPUPartitioningMode) ([]byte, error) {
	cores := defaultCoresNum

	// If the NodeTuning capability is disabled, there are no PerformanceProfiles,
	// so we proceed with the default value.
	if sets.New(enabledCapabilities...).Has(configv1.ClusterVersionCapabilityNodeTuning) && cpuMode == configv1.CPUPartitioningAllNodes {
		foundCores, found, err := performanceProfileControlPlaneCores(ctx, client)
		if err != nil {
			return nil, err
		}
		// use the core count from the PerformanceProfile if one was found;
		// if not, proceed with the default value
		if found {
			cores = foundCores
		}
	}

	fileData, err := bindata.Asset("assets/alerts/cpu-utilization-sno.yaml")
	if err != nil {
		return nil, err
	}
	fileData = bytes.ReplaceAll(fileData, []byte(`${CPU-COUNT}`), []byte(strconv.Itoa(cores)))

	return fileData, nil
}

// performanceProfileControlPlaneCores returns the number of cores allocated for
// control plane pods via a PerformanceProfile object. The bool value indicates
// whether a PerformanceProfile targeting master nodes was found.
func performanceProfileControlPlaneCores(ctx context.Context, client dynamic.Interface) (int, bool, error) {
	// fetch the resource directly instead of using an informer because the
	// NodeTuning capability can be disabled at start and enabled later
	obj, err := client.Resource(performanceGroup).List(ctx, metav1.ListOptions{})
	if err != nil {
		return 0, false, err
	}

	for _, pf := range obj.Items {
		nodeSelector, found, err := unstructured.NestedStringMap(pf.Object, "spec", "nodeSelector")
		if err != nil {
			return 0, false, err
		}
		if !found {
			continue
		}
		if _, ok := nodeSelector["node-role.kubernetes.io/master"]; !ok {
			continue
		}

		reservedCPU, found, err := unstructured.NestedString(pf.Object, "spec", "cpu", "reserved")
		if err != nil {
			return 0, false, err
		}
		if !found {
			continue
		}

		cores, err := coresInCPUSet(reservedCPU)
		if err != nil {
			return 0, false, err
		}
		return cores, true, nil
	}

	return 0, false, nil
}

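Only spec.nodeSelector and spec.cpu.reserved are read by the lookup above. For reference, a trimmed PerformanceProfile of the expected shape; the name and CPU ranges are hypothetical examples:

apiVersion: performance.openshift.io/v2
kind: PerformanceProfile
metadata:
  name: sno-performance                    # hypothetical name
spec:
  cpu:
    reserved: "0-3"                        # management cores, counted by coresInCPUSet
    isolated: "4-15"                       # workload cores, ignored by this controller
  nodeSelector:
    node-role.kubernetes.io/master: ""     # required for the profile to match
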
func coresInCPUSet(set string) (int, error) {
	cpuMap, err := cpuset.Parse(set)
	return cpuMap.Size(), err
}
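
coresInCPUSet delegates to k8s.io/utils/cpuset (the vendored package whose OWNERS file is added below), which parses kubelet-style CPU set strings. A small sketch with an example value:

package main

import (
	"fmt"

	"k8s.io/utils/cpuset"
)

func main() {
	// "0-3,8" expands to cores 0, 1, 2, 3 and 8, i.e. five reserved cores,
	// so the SNO alert thresholds would be computed against 5 CPUs.
	set, err := cpuset.Parse("0-3,8")
	if err != nil {
		panic(err)
	}
	fmt.Println(set.Size()) // 5
}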

pkg/operator/starter.go

Lines changed: 17 additions & 3 deletions
@@ -25,6 +25,7 @@ import (
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/configobservation/configobservercontroller"
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/configobservation/node"
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/connectivitycheckcontroller"
+	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/highcpuusagealertcontroller"
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/kubeletversionskewcontroller"
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/nodekubeconfigcontroller"
 	"github.com/openshift/cluster-kube-apiserver-operator/pkg/operator/operatorclient"
@@ -66,6 +67,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/client-go/dynamic"
+	"k8s.io/client-go/dynamic/dynamicinformer"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/ptr"
@@ -115,7 +117,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 		"openshift-apiserver",
 	)
 	configInformers := configv1informers.NewSharedInformerFactory(configClient, 10*time.Minute)
-	operatorClient, dynamicInformers, err := genericoperatorclient.NewStaticPodOperatorClient(controllerContext.KubeConfig, operatorv1.GroupVersion.WithResource("kubeapiservers"))
+	operatorClient, dynamicInformersForAllNamespaces, err := genericoperatorclient.NewStaticPodOperatorClient(controllerContext.KubeConfig, operatorv1.GroupVersion.WithResource("kubeapiservers"))
 	if err != nil {
 		return err
 	}
@@ -175,13 +177,16 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 		WithEventHandler(operatorclient.TargetNamespace, "LateConnections", terminationobserver.ProcessLateConnectionEvents).
 		ToController(kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace), kubeClient.CoreV1(), controllerContext.EventRecorder)
 
+	// TODO: use an informer instead of a direct API call
+	// Also, in the future there is a plan to make the infrastructure type dynamic
 	infrastructure, err := configClient.ConfigV1().Infrastructures().Get(ctx, "cluster", metav1.GetOptions{})
 	if err != nil {
 		return err
 	}
 	var notOnSingleReplicaTopology resourceapply.ConditionalFunction = func() bool {
 		return infrastructure.Status.ControlPlaneTopology != configv1.SingleReplicaTopologyMode
 	}
+
 	staticResourceController := staticresourcecontroller.NewStaticResourceController(
 		"KubeAPIServerStaticResources",
 		bindata.Asset,
@@ -208,7 +213,6 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 		"assets/kube-apiserver/storage-version-migration-prioritylevelconfiguration.yaml",
 		"assets/alerts/api-usage.yaml",
 		"assets/alerts/audit-errors.yaml",
-		"assets/alerts/cpu-utilization.yaml",
 		"assets/alerts/kube-apiserver-requests.yaml",
 		"assets/alerts/kube-apiserver-slos-basic.yaml",
 		"assets/alerts/podsecurity-violations.yaml",
@@ -224,6 +228,14 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 		WithConditionalResources(bindata.Asset, []string{"assets/alerts/kube-apiserver-slos-extended.yaml"}, notOnSingleReplicaTopology, nil).
 		AddKubeInformers(kubeInformersForNamespaces)
 
+	dynamicInformersForTargetNamespace := dynamicinformer.NewFilteredDynamicSharedInformerFactory(dynamicClient, 12*time.Hour, operatorclient.TargetNamespace, nil)
+
+	highCpuUsageAlertController := highcpuusagealertcontroller.NewHighCPUUsageAlertController(
+		configInformers.Config().V1(),
+		dynamicInformersForTargetNamespace,
+		dynamicClient,
+		controllerContext.EventRecorder)
+
 	targetConfigReconciler := targetconfigcontroller.NewTargetConfigController(
 		os.Getenv("IMAGE"),
 		os.Getenv("OPERATOR_IMAGE"),
@@ -459,7 +471,8 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 
 	kubeInformersForNamespaces.Start(ctx.Done())
 	configInformers.Start(ctx.Done())
-	dynamicInformers.Start(ctx.Done())
+	dynamicInformersForAllNamespaces.Start(ctx.Done())
+	dynamicInformersForTargetNamespace.Start(ctx.Done())
 	migrationInformer.Start(ctx.Done())
 	apiextensionsInformers.Start(ctx.Done())
 	operatorInformers.Start(ctx.Done())
@@ -486,6 +499,7 @@ func RunOperator(ctx context.Context, controllerContext *controllercmd.Controlle
 	go webhookSupportabilityController.Run(ctx, 1)
 	go serviceAccountIssuerController.Run(ctx, 1)
 	go podSecurityReadinessController.Run(ctx, 1)
+	go highCpuUsageAlertController.Run(ctx, 1)
 	go sccReconcileController.Run(ctx, 1)
 
 	<-ctx.Done()

vendor/k8s.io/utils/cpuset/OWNERS

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.
