5 changes: 5 additions & 0 deletions pkg/client/client.go
@@ -1748,6 +1748,11 @@ func (c *Client) HasConsoleCapability(ctx context.Context) (bool, error) {
return c.HasClusterCapability(ctx, configv1.ClusterVersionCapabilityConsole)
}

func (c *Client) HasOptionalMonitoringCapability(_ context.Context) (bool, error) {
//return c.HasClusterCapability(ctx, configv1.ClusterVersionCapabilityOptionalMonitoring)
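// NOTE: the capability check above is commented out for now (see the review note below);
// until it is enabled, this method always reports the capability as absent.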
Review comment (Member Author): Waiting on this.

return false, nil
}

// CreateOrUpdateConsolePlugin function uses retries because API requests related to the ConsolePlugin resource
// may depend on the availability of a conversion container. This container is part of the console-operator Pod, which is not duplicated.
// If this pod is down (due to restarts for upgrades or other reasons), transient failures will be reported.
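For readers unfamiliar with the retry behavior the comment above describes, a minimal sketch of such a wrapper is shown below. It is illustrative only, not the repository's actual implementation: the function name, the error predicate, and the use of client-go's retry helper are assumptions.

package client

import (
	"context"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/client-go/util/retry"
)

// createOrUpdateConsolePluginWithRetry is a hypothetical wrapper: it retries the
// apply function with exponential backoff when the error looks transient, e.g.
// while the console-operator's conversion container is temporarily unavailable.
func createOrUpdateConsolePluginWithRetry(ctx context.Context, apply func(context.Context) error) error {
	isTransient := func(err error) bool {
		// Conversion-webhook outages commonly surface as one of these API errors.
		return apierrors.IsServiceUnavailable(err) ||
			apierrors.IsInternalError(err) ||
			apierrors.IsTimeout(err)
	}
	return retry.OnError(retry.DefaultBackoff, isTransient, func() error {
		return apply(ctx)
	})
}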
90 changes: 56 additions & 34 deletions pkg/operator/operator.go
@@ -790,42 +790,64 @@ func (o *Operator) sync(ctx context.Context, key string) error {
consoleConfig,
)

optionalMonitoringEnabled, err := o.client.HasOptionalMonitoringCapability(ctx)
if err != nil {
o.reportFailed(ctx, newRunReportForError("OptionalMonitoringCheckError", err))
return err
}

// Update prometheus-operator before anything else because it is
// responsible for managing many other resources (e.g. Prometheus,
// Alertmanager, Thanos Ruler, ...). The metrics scraping client CA
// should also be created first because it is referenced by Prometheus.
prerequisiteTasks := []*tasks.TaskSpec{
newTaskSpec("MetricsScrapingClientCA", tasks.NewMetricsClientCATask(o.client, factory, config)),
}
if optionalMonitoringEnabled {
prerequisiteTasks = append(prerequisiteTasks,
newTaskSpec("PrometheusOperatorOptionalMonitoring", tasks.NewPrometheusOperatorOptionalMonitoringTask(o.client, factory)),
)
} else {
prerequisiteTasks = append(prerequisiteTasks,
newTaskSpec("PrometheusOperator", tasks.NewPrometheusOperatorTask(o.client, factory)),
)
}
followupTasks :=
[]*tasks.TaskSpec{
newTaskSpec("Prometheus", tasks.NewPrometheusTask(o.client, factory, config)),
newTaskSpec("NodeExporter", tasks.NewNodeExporterTask(o.client, factory)),
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory)),
newTaskSpec("OpenshiftStateMetrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)),
newTaskSpec("MetricsServer", tasks.NewMetricsServerTask(ctx, o.namespace, o.client, factory, config)),
newTaskSpec("TelemeterClient", tasks.NewTelemeterClientTask(o.client, factory, config)),
newTaskSpec("ThanosQuerier", tasks.NewThanosQuerierTask(o.client, factory, config)),
newTaskSpec("ControlPlaneComponents", tasks.NewControlPlaneTask(o.client, factory, config)),
// Tried to run the UWM prom-operator in the first group, but some e2e tests started failing.
newUWMTaskSpec("Prometheus", tasks.NewPrometheusUserWorkloadTask(o.client, factory, config)),
}
if optionalMonitoringEnabled {
followupTasks = append(followupTasks,
newTaskSpec("ClusterMonitoringOperatorOptionalMonitoringDeps", tasks.NewClusterMonitoringOperatorOptionalMonitoringTask(o.client, factory, config)),
)
} else {
followupTasks = append(followupTasks,
newTaskSpec("ClusterMonitoringOperatorDeps", tasks.NewClusterMonitoringOperatorTask(o.client, factory, config)),
newTaskSpec("Alertmanager", tasks.NewAlertmanagerTask(o.client, factory, config)),
newTaskSpec("ConsolePluginComponents", tasks.NewMonitoringPluginTask(o.client, factory, config)),
newUWMTaskSpec("PrometheusOperator", tasks.NewPrometheusOperatorUserWorkloadTask(o.client, factory, config)),
newUWMTaskSpec("Alertmanager", tasks.NewAlertmanagerUserWorkloadTask(o.client, factory, config)),
newUWMTaskSpec("ThanosRuler", tasks.NewThanosRulerUserWorkloadTask(o.client, factory, config)),
)
}
// The shared configmap depends on resources created by the previous tasks, hence it runs last.
postRequisiteTasks := []*tasks.TaskSpec{
newTaskSpec("ConfigurationSharing", tasks.NewConfigSharingTask(o.client, factory, config)),
}
tl := tasks.NewTaskRunner(
o.client,
// Update prometheus-operator before anything else because it is
// responsible for managing many other resources (e.g. Prometheus,
// Alertmanager, Thanos Ruler, ...). The metrics scraping client CA
// should also be created first because it is referenced by Prometheus.
tasks.NewTaskGroup(
[]*tasks.TaskSpec{
newTaskSpec("MetricsScrapingClientCA", tasks.NewMetricsClientCATask(o.client, factory, config)),
newTaskSpec("PrometheusOperator", tasks.NewPrometheusOperatorTask(o.client, factory)),
}),
tasks.NewTaskGroup(
[]*tasks.TaskSpec{
newTaskSpec("ClusterMonitoringOperatorDeps", tasks.NewClusterMonitoringOperatorTask(o.client, factory, config)),
newTaskSpec("Prometheus", tasks.NewPrometheusTask(o.client, factory, config)),
newTaskSpec("Alertmanager", tasks.NewAlertmanagerTask(o.client, factory, config)),
newTaskSpec("NodeExporter", tasks.NewNodeExporterTask(o.client, factory)),
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory)),
newTaskSpec("OpenshiftStateMetrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)),
newTaskSpec("MetricsServer", tasks.NewMetricsServerTask(ctx, o.namespace, o.client, factory, config)),
newTaskSpec("TelemeterClient", tasks.NewTelemeterClientTask(o.client, factory, config)),
newTaskSpec("ThanosQuerier", tasks.NewThanosQuerierTask(o.client, factory, config)),
newTaskSpec("ControlPlaneComponents", tasks.NewControlPlaneTask(o.client, factory, config)),
newTaskSpec("ConsolePluginComponents", tasks.NewMonitoringPluginTask(o.client, factory, config)),
// Tried to run the UWM prom-operator in the first group, but some e2e tests started failing.
newUWMTaskSpec("PrometheusOperator", tasks.NewPrometheusOperatorUserWorkloadTask(o.client, factory, config)),
newUWMTaskSpec("Prometheus", tasks.NewPrometheusUserWorkloadTask(o.client, factory, config)),
newUWMTaskSpec("Alertmanager", tasks.NewAlertmanagerUserWorkloadTask(o.client, factory, config)),
newUWMTaskSpec("ThanosRuler", tasks.NewThanosRulerUserWorkloadTask(o.client, factory, config)),
}),
// The shared configmap depends on resources being created by the previous tasks hence run it last.
tasks.NewTaskGroup(
[]*tasks.TaskSpec{
newTaskSpec("ConfigurationSharing", tasks.NewConfigSharingTask(o.client, factory, config)),
},
),
tasks.NewTaskGroup(prerequisiteTasks),
tasks.NewTaskGroup(followupTasks),
tasks.NewTaskGroup(postRequisiteTasks),
)
klog.Info("Updating ClusterOperator status to InProgress.")
err = o.client.StatusReporter().SetRollOutInProgress(ctx)
126 changes: 126 additions & 0 deletions pkg/tasks/clustermonitoringoperator_optionalmonitoring.go
@@ -0,0 +1,126 @@
// Copyright 2018 The Cluster Monitoring Operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package tasks

import (
"context"
"fmt"

rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/klog/v2"

"github.com/openshift/cluster-monitoring-operator/pkg/client"
"github.com/openshift/cluster-monitoring-operator/pkg/manifests"
)

type ClusterMonitoringOperatorOptionalMonitoringTask struct {
client *client.Client
factory *manifests.Factory
config *manifests.Config
}

func NewClusterMonitoringOperatorOptionalMonitoringTask(
client *client.Client,
factory *manifests.Factory,
config *manifests.Config,
) *ClusterMonitoringOperatorOptionalMonitoringTask {
return &ClusterMonitoringOperatorOptionalMonitoringTask{
client: client,
factory: factory,
config: config,
}
}

func (t *ClusterMonitoringOperatorOptionalMonitoringTask) Run(ctx context.Context) error {
crfs := map[string]func() (*rbacv1.ClusterRole, error){
"cluster-monitoring-view": t.factory.ClusterMonitoringClusterRoleView,
"system:aggregated-metrics-reader": t.factory.ClusterMonitoringClusterRoleAggregatedMetricsReader,
"pod-metrics-reader": t.factory.ClusterMonitoringClusterRolePodMetricsReader,
"monitoring-rules-edit": t.factory.ClusterMonitoringRulesEditClusterRole,
"monitoring-rules-view": t.factory.ClusterMonitoringRulesViewClusterRole,
"monitoring-edit": t.factory.ClusterMonitoringEditClusterRole,
}
for name, crf := range crfs {
cr, err := crf()
if err != nil {
return fmt.Errorf("initializing %s ClusterRole failed: %w", name, err)
}

err = t.client.CreateOrUpdateClusterRole(ctx, cr)
if err != nil {
return fmt.Errorf("reconciling %s ClusterRole failed: %w", name, err)
}
}

clarr, err := t.factory.ClusterMonitoringApiReaderRole()
if err != nil {
return fmt.Errorf("initializing ClusterMonitoringApiReader Role failed: %w", err)
}

err = t.client.CreateOrUpdateRole(ctx, clarr)
if err != nil {
return fmt.Errorf("reconciling ClusterMonitoringApiReader Role failed: %w", err)
}

pr, err := t.factory.ClusterMonitoringOperatorPrometheusRule()
if err != nil {
return fmt.Errorf("initializing cluster-monitoring-operator rules PrometheusRule failed: %w", err)
}
err = t.client.CreateOrUpdatePrometheusRule(ctx, pr)
if err != nil {
return fmt.Errorf("reconciling cluster-monitoring-operator rules PrometheusRule failed: %w", err)
}

smcmo, err := t.factory.ClusterMonitoringOperatorServiceMonitor()
if err != nil {
return fmt.Errorf("initializing Cluster Monitoring Operator ServiceMonitor failed: %w", err)
}

err = t.client.CreateOrUpdateServiceMonitor(ctx, smcmo)
if err != nil {
return fmt.Errorf("reconciling Cluster Monitoring Operator ServiceMonitor failed: %w", err)
}

s, err := t.factory.GRPCSecret()
if err != nil {
return fmt.Errorf("error initializing Cluster Monitoring Operator GRPC TLS secret: %w", err)
}

loaded, err := t.client.GetSecret(ctx, s.Namespace, s.Name)
switch {
case apierrors.IsNotFound(err):
// No secret was found, proceed with the default empty secret from manifests.
klog.V(5).Info("creating new Cluster Monitoring Operator GRPC TLS secret")
case err == nil:
// Secret was found, use that.
s = loaded
klog.V(5).Info("found existing Cluster Monitoring Operator GRPC TLS secret")
default:
return fmt.Errorf("error reading Cluster Monitoring Operator GRPC TLS secret: %w", err)
}

err = manifests.RotateGRPCSecret(s)
if err != nil {
return fmt.Errorf("error rotating Cluster Monitoring Operator GRPC TLS secret: %w", err)
}

err = t.client.CreateOrUpdateSecret(ctx, s)
if err != nil {
return fmt.Errorf("error creating Cluster Monitoring Operator GRPC TLS secret: %w", err)
}

return nil
}