Skip to content

Commit 07101a1

Browse files
committed
MON-4361,MON-4380: Add optional monitoring logic
Enabling the `OptionalMonitoring` capability translates to enabling all optional monitoring components under CMO. Note that since capabilities cannot be disabled once enabled, cleanup for optional monitoring resources is not necessary. To clarify further, there are two possible paths at install time: * capability is disabled -> enabled (no need to clean up) * capability is enabled -/> disabled (cannot be disabled) (no need to clean up) Signed-off-by: Pranshu Srivastava <[email protected]>
1 parent 55d6da0 commit 07101a1

10 files changed

+87
-41
lines changed

pkg/client/client.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,10 @@ func (c *Client) HasConsoleCapability(ctx context.Context) (bool, error) {
17481748
return c.HasClusterCapability(ctx, configv1.ClusterVersionCapabilityConsole)
17491749
}
17501750

1751+
// HasOptionalMonitoringCapability returns the result of HasClusterCapability
// for the optional monitoring capability, passing ctx through unchanged.
//
// NOTE(review): the capability name passed below is an empty string. This is
// presumably a placeholder until a dedicated capability constant is available;
// confirm the intended value before relying on the result.
func (c *Client) HasOptionalMonitoringCapability(ctx context.Context) (bool, error) {
1752+
return c.HasClusterCapability(ctx, "")
1753+
}
1754+
17511755
// CreateOrUpdateConsolePlugin function uses retries because API requests related to the ConsolePlugin resource
17521756
// may depend on the availability of a conversion container. This container is part of the console-operator Pod, which is not duplicated.
17531757
// If this pod is down (due to restarts for upgrades or other reasons), transient failures will be reported.

pkg/tasks/alertmanager.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ func NewAlertmanagerTask(
4444
}
4545

4646
func (t *AlertmanagerTask) Run(ctx context.Context) error {
47-
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() {
47+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
48+
if err != nil {
49+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
50+
}
51+
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() && optionalMonitoringEnabled {
4852
return t.create(ctx)
4953
}
5054

pkg/tasks/alertmanager_user_workload.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ func NewAlertmanagerUserWorkloadTask(
4444
}
4545

4646
func (t *AlertmanagerUserWorkloadTask) Run(ctx context.Context) error {
47-
if t.config.UserWorkloadConfiguration.Alertmanager.Enabled {
47+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
48+
if err != nil {
49+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
50+
}
51+
if t.config.UserWorkloadConfiguration.Alertmanager.Enabled && optionalMonitoringEnabled {
4852
return t.create(ctx)
4953
}
5054

pkg/tasks/clustermonitoringoperator.go

Lines changed: 37 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,23 @@ func NewClusterMonitoringOperatorTask(
4545
}
4646

4747
func (t *ClusterMonitoringOperatorTask) Run(ctx context.Context) error {
48-
for name, crf := range map[string]func() (*rbacv1.ClusterRole, error){
48+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
49+
if err != nil {
50+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
51+
}
52+
53+
crfs := map[string]func() (*rbacv1.ClusterRole, error){
4954
"cluster-monitoring-view": t.factory.ClusterMonitoringClusterRoleView,
5055
"system:aggregated-metrics-reader": t.factory.ClusterMonitoringClusterRoleAggregatedMetricsReader,
5156
"pod-metrics-reader": t.factory.ClusterMonitoringClusterRolePodMetricsReader,
5257
"monitoring-rules-edit": t.factory.ClusterMonitoringRulesEditClusterRole,
5358
"monitoring-rules-view": t.factory.ClusterMonitoringRulesViewClusterRole,
5459
"monitoring-edit": t.factory.ClusterMonitoringEditClusterRole,
55-
"alert-routing-edit": t.factory.ClusterMonitoringAlertingEditClusterRole,
56-
} {
60+
}
61+
if optionalMonitoringEnabled {
62+
crfs["alert-routing-edit"] = t.factory.ClusterMonitoringAlertingEditClusterRole
63+
}
64+
for name, crf := range crfs {
5765
cr, err := crf()
5866
if err != nil {
5967
return fmt.Errorf("initializing %s ClusterRole failed: %w", name, err)
@@ -65,34 +73,35 @@ func (t *ClusterMonitoringOperatorTask) Run(ctx context.Context) error {
6573
}
6674
}
6775

68-
uwcr, err := t.factory.ClusterMonitoringEditUserWorkloadConfigRole()
69-
if err != nil {
70-
return fmt.Errorf("initializing UserWorkloadConfigEdit Role failed: %w", err)
71-
}
72-
73-
err = t.client.CreateOrUpdateRole(ctx, uwcr)
74-
if err != nil {
75-
return fmt.Errorf("reconciling UserWorkloadConfigEdit Role failed: %w", err)
76-
}
76+
if optionalMonitoringEnabled {
77+
uwcr, err := t.factory.ClusterMonitoringEditUserWorkloadConfigRole()
78+
if err != nil {
79+
return fmt.Errorf("initializing UserWorkloadConfigEdit Role failed: %w", err)
80+
}
7781

78-
uwar, err := t.factory.ClusterMonitoringEditUserWorkloadAlertmanagerApiReader()
79-
if err != nil {
80-
return fmt.Errorf("initializing UserWorkloadAlertmanagerApiReader Role failed: %w", err)
81-
}
82+
err = t.client.CreateOrUpdateRole(ctx, uwcr)
83+
if err != nil {
84+
return fmt.Errorf("reconciling UserWorkloadConfigEdit Role failed: %w", err)
85+
}
86+
uwar, err := t.factory.ClusterMonitoringEditUserWorkloadAlertmanagerApiReader()
87+
if err != nil {
88+
return fmt.Errorf("initializing UserWorkloadAlertmanagerApiReader Role failed: %w", err)
89+
}
8290

83-
err = t.client.CreateOrUpdateRole(ctx, uwar)
84-
if err != nil {
85-
return fmt.Errorf("reconciling UserWorkloadAlertmanagerApiReader Role failed: %w", err)
86-
}
91+
err = t.client.CreateOrUpdateRole(ctx, uwar)
92+
if err != nil {
93+
return fmt.Errorf("reconciling UserWorkloadAlertmanagerApiReader Role failed: %w", err)
94+
}
8795

88-
uwaw, err := t.factory.ClusterMonitoringEditUserWorkloadAlertmanagerApiWriter()
89-
if err != nil {
90-
return fmt.Errorf("initializing UserWorkloadAlertmanagerApiWriter Role failed: %w", err)
91-
}
96+
uwaw, err := t.factory.ClusterMonitoringEditUserWorkloadAlertmanagerApiWriter()
97+
if err != nil {
98+
return fmt.Errorf("initializing UserWorkloadAlertmanagerApiWriter Role failed: %w", err)
99+
}
92100

93-
err = t.client.CreateOrUpdateRole(ctx, uwaw)
94-
if err != nil {
95-
return fmt.Errorf("reconciling UserWorkloadAlertmanagerApiWriter Role failed: %w", err)
101+
err = t.client.CreateOrUpdateRole(ctx, uwaw)
102+
if err != nil {
103+
return fmt.Errorf("reconciling UserWorkloadAlertmanagerApiWriter Role failed: %w", err)
104+
}
96105
}
97106

98107
amrr, err := t.factory.ClusterMonitoringAlertManagerViewRole()
@@ -104,8 +113,7 @@ func (t *ClusterMonitoringOperatorTask) Run(ctx context.Context) error {
104113
if err != nil {
105114
return fmt.Errorf("initializing AlertmanagerWrite Role failed: %w", err)
106115
}
107-
108-
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() {
116+
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() && optionalMonitoringEnabled {
109117
if err = t.client.CreateOrUpdateRole(ctx, amwr); err != nil {
110118
return fmt.Errorf("reconciling AlertmanagerWrite Role failed: %w", err)
111119
}

pkg/tasks/configsharing.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,11 @@ func (t *ConfigSharingTask) Run(ctx context.Context) error {
5757
return fmt.Errorf("failed to retrieve Prometheus host: %w", err)
5858
}
5959

60-
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() {
60+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
61+
if err != nil {
62+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
63+
}
64+
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() && optionalMonitoringEnabled {
6165
amRoute, err := t.factory.AlertmanagerRoute()
6266
if err != nil {
6367
return fmt.Errorf("initializing Alertmanager Route failed: %w", err)

pkg/tasks/prometheus.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,11 @@ func (t *PrometheusTask) create(ctx context.Context) error {
173173
return fmt.Errorf("initializing Prometheus Alertmanager RoleBinding failed: %w", err)
174174
}
175175

176-
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() {
176+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
177+
if err != nil {
178+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
179+
}
180+
if t.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.IsEnabled() && optionalMonitoringEnabled {
177181
if err = t.client.CreateOrUpdateRoleBinding(ctx, amrb); err != nil {
178182
return fmt.Errorf("reconciling Prometheus Alertmanager RoleBinding failed: %w", err)
179183
}

pkg/tasks/prometheus_user_workload.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@ func NewPrometheusUserWorkloadTask(client *client.Client, factory *manifests.Fac
4040
}
4141

4242
func (t *PrometheusUserWorkloadTask) Run(ctx context.Context) error {
43-
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled {
43+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
44+
if err != nil {
45+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
46+
}
47+
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled && optionalMonitoringEnabled {
4448
return t.create(ctx)
4549
}
4650

pkg/tasks/prometheusoperator.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,14 +180,20 @@ func (t *PrometheusOperatorTask) runAdmissionWebhook(ctx context.Context) error
180180
return fmt.Errorf("reconciling Prometheus Rule Validating Webhook failed: %w", err)
181181
}
182182

183-
aw, err := t.factory.AlertManagerConfigValidatingWebhook()
183+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
184184
if err != nil {
185-
return fmt.Errorf("initializing AlertManagerConfig Validating Webhook failed: %w", err)
185+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
186186
}
187+
if optionalMonitoringEnabled {
188+
aw, err := t.factory.AlertManagerConfigValidatingWebhook()
189+
if err != nil {
190+
return fmt.Errorf("initializing AlertManagerConfig Validating Webhook failed: %w", err)
191+
}
187192

188-
err = t.client.CreateOrUpdateValidatingWebhookConfiguration(ctx, aw)
189-
if err != nil {
190-
return fmt.Errorf("reconciling AlertManagerConfig Validating Webhook failed: %w", err)
193+
err = t.client.CreateOrUpdateValidatingWebhookConfiguration(ctx, aw)
194+
if err != nil {
195+
return fmt.Errorf("reconciling AlertManagerConfig Validating Webhook failed: %w", err)
196+
}
191197
}
192198

193199
return nil

pkg/tasks/prometheusoperator_user_workload.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ func NewPrometheusOperatorUserWorkloadTask(client *client.Client, factory *manif
3939
}
4040

4141
func (t *PrometheusOperatorUserWorkloadTask) Run(ctx context.Context) error {
42-
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled {
42+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
43+
if err != nil {
44+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
45+
}
46+
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled && optionalMonitoringEnabled {
4347
return t.create(ctx)
4448
}
4549

pkg/tasks/thanos_ruler_user_workload.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ func NewThanosRulerUserWorkloadTask(client *client.Client, factory *manifests.Fa
3939
}
4040

4141
func (t *ThanosRulerUserWorkloadTask) Run(ctx context.Context) error {
42-
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled {
42+
optionalMonitoringEnabled, err := t.client.HasOptionalMonitoringCapability(ctx)
43+
if err != nil {
44+
return fmt.Errorf("checking for optional monitoring capability failed: %w", err)
45+
}
46+
if *t.config.ClusterMonitoringConfiguration.UserWorkloadEnabled && optionalMonitoringEnabled {
4347
return t.create(ctx)
4448
}
4549

0 commit comments

Comments
 (0)