Skip to content

Commit c5668e3

Browse files
Merge pull request #2452 from simonpasquier/OCPBUGS-39126
OCPBUGS-39126: disable user-defined monitoring per object
2 parents 886a4be + dd9708a commit c5668e3

File tree

9 files changed

+154
-28
lines changed

9 files changed

+154
-28
lines changed

assets/prometheus-user-workload/prometheus.yaml

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,12 @@ spec:
204204
operator: NotIn
205205
values:
206206
- "false"
207-
podMonitorSelector: {}
207+
podMonitorSelector:
208+
matchExpressions:
209+
- key: openshift.io/user-monitoring
210+
operator: NotIn
211+
values:
212+
- "false"
208213
priorityClassName: openshift-user-critical
209214
probeNamespaceSelector:
210215
matchExpressions:
@@ -216,7 +221,12 @@ spec:
216221
operator: NotIn
217222
values:
218223
- "false"
219-
probeSelector: {}
224+
probeSelector:
225+
matchExpressions:
226+
- key: openshift.io/user-monitoring
227+
operator: NotIn
228+
values:
229+
- "false"
220230
replicas: 2
221231
resources:
222232
requests:
@@ -233,8 +243,15 @@ spec:
233243
values:
234244
- "false"
235245
ruleSelector:
236-
matchLabels:
237-
openshift.io/prometheus-rule-evaluation-scope: leaf-prometheus
246+
matchExpressions:
247+
- key: openshift.io/user-monitoring
248+
operator: NotIn
249+
values:
250+
- "false"
251+
- key: openshift.io/prometheus-rule-evaluation-scope
252+
operator: In
253+
values:
254+
- leaf-prometheus
238255
scrapeConfigNamespaceSelector: null
239256
scrapeConfigSelector: null
240257
secrets:
@@ -259,7 +276,12 @@ spec:
259276
operator: NotIn
260277
values:
261278
- "false"
262-
serviceMonitorSelector: {}
279+
serviceMonitorSelector:
280+
matchExpressions:
281+
- key: openshift.io/user-monitoring
282+
operator: NotIn
283+
values:
284+
- "false"
263285
thanos:
264286
image: quay.io/thanos/thanos:v0.36.1
265287
resources:

assets/thanos-ruler/thanos-ruler.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ spec:
129129
- "false"
130130
ruleSelector:
131131
matchExpressions:
132+
- key: openshift.io/user-monitoring
133+
operator: NotIn
134+
values:
135+
- "false"
132136
- key: openshift.io/prometheus-rule-evaluation-scope
133137
operator: NotIn
134138
values:

jsonnet/components/prometheus-user-workload.libsonnet

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -352,16 +352,20 @@ function(params)
352352
$.kubeRbacProxyFederateSecret.metadata.name,
353353
],
354354
configMaps: ['serving-certs-ca-bundle', 'metrics-client-ca'],
355-
probeSelector: {},
355+
probeSelector: cfg.resourceSelector,
356356
probeNamespaceSelector: cfg.namespaceSelector,
357-
podMonitorSelector: {},
357+
podMonitorSelector: cfg.resourceSelector,
358358
podMonitorNamespaceSelector: cfg.namespaceSelector,
359-
serviceMonitorSelector: {},
359+
serviceMonitorSelector: cfg.resourceSelector,
360360
serviceMonitorNamespaceSelector: cfg.namespaceSelector,
361-
ruleSelector: {
362-
matchLabels: {
363-
'openshift.io/prometheus-rule-evaluation-scope': 'leaf-prometheus',
364-
},
361+
ruleSelector: cfg.resourceSelector {
362+
matchExpressions+: [
363+
{
364+
key: 'openshift.io/prometheus-rule-evaluation-scope',
365+
operator: 'In',
366+
values: ['leaf-prometheus'],
367+
},
368+
],
365369
},
366370
ruleNamespaceSelector: cfg.namespaceSelector,
367371
scrapeConfigSelector: null,

jsonnet/components/thanos-ruler.libsonnet

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,8 @@ function(params)
389389
},
390390
enforcedNamespaceLabel: 'namespace',
391391
listenLocal: true,
392-
ruleSelector: {
393-
matchExpressions:
392+
ruleSelector: cfg.resourceSelector {
393+
matchExpressions+:
394394
[
395395
{
396396
key: 'openshift.io/prometheus-rule-evaluation-scope',

jsonnet/main.jsonnet

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,15 @@ local commonConfig = {
3636
'openshift.io/cluster-monitoring': 'true',
3737
},
3838
},
39+
userWorkloadMonitoringResourceSelector: {
40+
matchExpressions: [
41+
{
42+
key: 'openshift.io/user-monitoring',
43+
operator: 'NotIn',
44+
values: ['false'],
45+
},
46+
],
47+
},
3948
userWorkloadMonitoringNamespaceSelector: {
4049
matchExpressions: [
4150
{
@@ -326,6 +335,7 @@ local inCluster =
326335
'app.kubernetes.io/name': 'thanos-ruler',
327336
'thanos-ruler': 'user-workload',
328337
},
338+
resourceSelector: $.values.common.userWorkloadMonitoringResourceSelector,
329339
namespaceSelector: $.values.common.userWorkloadMonitoringNamespaceSelector,
330340
commonLabels+: $.values.common.commonLabels,
331341
kubeRbacProxyImage: $.values.common.images.kubeRbacProxy,
@@ -478,6 +488,7 @@ local userWorkload =
478488
requests: { memory: '30Mi', cpu: '6m' },
479489
},
480490
namespaces: [$.values.common.namespaceUserWorkload],
491+
resourceSelector: $.values.common.userWorkloadMonitoringResourceSelector,
481492
namespaceSelector: $.values.common.userWorkloadMonitoringNamespaceSelector,
482493
thanos: inCluster.values.prometheus.thanos,
483494
tlsCipherSuites: $.values.common.tlsCipherSuites,

pkg/manifests/manifests.go

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ const (
6363

6464
nodeSelectorMaster = "node-role.kubernetes.io/master"
6565

66+
userMonitoringLabel = "openshift.io/user-monitoring"
67+
clusterMonitoringLabel = "openshift.io/cluster-monitoring"
68+
6669
platformAlertmanagerService = "alertmanager-main"
6770
userWorkloadAlertmanagerService = "alertmanager-user-workload"
6871

@@ -476,17 +479,24 @@ func (f *Factory) AlertmanagerUserWorkload() (*monv1.Alertmanager, error) {
476479
a.Spec.Secrets = append(a.Spec.Secrets, alertmanagerConfig.Secrets...)
477480

478481
if alertmanagerConfig.EnableAlertmanagerConfig {
479-
a.Spec.AlertmanagerConfigSelector = &metav1.LabelSelector{}
480-
482+
a.Spec.AlertmanagerConfigSelector = &metav1.LabelSelector{
483+
MatchExpressions: []metav1.LabelSelectorRequirement{
484+
{
485+
Key: userMonitoringLabel,
486+
Operator: metav1.LabelSelectorOpNotIn,
487+
Values: []string{"false"},
488+
},
489+
},
490+
}
481491
a.Spec.AlertmanagerConfigNamespaceSelector = &metav1.LabelSelector{
482492
MatchExpressions: []metav1.LabelSelectorRequirement{
483493
{
484-
Key: "openshift.io/cluster-monitoring",
494+
Key: clusterMonitoringLabel,
485495
Operator: metav1.LabelSelectorOpNotIn,
486496
Values: []string{"true"},
487497
},
488498
{
489-
Key: "openshift.io/user-monitoring",
499+
Key: userMonitoringLabel,
490500
Operator: metav1.LabelSelectorOpNotIn,
491501
Values: []string{"false"},
492502
},
@@ -608,17 +618,25 @@ func (f *Factory) AlertmanagerMain() (*monv1.Alertmanager, error) {
608618

609619
if f.config.ClusterMonitoringConfiguration.AlertmanagerMainConfig.EnableUserAlertManagerConfig &&
610620
!f.config.UserWorkloadConfiguration.Alertmanager.Enabled {
611-
a.Spec.AlertmanagerConfigSelector = &metav1.LabelSelector{}
621+
a.Spec.AlertmanagerConfigSelector = &metav1.LabelSelector{
622+
MatchExpressions: []metav1.LabelSelectorRequirement{
623+
{
624+
Key: userMonitoringLabel,
625+
Operator: metav1.LabelSelectorOpNotIn,
626+
Values: []string{"false"},
627+
},
628+
},
629+
}
612630

613631
a.Spec.AlertmanagerConfigNamespaceSelector = &metav1.LabelSelector{
614632
MatchExpressions: []metav1.LabelSelectorRequirement{
615633
{
616-
Key: "openshift.io/cluster-monitoring",
634+
Key: clusterMonitoringLabel,
617635
Operator: metav1.LabelSelectorOpNotIn,
618636
Values: []string{"true"},
619637
},
620638
{
621-
Key: "openshift.io/user-monitoring",
639+
Key: userMonitoringLabel,
622640
Operator: metav1.LabelSelectorOpNotIn,
623641
Values: []string{"false"},
624642
},

pkg/manifests/manifests_test.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3085,7 +3085,17 @@ ingress:
30853085
t.Fatal("expected 'alertmanagerConfigSelector' to configure selector")
30863086
}
30873087

3088-
if !reflect.DeepEqual(a.Spec.AlertmanagerConfigSelector, &metav1.LabelSelector{}) {
3088+
if !reflect.DeepEqual(
3089+
a.Spec.AlertmanagerConfigSelector,
3090+
&metav1.LabelSelector{
3091+
MatchExpressions: []metav1.LabelSelectorRequirement{
3092+
{
3093+
Key: userMonitoringLabel,
3094+
Operator: metav1.LabelSelectorOpNotIn,
3095+
Values: []string{"false"},
3096+
},
3097+
},
3098+
}) {
30893099
t.Fatal("expected match all alertmanagerConfigSelector")
30903100
}
30913101

@@ -3098,13 +3108,13 @@ ingress:
30983108
}
30993109

31003110
expectPlatformOptIn := metav1.LabelSelectorRequirement{
3101-
Key: "openshift.io/cluster-monitoring",
3111+
Key: clusterMonitoringLabel,
31023112
Operator: metav1.LabelSelectorOpNotIn,
31033113
Values: []string{"true"},
31043114
}
31053115

31063116
expectUWMOptIn := metav1.LabelSelectorRequirement{
3107-
Key: "openshift.io/user-monitoring",
3117+
Key: userMonitoringLabel,
31083118
Operator: metav1.LabelSelectorOpNotIn,
31093119
Values: []string{"false"},
31103120
}

test/e2e/user_workload_monitoring_test.go

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ func TestUserWorkloadMonitoringOptOut(t *testing.T) {
199199
f func(*testing.T)
200200
}{
201201
{"assert namespace opt out removes appropriate targets", assertNamespaceOptOut},
202+
{"assert service monitor opt out removes appropriate targets", assertServiceMonitorOptOut},
202203
} {
203204
t.Run(scenario.name, scenario.f)
204205
}
@@ -1408,7 +1409,7 @@ func assertGRPCTLSRotation(t *testing.T) {
14081409
func assertNamespaceOptOut(t *testing.T) {
14091410
ctx := context.Background()
14101411

1411-
serviceMonitorJobName := "serviceMonitor/user-workload-test/prometheus-example-monitor/0"
1412+
serviceMonitorJobName := fmt.Sprintf("serviceMonitor/%s/%s/0", userWorkloadTestNs, serviceMonitorTestName)
14121413

14131414
// Ensure the target for the example ServiceMonitor exists.
14141415
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
@@ -1459,3 +1460,58 @@ func assertNamespaceOptOut(t *testing.T) {
14591460
return getActiveTarget(body, serviceMonitorJobName)
14601461
})
14611462
}
1463+
1464+
func assertServiceMonitorOptOut(t *testing.T) {
1465+
ctx := context.Background()
1466+
1467+
serviceMonitorJobName := fmt.Sprintf("serviceMonitor/%s/%s/0", userWorkloadTestNs, serviceMonitorTestName)
1468+
1469+
// Ensure the target for the example ServiceMonitor exists.
1470+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1471+
return getActiveTarget(body, serviceMonitorJobName)
1472+
})
1473+
1474+
// Add opt-out label to service monitor.
1475+
sm, err := f.MonitoringClient.ServiceMonitors(userWorkloadTestNs).Get(ctx, serviceMonitorTestName, metav1.GetOptions{})
1476+
if err != nil {
1477+
t.Fatalf("Failed to fetch user-workload service monitor: %v", err)
1478+
}
1479+
1480+
labels := sm.GetLabels()
1481+
labels["openshift.io/user-monitoring"] = "false"
1482+
sm.SetLabels(labels)
1483+
1484+
_, err = f.MonitoringClient.ServiceMonitors(userWorkloadTestNs).Update(ctx, sm, metav1.UpdateOptions{})
1485+
if err != nil {
1486+
t.Fatalf("Failed to apply user-monitoring opt-out label: %v", err)
1487+
}
1488+
1489+
// Ensure the target for the example ServiceMonitor is removed.
1490+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1491+
if err := getActiveTarget(body, serviceMonitorJobName); err == nil {
1492+
return fmt.Errorf("target '%s' exists, but should not", serviceMonitorJobName)
1493+
}
1494+
1495+
return nil
1496+
})
1497+
1498+
// Remove opt-out label from service monitor.
1499+
sm, err = f.MonitoringClient.ServiceMonitors(userWorkloadTestNs).Get(ctx, serviceMonitorTestName, metav1.GetOptions{})
1500+
if err != nil {
1501+
t.Fatalf("Failed to fetch user-workload service monitor: %v", err)
1502+
}
1503+
1504+
labels = sm.GetLabels()
1505+
delete(labels, "openshift.io/user-monitoring")
1506+
sm.SetLabels(labels)
1507+
1508+
_, err = f.MonitoringClient.ServiceMonitors(userWorkloadTestNs).Update(ctx, sm, metav1.UpdateOptions{})
1509+
if err != nil {
1510+
t.Fatalf("Failed to remove user-monitoring opt-out label: %v", err)
1511+
}
1512+
1513+
// Ensure the target for the example ServiceMonitor is recreated.
1514+
f.ThanosQuerierClient.WaitForTargetsReturn(t, 5*time.Minute, func(body []byte) error {
1515+
return getActiveTarget(body, serviceMonitorJobName)
1516+
})
1517+
}

test/e2e/uwm_helpers.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ import (
1717
)
1818

1919
const (
20-
userWorkloadTestNs = "user-workload-test"
20+
userWorkloadTestNs = "user-workload-test"
21+
serviceMonitorTestName = "prometheus-example-monitor"
2122
)
2223

2324
var (
@@ -171,9 +172,9 @@ func deployUserApplication(t *testing.T, f *framework.Framework) error {
171172

172173
_, err = f.MonitoringClient.ServiceMonitors(userWorkloadTestNs).Create(ctx, &monitoringv1.ServiceMonitor{
173174
ObjectMeta: metav1.ObjectMeta{
174-
Name: "prometheus-example-monitor",
175+
Name: serviceMonitorTestName,
175176
Labels: map[string]string{
176-
"k8s-app": "prometheus-example-monitor",
177+
"k8s-app": serviceMonitorTestName,
177178
framework.E2eTestLabelName: framework.E2eTestLabelValue,
178179
},
179180
},

0 commit comments

Comments
 (0)