Skip to content

Commit 26d99b4

Browse files
Merge pull request #2595 from danwinship/crd-metrics
MON-4282: Multi-tenant support for KSM's CRS
2 parents 26f9666 + a5c3356 commit 26d99b4

File tree

10 files changed

+38
-143
lines changed

10 files changed

+38
-143
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
22

3+
## 4.20
4+
5+
- [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream.
6+
37
## 4.18
48

59
- [#2503](https://github.com/openshift/cluster-monitoring-operator/issues/2503) Expose `scrapeInterval` setting for UWM Prometheus.

assets/kube-state-metrics/cluster-role.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,17 +129,17 @@ rules:
129129
- list
130130
- watch
131131
- apiGroups:
132-
- autoscaling.k8s.io
132+
- apiextensions.k8s.io
133133
resources:
134-
- verticalpodautoscalers
134+
- customresourcedefinitions
135135
verbs:
136+
- get
136137
- list
137138
- watch
138139
- apiGroups:
139-
- apiextensions.k8s.io
140+
- autoscaling.k8s.io
140141
resources:
141-
- customresourcedefinitions
142+
- verticalpodautoscalers
142143
verbs:
143-
- get
144144
- list
145145
- watch

assets/kube-state-metrics/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ spec:
3636
- --port=8081
3737
- --telemetry-host=127.0.0.1
3838
- --telemetry-port=8082
39+
- --custom-resource-state-config-file=/etc/kube-state-metrics/custom-resource-state-configmap.yaml
3940
- |
4041
--metric-denylist=
4142
^kube_secret_labels$,

jsonnet/components/kube-state-metrics.libsonnet

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,18 @@ function(params)
4242

4343
clusterRole+: {
4444
rules+: [
45-
{
46-
apiGroups: ['autoscaling.k8s.io'],
47-
resources: ['verticalpodautoscalers'],
48-
verbs: ['list', 'watch'],
49-
},
5045
// CRD read permissions are required for kube-state-metrics to support the CRS feature-set.
5146
// Refer: https://github.com/kubernetes/kube-state-metrics/pull/1851/files#diff-916e6863e1245c673b4e5965c98dc27bafbd72650fdb38ce65ea73ee6304e027R45-R47
5247
{
5348
apiGroups: ['apiextensions.k8s.io'],
5449
resources: ['customresourcedefinitions'],
5550
verbs: ['get', 'list', 'watch'],
5651
},
52+
{
53+
apiGroups: ['autoscaling.k8s.io'],
54+
resources: ['verticalpodautoscalers'],
55+
verbs: ['list', 'watch'],
56+
},
5757
],
5858
},
5959

@@ -173,8 +173,8 @@ function(params)
173173
kubeRbacProxySecret: generateSecret.staticAuthSecret(cfg.namespace, cfg.commonLabels, 'kube-state-metrics-kube-rbac-proxy-config'),
174174

175175
// This removes the upstream addon-resizer and all resource requests and
176-
// limits. Additionally configures the kube-rbac-proxies to use the serving
177-
// cert configured on the `Service` above.
176+
// limits. Additionally, it configures the kube-rbac-proxies to use the serving
177+
// cert configured on the `Service` above, and enables CRD metrics.
178178
//
179179
// The upstream kube-state-metrics Dockerfile defines a `VOLUME` directive
180180
// in `/tmp`. Although this is unused it will take some time for it to get
@@ -237,6 +237,7 @@ function(params)
237237
else
238238
c {
239239
args+: [
240+
'--custom-resource-state-config-file=/etc/kube-state-metrics/custom-resource-state-configmap.yaml',
240241
|||
241242
--metric-denylist=
242243
^kube_secret_labels$,
@@ -259,7 +260,6 @@ function(params)
259260
name: tmpVolumeName,
260261
readOnly: false,
261262
},
262-
// The custom resource state configmap is always mounted in the kube-state-metrics container and only when the VPA CRD is installed, CMO will add `--custom-resource-state-config-file` to the container arguments list.
263263
{
264264
mountPath: '/etc/kube-state-metrics',
265265
name: crsVolumeName,

manifests/0000_50_cluster-monitoring-operator_02-role.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -360,18 +360,18 @@ rules:
360360
- list
361361
- watch
362362
- apiGroups:
363-
- autoscaling.k8s.io
363+
- apiextensions.k8s.io
364364
resources:
365-
- verticalpodautoscalers
365+
- customresourcedefinitions
366366
verbs:
367+
- get
367368
- list
368369
- watch
369370
- apiGroups:
370-
- apiextensions.k8s.io
371+
- autoscaling.k8s.io
371372
resources:
372-
- customresourcedefinitions
373+
- verticalpodautoscalers
373374
verbs:
374-
- get
375375
- list
376376
- watch
377377
- apiGroups:

pkg/client/client.go

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,12 @@ import (
6565
"k8s.io/klog/v2"
6666
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
6767
aggregatorclient "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset"
68-
"k8s.io/utils/ptr"
6968
)
7069

7170
const (
72-
deleteTimeout = 10 * time.Minute
73-
metadataPrefix = "monitoring.openshift.io/"
74-
clusterConsole = "cluster"
75-
VerticalPodAutoscalerCRDMetadataName = "verticalpodautoscalers.autoscaling.k8s.io"
71+
deleteTimeout = 10 * time.Minute
72+
metadataPrefix = "monitoring.openshift.io/"
73+
clusterConsole = "cluster"
7674
)
7775

7876
type Client struct {
@@ -276,10 +274,6 @@ func (c *Client) KubernetesInterface() kubernetes.Interface {
276274
return c.kclient
277275
}
278276

279-
func (c *Client) ApiExtensionsInterface() apiextensionsclient.Interface {
280-
return c.eclient
281-
}
282-
283277
func (c *Client) EventRecorder() events.Recorder {
284278
return c.eventRecorder
285279
}
@@ -423,21 +417,6 @@ func (c *Client) ClusterOperatorListWatch(ctx context.Context, name string) *cac
423417
}
424418
}
425419

426-
func (c *Client) VerticalPodAutoscalerCRDListWatch(ctx context.Context) *cache.ListWatch {
427-
return &cache.ListWatch{
428-
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
429-
return c.eclient.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{
430-
FieldSelector: fields.OneTermEqualSelector("metadata.name", VerticalPodAutoscalerCRDMetadataName).String(),
431-
})
432-
},
433-
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
434-
return c.eclient.ApiextensionsV1().CustomResourceDefinitions().Watch(ctx, metav1.ListOptions{
435-
FieldSelector: fields.OneTermEqualSelector("metadata.name", VerticalPodAutoscalerCRDMetadataName).String(),
436-
})
437-
},
438-
}
439-
}
440-
441420
func (c *Client) HasRouteCapability(ctx context.Context) (bool, error) {
442421
_, err := c.oscclient.ConfigV1().ClusterOperators().Get(ctx, "ingress", metav1.GetOptions{})
443422
if apierrors.IsNotFound(err) {
@@ -1850,29 +1829,6 @@ func (c *Client) RegisterConsolePlugin(ctx context.Context, name string) error {
18501829
return nil
18511830
}
18521831

1853-
// VPACustomResourceDefinitionPresent checks if VerticalPodAutoscaler CRD is present in the cluster.
1854-
func (c *Client) VPACustomResourceDefinitionPresent(ctx context.Context, lastKnownVPACustomResourceDefinitionPresent *bool) (*bool, error) {
1855-
_, err := c.ApiExtensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Get(ctx, VerticalPodAutoscalerCRDMetadataName, metav1.GetOptions{})
1856-
if err != nil {
1857-
// VPA CRD is absent.
1858-
if apierrors.IsNotFound(err) {
1859-
return ptr.To(false), nil
1860-
}
1861-
1862-
// See if we have an idea of the state of the CRD's presence before the transient error occurred.
1863-
// If we do, resort to that since we do not want this to cause unnecessary reconciles.
1864-
if lastKnownVPACustomResourceDefinitionPresent != nil {
1865-
return lastKnownVPACustomResourceDefinitionPresent, nil
1866-
}
1867-
1868-
// If we don't, throw.
1869-
return nil, fmt.Errorf("failed to get %s CRD: %w", VerticalPodAutoscalerCRDMetadataName, err)
1870-
}
1871-
1872-
// VPA CRD is present.
1873-
return ptr.To(true), nil
1874-
}
1875-
18761832
// mergeMetadata merges labels and annotations from `existing` map into `required` one where `required` has precedence
18771833
// over `existing` keys and values. Additionally, function performs filtering of labels and annotations from `exiting` map
18781834
// where keys starting from string defined in `metadataPrefix` are deleted. This prevents issues with preserving stale

pkg/manifests/manifests.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -316,9 +316,6 @@ var (
316316
KubeRbacProxyTLSCipherSuitesFlag = "--tls-cipher-suites="
317317
KubeRbacProxyMinTLSVersionFlag = "--tls-min-version="
318318

319-
kubeStateMetricsCustomResourceStateConfigFileFlag = "--custom-resource-state-config-file="
320-
kubeStateMetricsCustomResourceStateConfigFile = "/etc/kube-state-metrics/custom-resource-state-configmap.yaml"
321-
322319
AuthProxyExternalURLFlag = "-external-url="
323320
AuthProxyCookieDomainFlag = "-cookie-domain="
324321
AuthProxyRedirectURLFlag = "-redirect-url="
@@ -748,8 +745,7 @@ func (f *Factory) KubeStateMetricsMinimalServiceMonitor() (*monv1.ServiceMonitor
748745
return f.NewServiceMonitor(f.assets.MustNewAssetSlice(KubeStateMetricsMinimalServiceMonitor))
749746
}
750747

751-
func (f *Factory) KubeStateMetricsDeployment(enableCRSMetrics bool) (*appsv1.Deployment, error) {
752-
flagCRSConfigFile := kubeStateMetricsCustomResourceStateConfigFileFlag + kubeStateMetricsCustomResourceStateConfigFile
748+
func (f *Factory) KubeStateMetricsDeployment() (*appsv1.Deployment, error) {
753749
d, err := f.NewDeployment(f.assets.MustNewAssetSlice(KubeStateMetricsDeployment))
754750
if err != nil {
755751
return nil, err
@@ -764,9 +760,6 @@ func (f *Factory) KubeStateMetricsDeployment(enableCRSMetrics bool) (*appsv1.Dep
764760
if f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources != nil {
765761
d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources
766762
}
767-
if enableCRSMetrics {
768-
d.Spec.Template.Spec.Containers[i].Args = append(container.Args, flagCRSConfigFile)
769-
}
770763
}
771764
}
772765

pkg/manifests/manifests_test.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ func TestUnconfiguredManifests(t *testing.T) {
272272
t.Fatal(err)
273273
}
274274

275-
_, err = f.KubeStateMetricsDeployment(false)
275+
_, err = f.KubeStateMetricsDeployment()
276276
if err != nil {
277277
t.Fatal(err)
278278
}
@@ -3694,14 +3694,13 @@ func TestKubeStateMetrics(t *testing.T) {
36943694

36953695
f := NewFactory("openshift-monitoring", "openshift-user-workload-monitoring", c, defaultInfrastructureReader(), &fakeProxyReader{}, NewAssets(assetsPath), &APIServerConfig{}, &configv1.Console{})
36963696

3697-
d, err := f.KubeStateMetricsDeployment(true)
3697+
d, err := f.KubeStateMetricsDeployment()
36983698
if err != nil {
36993699
t.Fatal(err)
37003700
}
37013701

37023702
kubeRbacProxyTLSCipherSuitesArg := ""
37033703
kubeRbacProxyMinTLSVersionArg := ""
3704-
gotKubeStateMetricsCustomResourceStateConfigFile := ""
37053704
for _, container := range d.Spec.Template.Spec.Containers {
37063705
switch container.Name {
37073706
case "kube-state-metrics":
@@ -3712,7 +3711,6 @@ func TestKubeStateMetrics(t *testing.T) {
37123711
t.Fatal("kube-state-metrics resources incorrectly configured")
37133712
}
37143713

3715-
gotKubeStateMetricsCustomResourceStateConfigFile = getContainerArgValue(d.Spec.Template.Spec.Containers, kubeStateMetricsCustomResourceStateConfigFileFlag, container.Name)
37163714
case "kube-rbac-proxy-self", "kube-rbac-proxy-main":
37173715
if container.Image != "docker.io/openshift/origin-kube-rbac-proxy:latest" {
37183716
t.Fatalf("%s image incorrectly configured", container.Name)
@@ -3745,11 +3743,7 @@ func TestKubeStateMetrics(t *testing.T) {
37453743
t.Fatal("kube-state-metrics topology spread constraints WhenUnsatisfiable not configured correctly")
37463744
}
37473745

3748-
if gotKubeStateMetricsCustomResourceStateConfigFile != kubeStateMetricsCustomResourceStateConfigFileFlag+kubeStateMetricsCustomResourceStateConfigFile {
3749-
t.Fatal("expected kube-state-metrics to enable custom-resource-state metrics")
3750-
}
3751-
3752-
d2, err := f.KubeStateMetricsDeployment(true)
3746+
d2, err := f.KubeStateMetricsDeployment()
37533747
if err != nil {
37543748
t.Fatal(err)
37553749
}

pkg/operator/operator.go

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import (
3333
"github.com/openshift/library-go/pkg/operator/events"
3434
certapiv1 "k8s.io/api/certificates/v1"
3535
v1 "k8s.io/api/core/v1"
36-
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
3736
apierrors "k8s.io/apimachinery/pkg/api/errors"
3837
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3938
k8sruntime "k8s.io/apimachinery/pkg/runtime"
@@ -205,27 +204,6 @@ type Operator struct {
205204

206205
ruleController *alert.RuleController
207206
relabelController *alert.RelabelConfigController
208-
209-
// lastKnownVPACustomResourceDefinitionPresent is a boolean pointer that
210-
// remembers the presence of the VPA CRD in the cluster between
211-
// kube-state-metrics task reconciliations. It is used to determine
212-
// whether to enable kube-state-metrics custom-resource-state-based
213-
// metrics for VPA CRs, even in cases where the Kube API may emit a
214-
// transient error (errors excluding `IsNotFound`) on the VPA CRD `GET`
215-
// requests (to determine its presence).
216-
// * `true` indicates that the VPA CRD is already present in the
217-
// cluster, and the custom-resource-state-based metrics can be safely
218-
// enabled.
219-
// * `false` indicates that the VPA CRD is not present in the cluster,
220-
// and enabling the custom-resource-state-based metrics will cause
221-
// kube-state-metrics to error (affecting `KubeStateMetricsListErrors`).
222-
// * `nil` indicates that the presence of the VPA CRD is unknown, and
223-
// the operator will attempt to determine the presence of the VPA CRD in
224-
// the current reconciliation cycle, and remember its state in the next
225-
// one. In the case where the VPA CRD is added or removed between
226-
// reconciliations, the variable "forgets" it, and is set to `nil`,
227-
// triggering a check on the next cycle.
228-
lastKnownVPACustomResourceDefinitionPresent *bool
229207
}
230208

231209
func New(
@@ -446,20 +424,6 @@ func New(
446424
}
447425
o.informers = append(o.informers, informer)
448426

449-
informer = cache.NewSharedIndexInformer(
450-
o.client.VerticalPodAutoscalerCRDListWatch(ctx),
451-
&apiextv1.CustomResourceDefinition{}, resyncPeriod, cache.Indexers{},
452-
)
453-
// Only trigger reconciliation on the addition or removal of VPA CRDs.
454-
_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
455-
AddFunc: o.handleEvent,
456-
DeleteFunc: o.handleEvent,
457-
})
458-
if err != nil {
459-
return nil, err
460-
}
461-
o.informers = append(o.informers, informer)
462-
463427
kubeInformersOperatorNS := informers.NewSharedInformerFactoryWithOptions(
464428
c.KubernetesInterface(),
465429
resyncPeriod,
@@ -667,10 +631,7 @@ func (o *Operator) handleEvent(obj interface{}) {
667631
*configv1.APIServer,
668632
*configv1.Console,
669633
*configv1.ClusterOperator,
670-
*configv1.ClusterVersion,
671-
// Currently, the CRDs that trigger reconciliation are:
672-
// * verticalpodautoscalers.autoscaling.k8s.io
673-
*apiextv1.CustomResourceDefinition:
634+
*configv1.ClusterVersion:
674635
// Log GroupKind and Name of the obj
675636
rtObj := obj.(k8sruntime.Object)
676637
gk := rtObj.GetObjectKind().GroupVersionKind().GroupKind()
@@ -819,18 +780,6 @@ func (o *Operator) sync(ctx context.Context, key string) error {
819780
klog.Warningf("Fail to load ConsoleConfig, AlertManager's externalURL may be outdated")
820781
}
821782

822-
// Enable kube-state-metrics' custom-resource-state-based metrics if VPA CRD is installed within the cluster.
823-
o.lastKnownVPACustomResourceDefinitionPresent, err = o.client.VPACustomResourceDefinitionPresent(ctx, o.lastKnownVPACustomResourceDefinitionPresent)
824-
if err != nil {
825-
// Throw on all transient errors.
826-
return fmt.Errorf("unable to guess the desired state for kube-state-metrics' custom-resource-state metrics enablement: %w", err)
827-
} else {
828-
// If we didn't get an error, we can safely assume that the CRD is deterministically either present or absent.
829-
if *o.lastKnownVPACustomResourceDefinitionPresent {
830-
klog.Infof("%s CRD found, enabling kube-state-metrics' custom-resource-state-based metrics", client.VerticalPodAutoscalerCRDMetadataName)
831-
}
832-
}
833-
834783
factory := manifests.NewFactory(
835784
o.namespace,
836785
o.namespaceUserWorkload,
@@ -859,7 +808,7 @@ func (o *Operator) sync(ctx context.Context, key string) error {
859808
newTaskSpec("Prometheus", tasks.NewPrometheusTask(o.client, factory, config)),
860809
newTaskSpec("Alertmanager", tasks.NewAlertmanagerTask(o.client, factory, config)),
861810
newTaskSpec("NodeExporter", tasks.NewNodeExporterTask(o.client, factory)),
862-
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory, *o.lastKnownVPACustomResourceDefinitionPresent)),
811+
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory)),
863812
newTaskSpec("OpenshiftStateMetrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)),
864813
newTaskSpec("MetricsServer", tasks.NewMetricsServerTask(ctx, o.namespace, o.client, factory, config)),
865814
newTaskSpec("TelemeterClient", tasks.NewTelemeterClientTask(o.client, factory, config)),

0 commit comments

Comments
 (0)