Skip to content

Commit 51f5244

Browse files
committed
Always generate CRD metrics
When the VPA metrics were (re-)added, code was added to not try to watch VPAs unless the CRDs had been created, to avoid spurious errors. But it appears that ksm has since been fixed to not need this; it watches for CRD creation/deletion itself, and ignores the VPA metric definitions if the VPA CRDs don't exist. Signed-off-by: Dan Winship <[email protected]>
1 parent 09f9bc0 commit 51f5244

File tree

8 files changed

+23
-128
lines changed

8 files changed

+23
-128
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes.
22

3+
## 4.20
4+
5+
- [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream.
6+
37
## 4.18
48

59
- [#2503](https://github.com/openshift/cluster-monitoring-operator/issues/2503) Expose `scrapeInterval` setting for UWM Prometheus.

assets/kube-state-metrics/deployment.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ spec:
3636
- --port=8081
3737
- --telemetry-host=127.0.0.1
3838
- --telemetry-port=8082
39+
- --custom-resource-state-config-file=/etc/kube-state-metrics/custom-resource-state-configmap.yaml
3940
- |
4041
--metric-denylist=
4142
^kube_secret_labels$,

jsonnet/components/kube-state-metrics.libsonnet

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ function(params)
173173
kubeRbacProxySecret: generateSecret.staticAuthSecret(cfg.namespace, cfg.commonLabels, 'kube-state-metrics-kube-rbac-proxy-config'),
174174

175175
// This removes the upstream addon-resizer and all resource requests and
176-
// limits. Additionally configures the kube-rbac-proxies to use the serving
177-
// cert configured on the `Service` above.
176+
// limits. Additionally, it configures the kube-rbac-proxies to use the serving
177+
// cert configured on the `Service` above, and enables CRD metrics.
178178
//
179179
// The upstream kube-state-metrics Dockerfile defines a `VOLUME` directive
180180
// in `/tmp`. Although this is unused it will take some time for it to get
@@ -237,6 +237,7 @@ function(params)
237237
else
238238
c {
239239
args+: [
240+
'--custom-resource-state-config-file=/etc/kube-state-metrics/custom-resource-state-configmap.yaml',
240241
|||
241242
--metric-denylist=
242243
^kube_secret_labels$,
@@ -259,7 +260,6 @@ function(params)
259260
name: tmpVolumeName,
260261
readOnly: false,
261262
},
262-
// The custom resource state configmap is always mounted in the kube-state-metrics container and only when the VPA CRD is installed, CMO will add `--custom-resource-state-config-file` to the container arguments list.
263263
{
264264
mountPath: '/etc/kube-state-metrics',
265265
name: crsVolumeName,

pkg/client/client.go

Lines changed: 3 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,12 @@ import (
6565
"k8s.io/klog/v2"
6666
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
6767
aggregatorclient "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset"
68-
"k8s.io/utils/ptr"
6968
)
7069

7170
const (
72-
deleteTimeout = 10 * time.Minute
73-
metadataPrefix = "monitoring.openshift.io/"
74-
clusterConsole = "cluster"
75-
VerticalPodAutoscalerCRDMetadataName = "verticalpodautoscalers.autoscaling.k8s.io"
71+
deleteTimeout = 10 * time.Minute
72+
metadataPrefix = "monitoring.openshift.io/"
73+
clusterConsole = "cluster"
7674
)
7775

7876
type Client struct {
@@ -276,10 +274,6 @@ func (c *Client) KubernetesInterface() kubernetes.Interface {
276274
return c.kclient
277275
}
278276

279-
func (c *Client) ApiExtensionsInterface() apiextensionsclient.Interface {
280-
return c.eclient
281-
}
282-
283277
func (c *Client) EventRecorder() events.Recorder {
284278
return c.eventRecorder
285279
}
@@ -423,21 +417,6 @@ func (c *Client) ClusterOperatorListWatch(ctx context.Context, name string) *cac
423417
}
424418
}
425419

426-
func (c *Client) VerticalPodAutoscalerCRDListWatch(ctx context.Context) *cache.ListWatch {
427-
return &cache.ListWatch{
428-
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
429-
return c.eclient.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{
430-
FieldSelector: fields.OneTermEqualSelector("metadata.name", VerticalPodAutoscalerCRDMetadataName).String(),
431-
})
432-
},
433-
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
434-
return c.eclient.ApiextensionsV1().CustomResourceDefinitions().Watch(ctx, metav1.ListOptions{
435-
FieldSelector: fields.OneTermEqualSelector("metadata.name", VerticalPodAutoscalerCRDMetadataName).String(),
436-
})
437-
},
438-
}
439-
}
440-
441420
func (c *Client) HasRouteCapability(ctx context.Context) (bool, error) {
442421
_, err := c.oscclient.ConfigV1().ClusterOperators().Get(ctx, "ingress", metav1.GetOptions{})
443422
if apierrors.IsNotFound(err) {
@@ -1850,29 +1829,6 @@ func (c *Client) RegisterConsolePlugin(ctx context.Context, name string) error {
18501829
return nil
18511830
}
18521831

1853-
// VPACustomResourceDefinitionPresent checks if VerticalPodAutoscaler CRD is present in the cluster.
1854-
func (c *Client) VPACustomResourceDefinitionPresent(ctx context.Context, lastKnownVPACustomResourceDefinitionPresent *bool) (*bool, error) {
1855-
_, err := c.ApiExtensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Get(ctx, VerticalPodAutoscalerCRDMetadataName, metav1.GetOptions{})
1856-
if err != nil {
1857-
// VPA CRD is absent.
1858-
if apierrors.IsNotFound(err) {
1859-
return ptr.To(false), nil
1860-
}
1861-
1862-
// See if we have an idea of the state of the CRD's presence before the transient error occurred.
1863-
// If we do, resort to that since we do not want this to cause unnecessary reconciles.
1864-
if lastKnownVPACustomResourceDefinitionPresent != nil {
1865-
return lastKnownVPACustomResourceDefinitionPresent, nil
1866-
}
1867-
1868-
// If we don't, throw.
1869-
return nil, fmt.Errorf("failed to get %s CRD: %w", VerticalPodAutoscalerCRDMetadataName, err)
1870-
}
1871-
1872-
// VPA CRD is present.
1873-
return ptr.To(true), nil
1874-
}
1875-
18761832
// mergeMetadata merges labels and annotations from `existing` map into `required` one where `required` has precedence
18771833
// over `existing` keys and values. Additionally, function performs filtering of labels and annotations from `exiting` map
18781834
// where keys starting from string defined in `metadataPrefix` are deleted. This prevents issues with preserving stale

pkg/manifests/manifests.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -316,9 +316,6 @@ var (
316316
KubeRbacProxyTLSCipherSuitesFlag = "--tls-cipher-suites="
317317
KubeRbacProxyMinTLSVersionFlag = "--tls-min-version="
318318

319-
kubeStateMetricsCustomResourceStateConfigFileFlag = "--custom-resource-state-config-file="
320-
kubeStateMetricsCustomResourceStateConfigFile = "/etc/kube-state-metrics/custom-resource-state-configmap.yaml"
321-
322319
AuthProxyExternalURLFlag = "-external-url="
323320
AuthProxyCookieDomainFlag = "-cookie-domain="
324321
AuthProxyRedirectURLFlag = "-redirect-url="
@@ -748,8 +745,7 @@ func (f *Factory) KubeStateMetricsMinimalServiceMonitor() (*monv1.ServiceMonitor
748745
return f.NewServiceMonitor(f.assets.MustNewAssetSlice(KubeStateMetricsMinimalServiceMonitor))
749746
}
750747

751-
func (f *Factory) KubeStateMetricsDeployment(enableCRSMetrics bool) (*appsv1.Deployment, error) {
752-
flagCRSConfigFile := kubeStateMetricsCustomResourceStateConfigFileFlag + kubeStateMetricsCustomResourceStateConfigFile
748+
func (f *Factory) KubeStateMetricsDeployment() (*appsv1.Deployment, error) {
753749
d, err := f.NewDeployment(f.assets.MustNewAssetSlice(KubeStateMetricsDeployment))
754750
if err != nil {
755751
return nil, err
@@ -764,9 +760,6 @@ func (f *Factory) KubeStateMetricsDeployment(enableCRSMetrics bool) (*appsv1.Dep
764760
if f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources != nil {
765761
d.Spec.Template.Spec.Containers[i].Resources = *f.config.ClusterMonitoringConfiguration.KubeStateMetricsConfig.Resources
766762
}
767-
if enableCRSMetrics {
768-
d.Spec.Template.Spec.Containers[i].Args = append(container.Args, flagCRSConfigFile)
769-
}
770763
}
771764
}
772765

pkg/manifests/manifests_test.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ func TestUnconfiguredManifests(t *testing.T) {
272272
t.Fatal(err)
273273
}
274274

275-
_, err = f.KubeStateMetricsDeployment(false)
275+
_, err = f.KubeStateMetricsDeployment()
276276
if err != nil {
277277
t.Fatal(err)
278278
}
@@ -3694,14 +3694,13 @@ func TestKubeStateMetrics(t *testing.T) {
36943694

36953695
f := NewFactory("openshift-monitoring", "openshift-user-workload-monitoring", c, defaultInfrastructureReader(), &fakeProxyReader{}, NewAssets(assetsPath), &APIServerConfig{}, &configv1.Console{})
36963696

3697-
d, err := f.KubeStateMetricsDeployment(true)
3697+
d, err := f.KubeStateMetricsDeployment()
36983698
if err != nil {
36993699
t.Fatal(err)
37003700
}
37013701

37023702
kubeRbacProxyTLSCipherSuitesArg := ""
37033703
kubeRbacProxyMinTLSVersionArg := ""
3704-
gotKubeStateMetricsCustomResourceStateConfigFile := ""
37053704
for _, container := range d.Spec.Template.Spec.Containers {
37063705
switch container.Name {
37073706
case "kube-state-metrics":
@@ -3712,7 +3711,6 @@ func TestKubeStateMetrics(t *testing.T) {
37123711
t.Fatal("kube-state-metrics resources incorrectly configured")
37133712
}
37143713

3715-
gotKubeStateMetricsCustomResourceStateConfigFile = getContainerArgValue(d.Spec.Template.Spec.Containers, kubeStateMetricsCustomResourceStateConfigFileFlag, container.Name)
37163714
case "kube-rbac-proxy-self", "kube-rbac-proxy-main":
37173715
if container.Image != "docker.io/openshift/origin-kube-rbac-proxy:latest" {
37183716
t.Fatalf("%s image incorrectly configured", container.Name)
@@ -3745,11 +3743,7 @@ func TestKubeStateMetrics(t *testing.T) {
37453743
t.Fatal("kube-state-metrics topology spread constraints WhenUnsatisfiable not configured correctly")
37463744
}
37473745

3748-
if gotKubeStateMetricsCustomResourceStateConfigFile != kubeStateMetricsCustomResourceStateConfigFileFlag+kubeStateMetricsCustomResourceStateConfigFile {
3749-
t.Fatal("expected kube-state-metrics to enable custom-resource-state metrics")
3750-
}
3751-
3752-
d2, err := f.KubeStateMetricsDeployment(true)
3746+
d2, err := f.KubeStateMetricsDeployment()
37533747
if err != nil {
37543748
t.Fatal(err)
37553749
}

pkg/operator/operator.go

Lines changed: 2 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ import (
3333
"github.com/openshift/library-go/pkg/operator/events"
3434
certapiv1 "k8s.io/api/certificates/v1"
3535
v1 "k8s.io/api/core/v1"
36-
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
3736
apierrors "k8s.io/apimachinery/pkg/api/errors"
3837
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3938
k8sruntime "k8s.io/apimachinery/pkg/runtime"
@@ -205,27 +204,6 @@ type Operator struct {
205204

206205
ruleController *alert.RuleController
207206
relabelController *alert.RelabelConfigController
208-
209-
// lastKnownVPACustomResourceDefinitionPresent is a boolean pointer that
210-
// remembers the presence of the VPA CRD in the cluster between
211-
// kube-state-metrics task reconciliations. It is used to determine
212-
// whether to enable kube-state-metrics custom-resource-state-based
213-
// metrics for VPA CRs, even in cases where the Kube API may emit a
214-
// transient error (errors excluding `IsNotFound`) on the VPA CRD `GET`
215-
// requests (to determine its presence).
216-
// * `true` indicates that the VPA CRD is already present in the
217-
// cluster, and the custom-resource-state-based metrics can be safely
218-
// enabled.
219-
// * `false` indicates that the VPA CRD is not present in the cluster,
220-
// and enabling the custom-resource-state-based metrics will cause
221-
// kube-state-metrics to error (affecting `KubeStateMetricsListErrors`).
222-
// * `nil` indicates that the presence of the VPA CRD is unknown, and
223-
// the operator will attempt to determine the presence of the VPA CRD in
224-
// the current reconciliation cycle, and remember its state in the next
225-
// one. In the case where the VPA CRD is added or removed between
226-
// reconciliations, the variable "forgets" it, and is set to `nil`,
227-
// triggering a check on the next cycle.
228-
lastKnownVPACustomResourceDefinitionPresent *bool
229207
}
230208

231209
func New(
@@ -446,20 +424,6 @@ func New(
446424
}
447425
o.informers = append(o.informers, informer)
448426

449-
informer = cache.NewSharedIndexInformer(
450-
o.client.VerticalPodAutoscalerCRDListWatch(ctx),
451-
&apiextv1.CustomResourceDefinition{}, resyncPeriod, cache.Indexers{},
452-
)
453-
// Only trigger reconciliation on the addition or removal of VPA CRDs.
454-
_, err = informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
455-
AddFunc: o.handleEvent,
456-
DeleteFunc: o.handleEvent,
457-
})
458-
if err != nil {
459-
return nil, err
460-
}
461-
o.informers = append(o.informers, informer)
462-
463427
kubeInformersOperatorNS := informers.NewSharedInformerFactoryWithOptions(
464428
c.KubernetesInterface(),
465429
resyncPeriod,
@@ -667,10 +631,7 @@ func (o *Operator) handleEvent(obj interface{}) {
667631
*configv1.APIServer,
668632
*configv1.Console,
669633
*configv1.ClusterOperator,
670-
*configv1.ClusterVersion,
671-
// Currently, the CRDs that trigger reconciliation are:
672-
// * verticalpodautoscalers.autoscaling.k8s.io
673-
*apiextv1.CustomResourceDefinition:
634+
*configv1.ClusterVersion:
674635
// Log GroupKind and Name of the obj
675636
rtObj := obj.(k8sruntime.Object)
676637
gk := rtObj.GetObjectKind().GroupVersionKind().GroupKind()
@@ -819,18 +780,6 @@ func (o *Operator) sync(ctx context.Context, key string) error {
819780
klog.Warningf("Fail to load ConsoleConfig, AlertManager's externalURL may be outdated")
820781
}
821782

822-
// Enable kube-state-metrics' custom-resource-state-based metrics if VPA CRD is installed within the cluster.
823-
o.lastKnownVPACustomResourceDefinitionPresent, err = o.client.VPACustomResourceDefinitionPresent(ctx, o.lastKnownVPACustomResourceDefinitionPresent)
824-
if err != nil {
825-
// Throw on all transient errors.
826-
return fmt.Errorf("unable to guess the desired state for kube-state-metrics' custom-resource-state metrics enablement: %w", err)
827-
} else {
828-
// If we didn't get an error, we can safely assume that the CRD is deterministically either present or absent.
829-
if *o.lastKnownVPACustomResourceDefinitionPresent {
830-
klog.Infof("%s CRD found, enabling kube-state-metrics' custom-resource-state-based metrics", client.VerticalPodAutoscalerCRDMetadataName)
831-
}
832-
}
833-
834783
factory := manifests.NewFactory(
835784
o.namespace,
836785
o.namespaceUserWorkload,
@@ -859,7 +808,7 @@ func (o *Operator) sync(ctx context.Context, key string) error {
859808
newTaskSpec("Prometheus", tasks.NewPrometheusTask(o.client, factory, config)),
860809
newTaskSpec("Alertmanager", tasks.NewAlertmanagerTask(o.client, factory, config)),
861810
newTaskSpec("NodeExporter", tasks.NewNodeExporterTask(o.client, factory)),
862-
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory, *o.lastKnownVPACustomResourceDefinitionPresent)),
811+
newTaskSpec("KubeStateMetrics", tasks.NewKubeStateMetricsTask(o.client, factory)),
863812
newTaskSpec("OpenshiftStateMetrics", tasks.NewOpenShiftStateMetricsTask(o.client, factory)),
864813
newTaskSpec("MetricsServer", tasks.NewMetricsServerTask(ctx, o.namespace, o.client, factory, config)),
865814
newTaskSpec("TelemeterClient", tasks.NewTelemeterClientTask(o.client, factory, config)),

pkg/tasks/kubestatemetrics.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,14 @@ import (
2323
)
2424

2525
type KubeStateMetricsTask struct {
26-
client *client.Client
27-
factory *manifests.Factory
28-
enableCRSMetrics bool
26+
client *client.Client
27+
factory *manifests.Factory
2928
}
3029

31-
func NewKubeStateMetricsTask(client *client.Client, factory *manifests.Factory, enableCRSMetrics bool) *KubeStateMetricsTask {
30+
func NewKubeStateMetricsTask(client *client.Client, factory *manifests.Factory) *KubeStateMetricsTask {
3231
return &KubeStateMetricsTask{
33-
client: client,
34-
factory: factory,
35-
enableCRSMetrics: enableCRSMetrics,
32+
client: client,
33+
factory: factory,
3634
}
3735
}
3836

@@ -97,7 +95,7 @@ func (t *KubeStateMetricsTask) Run(ctx context.Context) error {
9795
return fmt.Errorf("reconciling %s/%s ConfigMap failed: %w", cm.Namespace, cm.Name, err)
9896
}
9997

100-
dep, err := t.factory.KubeStateMetricsDeployment(t.enableCRSMetrics)
98+
dep, err := t.factory.KubeStateMetricsDeployment()
10199
if err != nil {
102100
return fmt.Errorf("initializing kube-state-metrics Deployment failed: %w", err)
103101
}

0 commit comments

Comments
 (0)