Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions Documentation/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th
| prometheusOperator | *[PrometheusOperatorConfig](#prometheusoperatorconfig) | `PrometheusOperatorConfig` defines settings for the Prometheus Operator component. |
| prometheusOperatorAdmissionWebhook | *[PrometheusOperatorAdmissionWebhookConfig](#prometheusoperatoradmissionwebhookconfig) | `PrometheusOperatorAdmissionWebhookConfig` defines settings for the Prometheus Operator's admission webhook component. |
| openshiftStateMetrics | *[OpenShiftStateMetricsConfig](#openshiftstatemetricsconfig) | `OpenShiftStateMetricsConfig` defines settings for the `openshift-state-metrics` agent. |
| telemeterClient | *[TelemeterClientConfig](#telemeterclientconfig) | `TelemeterClientConfig` defines settings for the Telemeter Client component. |
| telemetryConfig | *[TelemetryConfig](#telemetryconfig) | `TelemetryConfig` defines settings for telemetry reporting. |
| thanosQuerier | *[ThanosQuerierConfig](#thanosquerierconfig) | `ThanosQuerierConfig` defines settings for the Thanos Querier component. |
| nodeExporter | [NodeExporterConfig](#nodeexporterconfig) | `NodeExporterConfig` defines settings for the `node-exporter` agent. |
| monitoringPlugin | *[MonitoringPluginConfig](#monitoringpluginconfig) | `MonitoringPluginConfig` defines settings for the monitoring `console-plugin`. |
Expand Down Expand Up @@ -568,9 +568,6 @@ The `TLSConfig` resource configures the settings for TLS connections.
#### Required
- ` nodeSelector `
- ` tolerations `

<em>appears in: [ClusterMonitoringConfiguration](#clustermonitoringconfiguration)</em>

| Property | Type | Description |
| -------- | ---- | ----------- |
| nodeSelector | map[string]string | Defines the nodes on which the pods are scheduled. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ The `ClusterMonitoringConfiguration` resource defines settings that customize th

|openshiftStateMetrics|*link:openshiftstatemetricsconfig.adoc[OpenShiftStateMetricsConfig]|`OpenShiftStateMetricsConfig` defines settings for the `openshift-state-metrics` agent.

|telemeterClient|*link:telemeterclientconfig.adoc[TelemeterClientConfig]|`TelemeterClientConfig` defines settings for the Telemeter Client component.
|telemetryConfig|*link:telemetryconfig.adoc[TelemetryConfig]|`TelemetryConfig` defines settings for telemetry reporting.

|thanosQuerier|*link:thanosquerierconfig.adoc[ThanosQuerierConfig]|`ThanosQuerierConfig` defines settings for the Thanos Querier component.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
* `nodeSelector`
* `tolerations`


Appears in: link:clustermonitoringconfiguration.adoc[ClusterMonitoringConfiguration]

[options="header"]
|===
| Property | Type | Description
Expand Down
406 changes: 406 additions & 0 deletions assets/telemetry-recording-rules/prometheus-rule.yaml

Large diffs are not rendered by default.

41 changes: 41 additions & 0 deletions jsonnet/components/telemetry-recording-rules.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Parse the telemetry config to extract properly grouped matchers.
//
// The manifest is imported as raw text and parsed as YAML. Its
// `data['metrics.yaml']` entry is itself a YAML document held as a string,
// so it is parsed a second time to reach the `matches` field.
local telemetryConfigYaml = std.parseYaml(importstr '../../manifests/0000_50_cluster-monitoring-operator_04-config.yaml');
// The `matches` entries; each one is later interpolated into a PromQL expression.
local telemetryMatches = std.parseYaml(telemetryConfigYaml.data['metrics.yaml']).matches;

// Build one recording rule per entry of `telemetryMatches`.
//
// Every rule records into the same series name, `telemetry:metric`. The name
// of the source metric is carried in the `original_name_label` label instead:
//   - regex matchers can select several metric names at once, so the label is
//     required there, and it is applied to all matchers for consistency;
//   - without it Prometheus can log
//     `execution: vector cannot contain metrics with the same labelset`,
//     because the metric name is dropped while querying. See
//     https://github.com/prometheus/prometheus/issues/11397
// The correct label name is restored in the remote_write config.
//
// NOTE(review): `sum` normally removes `__name__` from its result, so
// `label_replace` may not find a value to copy here -- confirm against a
// running Prometheus that `original_name_label` is actually populated.
local generateTelemetryRules() =
  local exprTemplate = 'label_replace(sum without(pod, container) (%s),"original_name_label","$1","__name__", "(.+)")';
  [
    {
      record: 'telemetry:metric',
      expr: exprTemplate % matcher,
    }
    for matcher in telemetryMatches
  ];

// Component entry point.
//
// `params` must provide:
//   - namespace:    namespace the PrometheusRule is created in;
//   - commonLabels: labels merged into the rule's metadata, to which
//                   `role: telemetry-rules` is added.
//
// Returns an object with a single `prometheusRule` field holding a
// PrometheusRule named `telemetry-recording-rules`. It contains one rule
// group, evaluated every 4m30s, with the rules from generateTelemetryRules().
function(params)
  local ruleLabels = params.commonLabels + {
    role: 'telemetry-rules',
  };
  local ruleGroup = {
    name: 'telemetry-recording.rules',
    interval: '4m30s',
    rules: generateTelemetryRules(),
  };

  {
    prometheusRule: {
      apiVersion: 'monitoring.coreos.com/v1',
      kind: 'PrometheusRule',
      metadata: {
        name: 'telemetry-recording-rules',
        namespace: params.namespace,
        labels: ruleLabels,
      },
      spec: {
        groups: [ruleGroup],
      },
    },
  }
7 changes: 7 additions & 0 deletions jsonnet/main.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ local thanosQuerier = import './components/thanos-querier.libsonnet';

local openshiftStateMetrics = import './components/openshift-state-metrics.libsonnet';
local telemeterClient = import './components/telemeter-client.libsonnet';
local telemetryRecordingRules = import './components/telemetry-recording-rules.libsonnet';

// Common configuration
local commonConfig = {
Expand Down Expand Up @@ -386,6 +387,10 @@ local inCluster =
},
},
},
telemetryRecordingRules: {
namespace: $.values.common.namespace,
commonLabels+: $.values.common.commonLabels,
},
},

// Objects
Expand Down Expand Up @@ -430,6 +435,7 @@ local inCluster =
telemeterClient: telemeterClient($.values.telemeterClient),
monitoringPlugin: monitoringPlugin($.values.monitoringPlugin),
openshiftStateMetrics: openshiftStateMetrics($.values.openshiftStateMetrics),
telemetryRecordingRules: telemetryRecordingRules($.values.telemetryRecordingRules),
} +
(import './utils/anti-affinity.libsonnet') +
(import 'github.com/prometheus-operator/kube-prometheus/jsonnet/kube-prometheus/addons/ksm-lite.libsonnet') +
Expand Down Expand Up @@ -535,6 +541,7 @@ setTerminationMessagePolicy(
{ ['thanos-querier/' + name]: inCluster.thanosQuerier[name] for name in std.objectFields(inCluster.thanosQuerier) } +
{ ['thanos-ruler/' + name]: inCluster.thanosRuler[name] for name in std.objectFields(inCluster.thanosRuler) } +
{ ['control-plane/' + name]: inCluster.controlPlane[name] for name in std.objectFields(inCluster.controlPlane) } +
{ ['telemetry-recording-rules/' + name]: inCluster.telemetryRecordingRules[name] for name in std.objectFields(inCluster.telemetryRecordingRules) } +
{ ['manifests/' + name]: inCluster.manifests[name] for name in std.objectFields(inCluster.manifests) } +
{}
)
Expand Down
4 changes: 0 additions & 4 deletions manifests/image-references
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,6 @@ spec:
from:
kind: DockerImage
name: quay.io/openshift/origin-kube-rbac-proxy:latest
- name: telemeter
from:
kind: DockerImage
name: quay.io/openshift/origin-telemeter:latest
- name: prom-label-proxy
from:
kind: DockerImage
Expand Down
62 changes: 46 additions & 16 deletions pkg/manifests/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,20 @@ type Audit struct {
Profile auditv1.Level `json:"profile"`
}

// IsEnabled reports whether telemetry is enabled for this configuration.
//
// It returns false for a nil receiver, when Enabled is set and points to
// false, or when either ClusterID or Token is empty. A nil Enabled pointer
// does not disable telemetry on its own.
func (cfg *TelemetryConfig) IsEnabled() bool {
	if cfg == nil {
		return false
	}

	explicitlyDisabled := cfg.Enabled != nil && !*cfg.Enabled
	hasIdentity := cfg.ClusterID != "" && cfg.Token != ""

	return !explicitlyDisabled && hasIdentity
}

func (cfg *TelemeterClientConfig) IsEnabled() bool {
if cfg == nil {
return false
Expand Down Expand Up @@ -439,9 +453,18 @@ func (c *Config) applyDefaults() {
if c.ClusterMonitoringConfiguration.HTTPConfig == nil {
c.ClusterMonitoringConfiguration.HTTPConfig = &HTTPConfig{}
}
if c.ClusterMonitoringConfiguration.TelemeterClientConfig == nil {
c.ClusterMonitoringConfiguration.TelemeterClientConfig = &TelemeterClientConfig{
TelemeterServerURL: "https://infogw.api.openshift.com/",
if c.ClusterMonitoringConfiguration.TelemetryConfig == nil {
if c.ClusterMonitoringConfiguration.TelemeterClientConfig != nil {
c.ClusterMonitoringConfiguration.TelemetryConfig = &TelemetryConfig{
ClusterID: c.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID,
Enabled: c.ClusterMonitoringConfiguration.TelemeterClientConfig.Enabled,
TelemeterServerURL: c.ClusterMonitoringConfiguration.TelemeterClientConfig.TelemeterServerURL,
Token: c.ClusterMonitoringConfiguration.TelemeterClientConfig.Token,
}
} else {
c.ClusterMonitoringConfiguration.TelemetryConfig = &TelemetryConfig{
TelemeterServerURL: "https://infogw.api.openshift.com/metrics/v1/receive",
}
}
}

Expand Down Expand Up @@ -515,13 +538,10 @@ func (c *Config) SetTelemetryMatches(matches []string) {

func (c *Config) SetRemoteWrite(rw bool) {
c.RemoteWrite = rw
if c.RemoteWrite && c.ClusterMonitoringConfiguration.TelemeterClientConfig.TelemeterServerURL == "https://infogw.api.openshift.com/" {
c.ClusterMonitoringConfiguration.TelemeterClientConfig.TelemeterServerURL = "https://infogw.api.openshift.com/metrics/v1/receive"
}
}

func (c *Config) LoadClusterID(load func() (*configv1.ClusterVersion, error)) error {
if c.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID != "" {
if c.ClusterMonitoringConfiguration.TelemetryConfig.ClusterID != "" {
return nil
}

Expand All @@ -530,12 +550,12 @@ func (c *Config) LoadClusterID(load func() (*configv1.ClusterVersion, error)) er
return fmt.Errorf("error loading cluster version: %w", err)
}

c.ClusterMonitoringConfiguration.TelemeterClientConfig.ClusterID = string(cv.Spec.ClusterID)
c.ClusterMonitoringConfiguration.TelemetryConfig.ClusterID = string(cv.Spec.ClusterID)
return nil
}

func (c *Config) LoadToken(load func() (*v1.Secret, error)) error {
if c.ClusterMonitoringConfiguration.TelemeterClientConfig.Token != "" {
if c.ClusterMonitoringConfiguration.TelemetryConfig.Token != "" {
return nil
}

Expand All @@ -560,7 +580,7 @@ func (c *Config) LoadToken(load func() (*v1.Secret, error)) error {
return fmt.Errorf("unmarshaling pull secret failed: %w", err)
}

c.ClusterMonitoringConfiguration.TelemeterClientConfig.Token = ps.Auths.COC.Auth
c.ClusterMonitoringConfiguration.TelemetryConfig.Token = ps.Auths.COC.Auth
return nil
}

Expand Down Expand Up @@ -639,13 +659,23 @@ func (c *Config) Precheck() error {
}

// Highlight deprecated config fields.
var d float64
if c.ClusterMonitoringConfiguration.K8sPrometheusAdapter != nil {
klog.Infof("k8sPrometheusAdapter is a deprecated config use metricsServer instead")
d = 1
{
var d float64
if c.ClusterMonitoringConfiguration.K8sPrometheusAdapter != nil {
klog.Infof("k8sPrometheusAdapter is a deprecated config use metricsServer instead")
d = 1
}
// Prometheus-Adapter is replaced with Metrics Server by default from 4.16
metrics.DeprecatedConfig.WithLabelValues("openshift-monitoring/cluster-monitoring-config", "k8sPrometheusAdapter", "4.16").Set(d)
}
{
var d float64
if c.ClusterMonitoringConfiguration.TelemeterClientConfig != nil {
klog.Infof("telemeterClientConfig is a deprecated config use telemetryConfig instead")
d = 1
}
metrics.DeprecatedConfig.WithLabelValues("openshift-monitoring/cluster-monitoring-config", "telemeterClientConfig", "4.21").Set(d)
}
// Prometheus-Adapter is replaced with Metrics Server by default from 4.16
metrics.DeprecatedConfig.WithLabelValues("openshift-monitoring/cluster-monitoring-config", "k8sPrometheusAdapter", "4.16").Set(d)

// TODO: remove after 4.19
// Only to assist with the migration to Prometheus 3; fail early if Alertmanager v1 is still in use.
Expand Down
Loading