diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cf701b1b1..94bb7e2d9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Note: This CHANGELOG is only for the monitoring team to track all monitoring related changes. Please see OpenShift release notes for official changes. +## 4.21 + +- [#2339](https://github.com/openshift/cluster-monitoring-operator/pull/2339) Add support to sysctl node-exporter collector + ## 4.20 - [#2595](https://github.com/openshift/cluster-monitoring-operator/pull/2595) Multi-tenant support for KSM's CRS feature-set downstream. diff --git a/Documentation/api.md b/Documentation/api.md index 4f8a1d2ab1..26a1590101 100644 --- a/Documentation/api.md +++ b/Documentation/api.md @@ -34,6 +34,7 @@ Configuring Cluster Monitoring is optional. If the config does not exist or is e * [NodeExporterCollectorNetClassConfig](#nodeexportercollectornetclassconfig) * [NodeExporterCollectorNetDevConfig](#nodeexportercollectornetdevconfig) * [NodeExporterCollectorProcessesConfig](#nodeexportercollectorprocessesconfig) +* [NodeExporterCollectorSysctlConfig](#nodeexportercollectorsysctlconfig) * [NodeExporterCollectorSystemdConfig](#nodeexportercollectorsystemdconfig) * [NodeExporterCollectorTcpStatConfig](#nodeexportercollectortcpstatconfig) * [NodeExporterConfig](#nodeexporterconfig) @@ -249,6 +250,7 @@ The `NodeExporterCollectorConfig` resource defines settings for individual colle | mountstats | [NodeExporterCollectorMountStatsConfig](#nodeexportercollectormountstatsconfig) | Defines the configuration of the `mountstats` collector, which collects statistics about NFS volume I/O activities. Disabled by default. | | ksmd | [NodeExporterCollectorKSMDConfig](#nodeexportercollectorksmdconfig) | Defines the configuration of the `ksmd` collector, which collects statistics from the kernel same-page merger daemon. Disabled by default. 
| | processes | [NodeExporterCollectorProcessesConfig](#nodeexportercollectorprocessesconfig) | Defines the configuration of the `processes` collector, which collects statistics from processes and threads running in the system. Disabled by default. | +| sysctl | [NodeExporterCollectorSysctlConfig](#nodeexportercollectorsysctlconfig) | Defines the configuration of the `sysctl` collector, which collects sysctl metrics. Disabled by default. | | systemd | [NodeExporterCollectorSystemdConfig](#nodeexportercollectorsystemdconfig) | Defines the configuration of the `systemd` collector, which collects statistics on the systemd daemon and its managed services. Disabled by default. | [Back to TOC](#table-of-contents) @@ -344,6 +346,23 @@ The `NodeExporterCollectorProcessesConfig` resource works as an on/off switch fo [Back to TOC](#table-of-contents) +## NodeExporterCollectorSysctlConfig + +#### Description + +The `NodeExporterCollectorSysctlConfig` resource works as an on/off switch for the `sysctl` collector of the `node-exporter` agent. Caution! Exposing metrics like kernel.random.uuid can disrupt Prometheus, as it generates new data series with every scrape. Use this option judiciously! By default, the `sysctl` collector is disabled. + + +appears in: [NodeExporterCollectorConfig](#nodeexportercollectorconfig) + +| Property | Type | Description | +| -------- | ---- | ----------- | +| enabled | bool | A Boolean flag that enables or disables the `sysctl` collector. | +| includeSysctlMetrics | []string | A list of numeric sysctl values. Note that a sysctl can contain multiple values, for example: `net.ipv4.tcp_rmem = 4096\t131072\t6291456`. Using `includeSysctlMetrics: ['net.ipv4.tcp_rmem']` the collector will expose: `node_sysctl_net_ipv4_tcp_rmem{index=\"0\"} 4096`, `node_sysctl_net_ipv4_tcp_rmem{index=\"1\"} 131072`, `node_sysctl_net_ipv4_tcp_rmem{index=\"2\"} 6291456`. 
If the indexes have defined meaning like in this case, the values can be mapped to multiple metrics: `includeSysctlMetrics: ['net.ipv4.tcp_rmem:min,default,max']`. The collector will expose these metrics as such: `node_sysctl_net_ipv4_tcp_rmem_min 4096`, `node_sysctl_net_ipv4_tcp_rmem_default 131072`, `node_sysctl_net_ipv4_tcp_rmem_max 6291456`. | +| includeInfoSysctlMetrics | []string | A list of string sysctl values. For example: `includeInfoSysctlMetrics: ['kernel.core_pattern', 'kernel.seccomp.actions_avail']`. Given `kernel.core_pattern = core` and `kernel.seccomp.actions_avail = kill_process kill_thread ...`, the collector will expose these metrics as such: `node_sysctl_info{name=\"kernel.core_pattern\", value=\"core\"} 1`, `node_sysctl_info{name=\"kernel.seccomp.actions_avail\", index=\"0\", value=\"kill_process\"} 1`, `node_sysctl_info{name=\"kernel.seccomp.actions_avail\", index=\"1\", value=\"kill_thread\"} 1`, ... | + +[Back to TOC](#table-of-contents) + ## NodeExporterCollectorSystemdConfig #### Description diff --git a/Documentation/openshiftdocs/index.adoc b/Documentation/openshiftdocs/index.adoc index 7dcc4dc7bd..efddc88d5e 100644 --- a/Documentation/openshiftdocs/index.adoc +++ b/Documentation/openshiftdocs/index.adoc @@ -54,6 +54,7 @@ The configuration file itself is always defined under the `config.yaml` key in t * link:modules/nodeexportercollectornetclassconfig.adoc[NodeExporterCollectorNetClassConfig] * link:modules/nodeexportercollectornetdevconfig.adoc[NodeExporterCollectorNetDevConfig] * link:modules/nodeexportercollectorprocessesconfig.adoc[NodeExporterCollectorProcessesConfig] +* link:modules/nodeexportercollectorsysctlconfig.adoc[NodeExporterCollectorSysctlConfig] * link:modules/nodeexportercollectorsystemdconfig.adoc[NodeExporterCollectorSystemdConfig] * link:modules/nodeexportercollectortcpstatconfig.adoc[NodeExporterCollectorTcpStatConfig] * link:modules/nodeexporterconfig.adoc[NodeExporterConfig] diff --git a/Documentation/openshiftdocs/modules/nodeexportercollectorconfig.adoc 
b/Documentation/openshiftdocs/modules/nodeexportercollectorconfig.adoc index 7d5e2fbbe9..7470c6d522 100644 --- a/Documentation/openshiftdocs/modules/nodeexportercollectorconfig.adoc +++ b/Documentation/openshiftdocs/modules/nodeexportercollectorconfig.adoc @@ -34,6 +34,8 @@ Appears in: link:nodeexporterconfig.adoc[NodeExporterConfig] |processes|link:nodeexportercollectorprocessesconfig.adoc[NodeExporterCollectorProcessesConfig]|Defines the configuration of the `processes` collector, which collects statistics from processes and threads running in the system. Disabled by default. +|sysctl|link:nodeexportercollectorsysctlconfig.adoc[NodeExporterCollectorSysctlConfig]|Defines the configuration of the `sysctl` collector, which collects sysctl metrics. Disabled by default. + |systemd|link:nodeexportercollectorsystemdconfig.adoc[NodeExporterCollectorSystemdConfig]|Defines the configuration of the `systemd` collector, which collects statistics on the systemd daemon and its managed services. Disabled by default. |=== diff --git a/Documentation/openshiftdocs/modules/nodeexportercollectorsysctlconfig.adoc b/Documentation/openshiftdocs/modules/nodeexportercollectorsysctlconfig.adoc new file mode 100644 index 0000000000..6c405e2bae --- /dev/null +++ b/Documentation/openshiftdocs/modules/nodeexportercollectorsysctlconfig.adoc @@ -0,0 +1,29 @@ +// DO NOT EDIT THE CONTENT IN THIS FILE. It is automatically generated from the + // source code for the Cluster Monitoring Operator. Any changes made to this + // file will be overwritten when the content is re-generated. If you wish to + // make edits, read the docgen utility instructions in the source code for the + // CMO. + :_content-type: ASSEMBLY + +== NodeExporterCollectorSysctlConfig + +=== Description + +The `NodeExporterCollectorSysctlConfig` resource works as an on/off switch for the `sysctl` collector of the `node-exporter` agent. Caution! 
Exposing metrics like kernel.random.uuid can disrupt Prometheus, as it generates new data series with every scrape. Use this option judiciously! By default, the `sysctl` collector is disabled. + + + +Appears in: link:nodeexportercollectorconfig.adoc[NodeExporterCollectorConfig] + +[options="header"] +|=== +| Property | Type | Description +|enabled|bool|A Boolean flag that enables or disables the `sysctl` collector. + +|includeSysctlMetrics|[]string|A list of numeric sysctl values. Note that a sysctl can contain multiple values, for example: `net.ipv4.tcp_rmem = 4096\t131072\t6291456`. Using `includeSysctlMetrics: ['net.ipv4.tcp_rmem']` the collector will expose: `node_sysctl_net_ipv4_tcp_rmem{index=\"0\"} 4096`, `node_sysctl_net_ipv4_tcp_rmem{index=\"1\"} 131072`, `node_sysctl_net_ipv4_tcp_rmem{index=\"2\"} 6291456`. If the indexes have defined meaning like in this case, the values can be mapped to multiple metrics: `includeSysctlMetrics: ['net.ipv4.tcp_rmem:min,default,max']`. The collector will expose these metrics as such: `node_sysctl_net_ipv4_tcp_rmem_min 4096`, `node_sysctl_net_ipv4_tcp_rmem_default 131072`, `node_sysctl_net_ipv4_tcp_rmem_max 6291456`. + +|includeInfoSysctlMetrics|[]string|A list of string sysctl values. For example: `includeInfoSysctlMetrics: ['kernel.core_pattern', 'kernel.seccomp.actions_avail']`. Given `kernel.core_pattern = core` and `kernel.seccomp.actions_avail = kill_process kill_thread ...`, the collector will expose these metrics as such: `node_sysctl_info{name=\"kernel.core_pattern\", value=\"core\"} 1`, `node_sysctl_info{name=\"kernel.seccomp.actions_avail\", index=\"0\", value=\"kill_process\"} 1`, `node_sysctl_info{name=\"kernel.seccomp.actions_avail\", index=\"1\", value=\"kill_thread\"} 1`, ... 
+ +|=== + +link:../index.adoc[Back to TOC] diff --git a/pkg/manifests/config.go b/pkg/manifests/config.go index 0ca071ee87..54f5d94b3e 100644 --- a/pkg/manifests/config.go +++ b/pkg/manifests/config.go @@ -355,6 +355,11 @@ func defaultClusterMonitoringConfiguration() ClusterMonitoringConfiguration { Systemd: NodeExporterCollectorSystemdConfig{ Enabled: false, }, + Sysctl: NodeExporterCollectorSysctlConfig{ + Enabled: false, + IncludeSysctlMetrics: []string{}, + IncludeInfoSysctlMetrics: []string{}, + }, }, }, } diff --git a/pkg/manifests/manifests.go b/pkg/manifests/manifests.go index 3642321fed..ec318f133d 100644 --- a/pkg/manifests/manifests.go +++ b/pkg/manifests/manifests.go @@ -881,6 +881,25 @@ func (f *Factory) updateNodeExporterArgs(args []string) ([]string, error) { args = setArg(args, "--no-collector.tcpstat", "") } + if f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Collectors.Sysctl.Enabled { + includeSysctlMetrics := f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Collectors.Sysctl.IncludeSysctlMetrics + includeInfoSysctlMetrics := f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Collectors.Sysctl.IncludeInfoSysctlMetrics + + args = setArg(args, "--collector.sysctl", "") + + sysctlSet := uniqueSet(includeSysctlMetrics) + for _, sysctl := range sysctlSet { + args = append(args, fmt.Sprintf("--collector.sysctl.include=%s", sysctl)) + } + + sysctlSet = uniqueSet(includeInfoSysctlMetrics) + for _, sysctl := range sysctlSet { + args = append(args, fmt.Sprintf("--collector.sysctl.include-info=%s", sysctl)) + } + } else { + args = setArg(args, "--no-collector.sysctl", "") + } + var excludedDevices string if f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Collectors.NetDev.Enabled || f.config.ClusterMonitoringConfiguration.NodeExporterConfig.Collectors.NetClass.Enabled { @@ -2365,6 +2384,18 @@ func setArg(args []string, argName string, argValue string) []string { return args } +func uniqueSet(input []string) []string 
{ + uniqueMap := make(map[string]struct{}) + var unique []string + for _, str := range input { + if _, ok := uniqueMap[str]; !ok { + uniqueMap[str] = struct{}{} + unique = append(unique, str) + } + } + return unique +} + func (f *Factory) PrometheusRuleValidatingWebhook() (*admissionv1.ValidatingWebhookConfiguration, error) { return f.NewValidatingWebhook(f.assets.MustNewAssetSlice(AdmissionWebhookRuleValidatingWebhook)) } diff --git a/pkg/manifests/manifests_test.go b/pkg/manifests/manifests_test.go index a040194a01..51eccfd64a 100644 --- a/pkg/manifests/manifests_test.go +++ b/pkg/manifests/manifests_test.go @@ -3381,6 +3381,7 @@ func TestNodeExporterCollectorSettings(t *testing.T) { name: "default config", config: "", argsPresent: []string{"--no-collector.cpufreq", + "--no-collector.sysctl", "--no-collector.tcpstat", "--collector.netdev", "--collector.netclass", @@ -3393,6 +3394,7 @@ func TestNodeExporterCollectorSettings(t *testing.T) { "--no-collector.systemd", }, argsAbsent: []string{"--collector.cpufreq", + "--collector.sysctl", "--collector.tcpstat", "--no-collector.netdev", "--no-collector.netclass", @@ -3558,6 +3560,38 @@ nodeExporter: "--collector.systemd.unit-include=^(network.+|nss.+)$"}, argsAbsent: []string{"--no-collector.systemd"}, }, + { + name: "disable sysctl collector", + config: ` +nodeExporter: + collectors: + sysctl: + enabled: false +`, + argsPresent: []string{"--no-collector.sysctl"}, + argsAbsent: []string{"--collector.sysctl"}, + }, + { + name: "enable sysctl collector", + config: ` +nodeExporter: + collectors: + sysctl: + enabled: true + includeSysctlMetrics: + - net.ipv4.tcp_rmem:min,default,max + - net.ipv4.tcp_mem + includeInfoSysctlMetrics: + - kernel.core_pattern + - kernel.seccomp.actions_avail +`, + argsPresent: []string{"--collector.sysctl", + "--collector.sysctl.include=net.ipv4.tcp_rmem:min,default,max", + "--collector.sysctl.include=net.ipv4.tcp_mem", + "--collector.sysctl.include-info=kernel.core_pattern", + 
"--collector.sysctl.include-info=kernel.seccomp.actions_avail"}, + argsAbsent: []string{"--no-collector.sysctl"}, + }, } for _, test := range tests { diff --git a/pkg/manifests/types.go b/pkg/manifests/types.go index 287d1bc61e..a0b653abfc 100644 --- a/pkg/manifests/types.go +++ b/pkg/manifests/types.go @@ -372,6 +372,9 @@ type NodeExporterCollectorConfig struct { // Defines the configuration of the `processes` collector, which collects statistics from processes and threads running in the system. // Disabled by default. Processes NodeExporterCollectorProcessesConfig `json:"processes,omitempty"` + // Defines the configuration of the `sysctl` collector, which collects sysctl metrics. + // Disabled by default. + Sysctl NodeExporterCollectorSysctlConfig `json:"sysctl,omitempty"` // Defines the configuration of the `systemd` collector, which collects statistics on the systemd daemon and its managed services. // Disabled by default. Systemd NodeExporterCollectorSystemdConfig `json:"systemd,omitempty"` @@ -397,6 +400,38 @@ type NodeExporterCollectorTcpStatConfig struct { Enabled bool `json:"enabled,omitempty"` } +// The `NodeExporterCollectorSysctlConfig` resource works as an on/off switch for +// the `sysctl` collector of the `node-exporter` agent. +// Caution! Exposing metrics like kernel.random.uuid can disrupt Prometheus, as it generates new data series with every scrape. Use this option judiciously! +// By default, the `sysctl` collector is disabled. +type NodeExporterCollectorSysctlConfig struct { + // A Boolean flag that enables or disables the `sysctl` collector. + Enabled bool `json:"enabled,omitempty"` + // A list of numeric sysctl values. + // Note that a sysctl can contain multiple values, for example: + // `net.ipv4.tcp_rmem = 4096 131072 6291456`. 
+ // Using `includeSysctlMetrics: ['net.ipv4.tcp_rmem']` the collector will expose: + // `node_sysctl_net_ipv4_tcp_rmem{index="0"} 4096`, + // `node_sysctl_net_ipv4_tcp_rmem{index="1"} 131072`, + // `node_sysctl_net_ipv4_tcp_rmem{index="2"} 6291456`. + // If the indexes have defined meaning like in this case, the values can be mapped to multiple metrics: + // `includeSysctlMetrics: ['net.ipv4.tcp_rmem:min,default,max']`. + // The collector will expose these metrics as such: + // `node_sysctl_net_ipv4_tcp_rmem_min 4096`, + // `node_sysctl_net_ipv4_tcp_rmem_default 131072`, + // `node_sysctl_net_ipv4_tcp_rmem_max 6291456`. + IncludeSysctlMetrics []string `json:"includeSysctlMetrics,omitempty"` + // A list of string sysctl values. + // For example: + // `includeInfoSysctlMetrics: ['kernel.core_pattern', 'kernel.seccomp.actions_avail']`. + // Given `kernel.core_pattern = core` and `kernel.seccomp.actions_avail = kill_process kill_thread ...`, the collector will expose these metrics as such: + // `node_sysctl_info{name="kernel.core_pattern", value="core"} 1`, + // `node_sysctl_info{name="kernel.seccomp.actions_avail", index="0", value="kill_process"} 1`, + // `node_sysctl_info{name="kernel.seccomp.actions_avail", index="1", value="kill_thread"} 1`, + // ... + IncludeInfoSysctlMetrics []string `json:"includeInfoSysctlMetrics,omitempty"` +} + // The `NodeExporterCollectorNetDevConfig` resource works as an on/off switch for // the `netdev` collector of the `node-exporter` agent. // By default, the `netdev` collector is enabled.