diff --git a/common-lib/common/panels/cpu/stat/count.libsonnet b/common-lib/common/panels/cpu/stat/count.libsonnet index 915060b49..4b9a9ed77 100644 --- a/common-lib/common/panels/cpu/stat/count.libsonnet +++ b/common-lib/common/panels/cpu/stat/count.libsonnet @@ -16,6 +16,7 @@ base { stylize(allLayers=true): (if allLayers then super.stylize() else {}) - + generic.info.stylize(allLayers=false), + + generic.info.stylize(allLayers=false) + + g.panel.stat.standardOptions.withUnit('none'), } diff --git a/windows-active-directory-mixin/mixin.libsonnet b/windows-active-directory-mixin/mixin.libsonnet index 23a9c558b..938a47671 100644 --- a/windows-active-directory-mixin/mixin.libsonnet +++ b/windows-active-directory-mixin/mixin.libsonnet @@ -3,16 +3,13 @@ local alerts = import './alerts/alerts.libsonnet'; local g = import './g.libsonnet'; local var = g.dashboard.variable; local activedirectorymixin = - windowsobservlib.new( - filteringSelector='job=~"integrations/windows_exporter"', - uid='active-directory', - groupLabels=['job'], - instanceLabels=['instance'], - ) + windowsobservlib.new() { config+: { enableADDashboard: true, + groupLabels: ['job'], + uid: 'active-directory', }, } diff --git a/windows-mixin/mixin.libsonnet b/windows-mixin/mixin.libsonnet index 2bae55c62..0aea86ab7 100644 --- a/windows-mixin/mixin.libsonnet +++ b/windows-mixin/mixin.libsonnet @@ -4,11 +4,7 @@ local winlib = import 'windows-observ-lib/main.libsonnet'; local config = (import 'config.libsonnet')._config; { local windows = - winlib.new( - dashboardNamePrefix=config.dashboardNamePrefix, - uid=config.uid, - filteringSelector=config.filteringSelector, - ) + winlib.new() + { config+: config, diff --git a/windows-observ-lib/config.libsonnet b/windows-observ-lib/config.libsonnet new file mode 100644 index 000000000..8ff0a6e0b --- /dev/null +++ b/windows-observ-lib/config.libsonnet @@ -0,0 +1,42 @@ +{ + // any modular library should include as inputs: + // 'dashboardNamePrefix' - Use as prefix for 
all Dashboards and (optional) rule groups + // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. + // 'groupLabels' - one or more labels that can be used to identify a 'group' of instances. In simple cases, can be 'job' or 'cluster'. + // 'instanceLabels' - one or more labels that can be used to identify a single instance. In simple cases, can be 'instance' or 'pod'. + // 'uid' - UID prefix prepended to each dashboard's original uid + groupLabels: ['job'], + instanceLabels: ['instance'], + filteringSelector: 'job=~".*windows.*"', + dashboardTags: ['windows'], + uid: 'windows', + dashboardNamePrefix: '', + + // optional + ignoreVolumes: 'HarddiskVolume.*', + alertsCPUThresholdWarning: '90', + alertMemoryUsageThresholdCritical: '90', + alertDiskUsageThresholdCritical: '90', + dashboardPeriod: 'now-1h', + dashboardTimezone: 'default', + dashboardRefresh: '1m', + + // optional Windows AD + alertsHighPendingReplicationOperations: 50, // count + alertsHighReplicationSyncRequestFailures: 0, // count + alertsHighPasswordChanges: 25, // count + alertsMetricsDownJobName: 'integrations/windows_exporter', + enableADDashboard: false, + + // logs lib related + enableLokiLogs: true, + extraLogLabels: ['channel', 'source', 'keywords', 'level'], + logsVolumeGroupBy: 'level', + showLogsVolume: true, + logsExtraFilters: + ||| + | label_format timestamp="{{__timestamp__}}" + | drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted + | line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}` + |||, +} diff --git a/windows-observ-lib/main.libsonnet b/windows-observ-lib/main.libsonnet index 1ee5a7a7a..df7145a01 100644 --- a/windows-observ-lib/main.libsonnet +++ b/windows-observ-lib/main.libsonnet @@ -1,4 +1,5 @@ local alerts = import './alerts.libsonnet'; +local config = import
'./config.libsonnet'; local dashboards = import './dashboards.libsonnet'; local datasources = import './datasources.libsonnet'; local g = import './g.libsonnet'; @@ -7,58 +8,10 @@ local targets = import './targets.libsonnet'; local commonlib = import 'common-lib/common/main.libsonnet'; { - new( - filteringSelector, - groupLabels=['job'], - instanceLabels=['instance'], - dashboardNamePrefix='', - dashboardTags=[uid], - uid, - ): { + new(): { local this = self, - config: { - // any modular library should include as inputs: - // 'dashboardNamePrefix' - Use as prefix for all Dashboards and (optional) rule groups - // 'filteringSelector' - Static selector to apply to ALL dashboard variables of type query, panel queries, alerts and recording rules. - // 'groupLabels' - one or more labels that can be used to identify 'group' of instances. In simple cases, can be 'job' or 'cluster'. - // 'instanceLabels' - one or more labels that can be used to identify single entity of instances. In simple cases, can be 'instance' or 'pod'. 
- // 'uid' - UID to prefix all dashboards original uids - groupLabels: groupLabels, - instanceLabels: instanceLabels, - filteringSelector: filteringSelector, - dashboardTags: dashboardTags, - uid: uid, - dashboardNamePrefix: dashboardNamePrefix, - - // optional - ignoreVolumes: 'HarddiskVolume.*', - alertsCPUThresholdWarning: '90', - alertMemoryUsageThresholdCritical: '90', - alertDiskUsageThresholdCritical: '90', - dashboardPeriod: 'now-1h', - dashboardTimezone: 'default', - dashboardRefresh: '1m', - - // optional Windows AD - alertsHighPendingReplicationOperations: 50, // count - alertsHighReplicationSyncRequestFailures: 0, // count - alertsHighPasswordChanges: 25, // count - alertsMetricsDownJobName: 'integrations/windows_exporter', - enableADDashboard: false, - - // logs lib related - enableLokiLogs: true, - extraLogLabels: ['channel', 'source', 'keywords', 'level'], - logsVolumeGroupBy: 'level', - showLogsVolume: true, - logsExtraFilters: - ||| - | label_format timestamp="{{__timestamp__}}" - | drop channel_extracted,source_extracted,computer_extracted,level_extracted,keywords_extracted - | line_format `{{ if eq "[[instance]]" ".*" }}{{ alignLeft 25 .instance}}|{{end}}{{alignLeft 12 .channel }}| {{ alignLeft 25 .source}}| {{ .message }}` - |||, - }, + config: config, grafana: { variables: commonlib.variables.new( filteringSelector=this.config.filteringSelector, @@ -74,7 +27,7 @@ local commonlib = import 'common-lib/common/main.libsonnet'; reboot: commonlib.annotations.reboot.new( title='Reboot', target=this.grafana.targets.reboot, - instanceLabels=std.join(',', instanceLabels), + instanceLabels=std.join(',', this.config.instanceLabels), ) + commonlib.annotations.base.withTagKeys(std.join(',', this.config.groupLabels + this.config.instanceLabels)), } @@ -124,5 +77,8 @@ local commonlib = import 'common-lib/common/main.libsonnet'; }, }, + withConfigMixin(config): { + config+: config, + }, } diff --git a/windows-observ-lib/mixin.libsonnet 
b/windows-observ-lib/mixin.libsonnet new file mode 100644 index 000000000..4e356b0a6 --- /dev/null +++ b/windows-observ-lib/mixin.libsonnet @@ -0,0 +1,10 @@ +local windowslib = import './main.libsonnet'; +{ + _config:: {}, + _windowslib:: + windowslib.new() + + windowslib.withConfigMixin(self._config), + grafanaDashboards+:: self._windowslib.grafana.dashboards, + prometheusAlerts+:: self._windowslib.prometheus.alerts, + prometheusRules+:: self._windowslib.prometheus.recordingRules, +} diff --git a/windows-observ-lib/panels.libsonnet b/windows-observ-lib/panels.libsonnet index 8170502fb..6b436c612 100644 --- a/windows-observ-lib/panels.libsonnet +++ b/windows-observ-lib/panels.libsonnet @@ -187,7 +187,8 @@ local utils = commonlib.utils; A high number of context switches or interrupts can indicate that the system is overloaded or that there are problems with specific devices or processes. ||| - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), systemExceptions: commonlib.panels.generic.timeSeries.base.new( 'System calls and exceptions', @@ -195,14 +196,16 @@ local utils = commonlib.utils; t.windowsSystemExceptions, t.windowsSystemCalls, ], - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), systemThreads: commonlib.panels.generic.timeSeries.base.new( 'System threads', targets=[ t.windowsSystemThreads, ], - ), + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), timeNtpStatus: commonlib.panels.system.statusHistory.ntp.new( 'NTP status', @@ -224,7 +227,7 @@ local utils = commonlib.utils; Time offset: Absolute time offset between the system clock and the chosen time source, in seconds. ||| ) - + g.panel.timeSeries.standardOptions.withUnit('seconds') + + g.panel.timeSeries.standardOptions.withUnit('s') + g.panel.timeSeries.standardOptions.withNoValue('No data.
Please check that "time" collector is enabled.'), cpuCount: commonlib.panels.cpu.stat.count.new(targets=[t.cpuCount]), cpuUsageTs: commonlib.panels.cpu.timeSeries.utilization.new(targets=[t.cpuUsage]), @@ -242,17 +245,19 @@ local utils = commonlib.utils; CPU usage by different modes. ||| ), - cpuQueue: commonlib.panels.generic.timeSeries.base.new( - 'CPU average queue size', - targets=[t.cpuQueue], - description=||| - The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment. - It is an essential performance indicator that reflects the workload and responsiveness of the CPU. - When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately. + cpuQueue: + commonlib.panels.generic.timeSeries.base.new( + 'CPU average queue size', + targets=[t.cpuQueue], + description=||| + The CPU average queue size in Windows, often referred to as the "Processor Queue Length" or "CPU Queue Length," is a metric that measures the number of threads or tasks waiting to be processed by the central processing unit (CPU) at a given moment. + It is an essential performance indicator that reflects the workload and responsiveness of the CPU. + When the CPU queue length is high, it indicates that there are more tasks in line for processing than the CPU can handle immediately. - This can lead to system slowdowns, decreased responsiveness, and potential performance issues. High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it. - ||| - ), + This can lead to system slowdowns, decreased responsiveness, and potential performance issues. 
High CPU queue lengths are often associated with CPU saturation, where the CPU is struggling to keep up with the demands placed on it. + ||| + ) + + g.panel.timeSeries.standardOptions.withUnit('short'), memoryTotalBytes: commonlib.panels.memory.stat.total.new(targets=[t.memoryTotalBytes]), memoryPageTotalBytes: commonlib.panels.memory.stat.total.new( @@ -363,7 +368,7 @@ local utils = commonlib.utils; targets=[t.osInfo], description="System's hostname." ) - { options+: { reduceOptions+: { fields: '/^hostname$/' } } }, + { options+: { reduceOptions+: { fields: '/^instance$/' } } }, networkErrorsAndDroppedPerSec: commonlib.panels.network.timeSeries.errors.new( 'Network errors and dropped packets',