Skip to content

Commit 6226b75

Browse files
authored
MSSQL Utilize More Libraries (#1450)
* mssql modernization
* persist makefile
* add prometheus_rules_out
* apply suggestion from review
* use signals API
* make fmt
* fix lint
* pr feedback; minus the db as instance label
* use guidance to adjust query generation so only database signals have the database filter
* update plugin version to match
* keep things that were increase as increase but up the minstep for proper viewing
* remove duplicate and unneeded set
* remove decorate dashboard
1 parent 1cd23af commit 6226b75

22 files changed

+2108
-3012
lines changed

mssql-mixin/alerts/alerts.libsonnet renamed to mssql-mixin/alerts.libsonnet

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,30 +1,30 @@
11
{
2-
prometheusAlerts+:: {
3-
groups+: [
2+
new(this): {
3+
groups: [
44
{
55
name: 'MSSQLAlerts',
66
rules: [
77
{
88
alert: 'MSSQLHighNumberOfDeadlocks',
99
expr: |||
1010
increase(mssql_deadlocks_total{}[5m]) > %(alertsWarningDeadlocks5m)s
11-
||| % $._config,
11+
||| % this.config,
1212
'for': '5m',
1313
labels: {
1414
severity: 'warning',
1515
},
1616
annotations: {
17-
summary: 'There are deadlocks ocurring in the database.',
17+
summary: 'There are deadlocks occurring in the database.',
1818
description:
1919
('{{ printf "%%.2f" $value }} deadlocks have occurred over the last 5 minutes on {{$labels.instance}}, ' +
20-
'which is above threshold of %(alertsWarningDeadlocks5m)s deadlocks.') % $._config,
20+
'which is above threshold of %(alertsWarningDeadlocks5m)s deadlocks.') % this.config,
2121
},
2222
},
2323
{
2424
alert: 'MSSQLModerateReadStallTime',
2525
expr: |||
2626
1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > %(alertsWarningModerateReadStallTimeMS)s
27-
||| % $._config,
27+
||| % this.config,
2828
'for': '5m',
2929
labels: {
3030
severity: 'warning',
@@ -33,14 +33,14 @@
3333
summary: 'There is a moderate amount of IO stall for database reads.',
3434
description:
3535
('{{ printf "%%.2f" $value }}ms of IO read stall has occurred on {{$labels.instance}}, ' +
36-
'which is above threshold of %(alertsWarningModerateReadStallTimeMS)sms.') % $._config,
36+
'which is above threshold of %(alertsWarningModerateReadStallTimeMS)sms.') % this.config,
3737
},
3838
},
3939
{
4040
alert: 'MSSQLHighReadStallTime',
4141
expr: |||
4242
1000 * increase(mssql_io_stall_seconds_total{operation="read"}[5m]) > %(alertsCriticalHighReadStallTimeMS)s
43-
||| % $._config,
43+
||| % this.config,
4444
'for': '5m',
4545
labels: {
4646
severity: 'critical',
@@ -49,14 +49,14 @@
4949
summary: 'There is a high amount of IO stall for database reads.',
5050
description:
5151
('{{ printf "%%.2f" $value }}ms of IO read stall has occurred on {{$labels.instance}}, ' +
52-
'which is above threshold of %(alertsCriticalHighReadStallTimeMS)sms.') % $._config,
52+
'which is above threshold of %(alertsCriticalHighReadStallTimeMS)sms.') % this.config,
5353
},
5454
},
5555
{
5656
alert: 'MSSQLModerateWriteStallTime',
5757
expr: |||
5858
1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > %(alertsWarningModerateWriteStallTimeMS)s
59-
||| % $._config,
59+
||| % this.config,
6060
'for': '5m',
6161
labels: {
6262
severity: 'warning',
@@ -65,14 +65,14 @@
6565
summary: 'There is a moderate amount of IO stall for database writes.',
6666
description:
6767
('{{ printf "%%.2f" $value }}ms of IO write stall has occurred on {{$labels.instance}}, ' +
68-
'which is above threshold of %(alertsWarningModerateWriteStallTimeMS)sms.') % $._config,
68+
'which is above threshold of %(alertsWarningModerateWriteStallTimeMS)sms.') % this.config,
6969
},
7070
},
7171
{
7272
alert: 'MSSQLHighWriteStallTime',
7373
expr: |||
7474
1000 * increase(mssql_io_stall_seconds_total{operation="write"}[5m]) > %(alertsCriticalHighWriteStallTimeMS)s
75-
||| % $._config,
75+
||| % this.config,
7676
'for': '5m',
7777
labels: {
7878
severity: 'critical',
@@ -81,7 +81,7 @@
8181
summary: 'There is a high amount of IO stall for database writes.',
8282
description:
8383
('{{ printf "%%.2f" $value }}ms of IO write stall has occurred on {{$labels.instance}}, ' +
84-
'which is above threshold of %(alertsCriticalHighWriteStallTimeMS)sms.') % $._config,
84+
'which is above threshold of %(alertsCriticalHighWriteStallTimeMS)sms.') % this.config,
8585
},
8686
},
8787
],

mssql-mixin/config.libsonnet

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,21 +1,40 @@
11
{
2-
_config+:: {
3-
enableMultiCluster: false,
4-
multiclusterSelector: 'job=~"$job"',
5-
mssqlSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
6-
dashboardTags: ['mssql-mixin'],
7-
dashboardPeriod: 'now-1h',
8-
dashboardTimezone: 'default',
9-
dashboardRefresh: '1m',
2+
local this = self,
3+
enableMultiCluster: false,
4+
filteringSelector: 'job=~"integrations/mssql"',
5+
groupLabels: ['job', 'cluster'],
6+
instanceLabels: ['instance'],
7+
databaseLabels: ['db'],
108

11-
// alert thresholds
12-
alertsWarningDeadlocks5m: 10,
13-
alertsWarningModerateReadStallTimeMS: 200,
14-
alertsCriticalHighReadStallTimeMS: 400,
15-
alertsWarningModerateWriteStallTimeMS: 200,
16-
alertsCriticalHighWriteStallTimeMS: 400,
179

18-
// enable Loki logs
19-
enableLokiLogs: true,
10+
dashboardTags: [self.uid],
11+
legendLabels: ['instance'],
12+
uid: 'mssql',
13+
dashboardNamePrefix: 'MSSQL',
14+
15+
// additional params
16+
dashboardPeriod: 'now-1h',
17+
dashboardTimezone: 'default',
18+
dashboardRefresh: '1m',
19+
metricsSource: 'prometheus',
20+
21+
// logs lib related
22+
enableLokiLogs: true,
23+
logLabels: self.groupLabels + self.instanceLabels,
24+
extraLogLabels: [], // Required by logs-lib
25+
logsVolumeGroupBy: 'level',
26+
showLogsVolume: true,
27+
28+
// alert thresholds
29+
alertsWarningDeadlocks5m: 10,
30+
alertsWarningModerateReadStallTimeMS: 200,
31+
alertsCriticalHighReadStallTimeMS: 400,
32+
alertsWarningModerateWriteStallTimeMS: 200,
33+
alertsCriticalHighWriteStallTimeMS: 400,
34+
35+
signals+: {
36+
memory: (import './signals/memory.libsonnet')(this),
37+
connections: (import './signals/connections.libsonnet')(this),
38+
database: (import './signals/database.libsonnet')(this),
2039
},
2140
}

mssql-mixin/dashboards.libsonnet

Lines changed: 108 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,108 @@
1+
local g = import './g.libsonnet';
2+
local logslib = import 'logs-lib/logs/main.libsonnet';
3+
{
4+
local root = self,
5+
new(this)::
6+
local prefix = this.config.dashboardNamePrefix;
7+
local links = this.grafana.links;
8+
local tags = this.config.dashboardTags;
9+
local uid = g.util.string.slugify(this.config.uid);
10+
local vars = this.grafana.variables;
11+
local annotations = this.grafana.annotations;
12+
local refresh = this.config.dashboardRefresh;
13+
local period = this.config.dashboardPeriod;
14+
local timezone = this.config.dashboardTimezone;
15+
16+
{
17+
'mssql_overview.json':
18+
g.dashboard.new(prefix + ' overview')
19+
+ g.dashboard.withPanels(
20+
g.util.panel.resolveCollapsedFlagOnRows(
21+
g.util.grid.wrapPanels(
22+
[
23+
this.grafana.rows.connections + g.panel.row.withCollapsed(false),
24+
this.grafana.rows.memory + g.panel.row.withCollapsed(false),
25+
this.grafana.rows.database + g.panel.row.withCollapsed(false),
26+
],
27+
),
28+
)
29+
)
30+
+ root.applyCommon(
31+
vars.multiInstance,
32+
uid + '_mssql_overview',
33+
tags,
34+
links { mssqlOverview+:: {} },
35+
annotations,
36+
timezone,
37+
refresh,
38+
period
39+
),
40+
41+
'mssql_pages.json':
42+
g.dashboard.new(prefix + ' pages')
43+
+ g.dashboard.withPanels(
44+
g.util.panel.resolveCollapsedFlagOnRows(
45+
g.util.grid.wrapPanels(
46+
[
47+
this.grafana.rows.pages + g.panel.row.withCollapsed(false),
48+
]
49+
),
50+
)
51+
)
52+
+ root.applyCommon(
53+
vars.multiInstance,
54+
uid + '_mssql_pages',
55+
tags,
56+
links { mssqlPages+:: {} },
57+
annotations,
58+
timezone,
59+
refresh,
60+
period
61+
),
62+
63+
}
64+
+
65+
if this.config.enableLokiLogs then
66+
{
67+
'logs.json':
68+
logslib.new(
69+
prefix + ' logs',
70+
datasourceName=this.grafana.variables.datasources.loki.name,
71+
datasourceRegex=this.grafana.variables.datasources.loki.regex,
72+
filterSelector=this.config.filteringSelector,
73+
labels=this.config.groupLabels + this.config.extraLogLabels,
74+
formatParser=null,
75+
showLogsVolume=this.config.showLogsVolume,
76+
)
77+
{
78+
dashboards+:
79+
{
80+
logs+:
81+
root.applyCommon(super.logs.templating.list, uid=uid + '-logs', tags=tags, links=links { logs+:: {} }, annotations=annotations, timezone=timezone, refresh=refresh, period=period),
82+
},
83+
panels+:
84+
{
85+
logs+:
86+
g.panel.logs.options.withEnableLogDetails(true)
87+
+ g.panel.logs.options.withShowTime(false)
88+
+ g.panel.logs.options.withWrapLogMessage(false),
89+
},
90+
variables+: {
91+
toArray+: [
92+
this.grafana.variables.datasources.prometheus { hide: 2 },
93+
],
94+
},
95+
}.dashboards.logs,
96+
}
97+
else {},
98+
99+
applyCommon(vars, uid, tags, links, annotations, timezone, refresh, period):
100+
g.dashboard.withTags(tags)
101+
+ g.dashboard.withUid(uid)
102+
+ g.dashboard.withLinks(std.objectValues(links))
103+
+ g.dashboard.withTimezone(timezone)
104+
+ g.dashboard.withRefresh(refresh)
105+
+ g.dashboard.time.withFrom(period)
106+
+ g.dashboard.withVariables(vars)
107+
+ g.dashboard.withAnnotations(std.objectValues(annotations)),
108+
}

mssql-mixin/dashboards/dashboards.libsonnet

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)