Skip to content

Commit 70fc75e

Browse files
authored
Merge branch 'master' into system-slice-metrics
2 parents aeba5f5 + 86f29cf commit 70fc75e

File tree

511 files changed

+63184
-64047
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

511 files changed

+63184
-64047
lines changed

.github/workflows/build.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ jobs:
1919
cache-hit: ${{ steps.tools-cache.outputs.cache-hit }}
2020
steps:
2121
- name: Checkout
22-
uses: actions/checkout@v4
22+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
2323
with:
2424
fetch-depth: 0
2525
persist-credentials: false
2626
- name: Setup Go
27-
uses: actions/setup-go@v5
27+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6
2828
with:
2929
go-version: 1.23
3030
- name: Tools cache
@@ -45,12 +45,12 @@ jobs:
4545
runs-on: ubuntu-latest
4646
steps:
4747
- name: Checkout
48-
uses: actions/checkout@v4
48+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
4949
with:
5050
fetch-depth: 0
5151
persist-credentials: false
5252
- name: Setup Go
53-
uses: actions/setup-go@v5
53+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6
5454
with:
5555
go-version: 1.23
5656
- name: Restore Tools cache
@@ -69,12 +69,12 @@ jobs:
6969
runs-on: ubuntu-latest
7070
steps:
7171
- name: Checkout
72-
uses: actions/checkout@v4
72+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
7373
with:
7474
fetch-depth: 0
7575
persist-credentials: false
7676
- name: Setup Go
77-
uses: actions/setup-go@v5
77+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6
7878
with:
7979
go-version: 1.23
8080
- name: Restore Tools cache

.github/workflows/test-mixins.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@ jobs:
1818
cache-hit: ${{ steps.tools-cache.outputs.cache-hit }}
1919
steps:
2020
- name: Checkout
21-
uses: actions/checkout@v4
21+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
2222
with:
2323
fetch-depth: 0
2424
persist-credentials: false
2525
- name: Setup Go
26-
uses: actions/setup-go@v5
26+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6
2727
with:
2828
go-version: 1.23
2929
- name: Tools cache
@@ -46,13 +46,13 @@ jobs:
4646
changed-mixins: ${{ steps.changed-mixins.outputs.all_changed_files }}
4747
steps:
4848
- name: Checkout
49-
uses: actions/checkout@v4
49+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
5050
with:
5151
fetch-depth: 0
5252
persist-credentials: false
5353
- name: Get changed mixins
5454
id: changed-mixins
55-
uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
55+
uses: step-security/changed-files@95b56dadb92a30ca9036f16423fd3c088a71ee94 # v46.0.5
5656
with:
5757
dir_names: true
5858
dir_names_exclude_current_dir: true
@@ -82,7 +82,7 @@ jobs:
8282
fail-fast: false
8383
services:
8484
grafana:
85-
image: grafana/grafana:11.4.0
85+
image: grafana/grafana:12.2.1@sha256:35c41e0fd0295f5d0ee5db7e780cf33506abfaf47686196f825364889dee878b
8686
ports:
8787
- 3000:3000
8888
env:
@@ -91,12 +91,12 @@ jobs:
9191
GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
9292
steps:
9393
- name: Checkout
94-
uses: actions/checkout@v4
94+
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
9595
with:
9696
fetch-depth: 0
9797
persist-credentials: false
9898
- name: Setup Go
99-
uses: actions/setup-go@v5
99+
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6
100100
with:
101101
go-version: 1.23
102102
- name: Restore Tools cache

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,4 +87,5 @@ update-mixins:
8787

8888
tests:
8989
pushd . && cd ./common-lib && make vendor && make tests
90+
pushd . && cd ./grafana-builder/test && make tests
9091
pushd . && cd ./mixin-utils/test && make tests

README.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,6 @@ The monitoring mixins in this repository use two linting tools to ensure quality
4141

4242
An observability library is a flexible format for describing dashboards and alerts in a modular way, so that libraries can be imported into one another or into monitoring mixins. Observability libraries can be found in folders with the `-observ-lib` suffix. The [Common library](https://github.com/grafana/jsonnet-libs/tree/master/common-lib) is also used to apply consistent style options.
4343

44-
See [helloworld-observ-lib](helloworld-observ-lib/) for starter template and format description.
45-
46-
More examples:
47-
- [windows-observ-lib](windows-observ-lib/)
48-
4944
### Observability libraries signal extension
5045

5146
[Signal](https://github.com/grafana/jsonnet-libs/tree/master/common-lib/common/signal#signal) is an experimental extension to the observability library format for declaring metrics (signals) and then rendering them as different Grafana panel types (timeseries, stat, table, etc.) or as alert rules.
@@ -58,6 +53,7 @@ Examples:
5853
- [process-observ-lib](process-observ-lib/)
5954
- [golang-observ-lib](golang-observ-lib/)
6055
- [csp-mixin](csp-mixin/)
56+
- [windows-observ-lib](windows-observ-lib/)
6157

6258
## Prometheus rules testing for monitoring mixins and observability libraries
6359

aerospike-mixin/alerts/alerts.libsonnet renamed to aerospike-mixin/alerts.libsonnet

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
{
2-
prometheusAlerts+:: {
3-
groups+: [
2+
new(this): {
3+
groups: [
44
{
5-
name: 'aerospike',
5+
name: 'aerospike.rules',
66
rules: [
77
{
88
alert: 'AerospikeNodeHighMemoryUsage',
99
expr: |||
1010
100 - sum without (service) (aerospike_node_stats_system_free_mem_pct) >= %(alertsCriticalNodeHighMemoryUsage)s
11-
||| % $._config,
11+
||| % this.config,
1212
'for': '5m',
1313
labels: {
1414
severity: 'critical',
@@ -19,14 +19,14 @@
1919
(
2020
'{{ printf "%%.0f" $value }} percent of system memory used on node {{$labels.instance}} on cluster {{$labels.aerospike_cluster}}, ' +
2121
'which is above the threshold of %(alertsCriticalNodeHighMemoryUsage)s.'
22-
) % $._config,
22+
) % this.config,
2323
},
2424
},
2525
{
2626
alert: 'AerospikeNamespaceHighDiskUsage',
2727
expr: |||
2828
100 - sum without (service) (aerospike_namespace_device_free_pct) >= %(alertsCriticalNamespaceHighDiskUsage)s
29-
||| % $._config,
29+
||| % this.config,
3030
'for': '5m',
3131
labels: {
3232
severity: 'critical',
@@ -37,14 +37,14 @@
3737
(
3838
'{{ printf "%%.0f" $value }} percent of disk space available for namespace {{$labels.ns}} on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
3939
'which is above the threshold of %(alertsCriticalNamespaceHighDiskUsage)s.'
40-
) % $._config,
40+
) % this.config,
4141
},
4242
},
4343
{
4444
alert: 'AerospikeUnavailablePartitions',
4545
expr: |||
4646
sum without(service) (aerospike_namespace_unavailable_partitions) > %(alertsCriticalUnavailablePartitions)s
47-
||| % $._config,
47+
||| % this.config,
4848
'for': '5m',
4949
labels: {
5050
severity: 'critical',
@@ -55,14 +55,14 @@
5555
(
5656
'{{ printf "%%.0f" $value }} unavailable partition(s) in namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
5757
'which is above the threshold of %(alertsCriticalUnavailablePartitions)s.'
58-
) % $._config,
58+
) % this.config,
5959
},
6060
},
6161
{
6262
alert: 'AerospikeDeadPartitions',
6363
expr: |||
6464
sum without(service) (aerospike_namespace_dead_partitions) > %(alertsCriticalDeadPartitions)s
65-
||| % $._config,
65+
||| % this.config,
6666
'for': '5m',
6767
labels: {
6868
severity: 'critical',
@@ -73,14 +73,14 @@
7373
(
7474
'{{ printf "%%.0f" $value }} dead partition(s) in namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
7575
'which is above the threshold of %(alertsCriticalDeadPartitions)s.'
76-
) % $._config,
76+
) % this.config,
7777
},
7878
},
7979
{
8080
alert: 'AerospikeNamespaceRejectingWrites',
8181
expr: |||
8282
sum without(service) (aerospike_namespace_stop_writes + aerospike_namespace_clock_skew_stop_writes) > %(alertsCriticalSystemRejectingWrites)s
83-
||| % $._config,
83+
||| % this.config,
8484
'for': '5m',
8585
labels: {
8686
severity: 'critical',
@@ -90,14 +90,14 @@
9090
description:
9191
(
9292
'Namespace {{$labels.ns}} on node {{$labels.instance}} on cluster {{$labels.aerospike_cluster}} is currently rejecting all client-originated writes.'
93-
) % $._config,
93+
) % this.config,
9494
},
9595
},
9696
{
9797
alert: 'AerospikeHighClientReadErrorRate',
9898
expr: |||
99-
sum without(service) (rate(aerospike_namespace_client_read_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_read_error[5m])) + sum without(service) (rate(aerospike_namespace_client_read_success[5m])), 1)) > %(alertsWarningHighClientReadErrorRate)s
100-
||| % $._config,
99+
sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_read_error[5m])) / (clamp_min(sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_read_error[5m])) + sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_read_success[5m])), 1)) > %(alertsWarningHighClientReadErrorRate)s
100+
||| % this.config,
101101
'for': '5m',
102102
labels: {
103103
severity: 'warning',
@@ -108,14 +108,14 @@
108108
(
109109
'{{ printf "%%.0f" $value }} percent of client read transactions are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
110110
'which is above the threshold of %(alertsWarningHighClientReadErrorRate)s.'
111-
) % $._config,
111+
) % this.config,
112112
},
113113
},
114114
{
115115
alert: 'AerospikeHighClientWriteErrorRate',
116116
expr: |||
117-
sum without(service) (rate(aerospike_namespace_client_write_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_write_error[5m])) + sum without(service) (rate(aerospike_namespace_client_write_success[5m])), 1)) > %(alertsWarningHighClientWriteErrorRate)s
118-
||| % $._config,
117+
sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_write_error[5m])) / (clamp_min(sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_write_error[5m])) + sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_write_success[5m])), 1)) > %(alertsWarningHighClientWriteErrorRate)s
118+
||| % this.config,
119119
'for': '5m',
120120
labels: {
121121
severity: 'warning',
@@ -126,14 +126,14 @@
126126
(
127127
'{{ printf "%%.0f" $value }} percent of client write transactions are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
128128
'which is above the threshold of %(alertsWarningHighClientWriteErrorRate)s.'
129-
) % $._config,
129+
) % this.config,
130130
},
131131
},
132132
{
133133
alert: 'AerospikeHighClientUDFErrorRate',
134134
expr: |||
135-
sum without(service) (rate(aerospike_namespace_client_udf_error[5m])) / (clamp_min(sum without(service) (rate(aerospike_namespace_client_udf_error[5m])) + sum without(service) (rate(aerospike_namespace_client_udf_complete[5m])), 1)) > %(alertsWarningHighClientUDFErrorRate)s
136-
||| % $._config,
135+
sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_udf_error[5m])) / (clamp_min(sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_udf_error[5m])) + sum by(instance, aerospike_cluster, ns) (rate(aerospike_namespace_client_udf_complete[5m])), 1)) > %(alertsWarningHighClientUDFErrorRate)s
136+
||| % this.config,
137137
'for': '5m',
138138
labels: {
139139
severity: 'warning',
@@ -144,7 +144,7 @@
144144
(
145145
'{{ printf "%%.0f" $value }} percent of client UDF transactions are resulting in errors for namespace {{$labels.ns}}, on node {{$labels.instance}}, on cluster {{$labels.aerospike_cluster}}, ' +
146146
'which is above the threshold of %(alertsWarningHighClientUDFErrorRate)s.'
147-
) % $._config,
147+
) % this.config,
148148
},
149149
},
150150
],

aerospike-mixin/config.libsonnet

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,41 @@
11
{
2-
_config+:: {
3-
enableMultiCluster: false,
4-
aerospikeSelector: if self.enableMultiCluster then 'job=~"$job", cluster=~"$cluster"' else 'job=~"$job"',
5-
multiclusterSelector: 'job=~"$job"',
6-
filterSelector: 'job=~"integrations/aerospike"',
2+
local this = self,
3+
filteringSelector: 'job="integrations/aerospike"',
4+
groupLabels: ['job', 'aerospike_cluster', 'cluster'],
5+
logLabels: ['job', 'cluster', 'instance'],
6+
instanceLabels: ['instance', 'ns'], // ns == namespace
77

8-
dashboardTags: ['aerospike-mixin'],
9-
dashboardPeriod: 'now-30m',
10-
dashboardTimezone: 'default',
11-
dashboardRefresh: '1m',
8+
dashboardTags: [self.uid],
9+
uid: 'aerospike',
10+
dashboardNamePrefix: 'Aerospike',
11+
dashboardPeriod: 'now-30m',
12+
dashboardTimezone: 'default',
13+
dashboardRefresh: '1m',
14+
metricsSource: [
15+
'prometheusAerospike7', // For queries that are required for Aerospike 7.0+ with metric changes
16+
'prometheus', // For Aerospike < 7.0
17+
],
1218

13-
// alerts thresholds
14-
alertsCriticalNodeHighMemoryUsage: 80, // %
15-
alertsCriticalNamespaceHighDiskUsage: 80, // %
16-
alertsCriticalUnavailablePartitions: 0, // count
17-
alertsCriticalDeadPartitions: 0, // count
18-
alertsCriticalSystemRejectingWrites: 0, // count
19-
alertsWarningHighClientReadErrorRate: 25, // %
20-
alertsWarningHighClientWriteErrorRate: 25, // %
21-
alertsWarningHighClientUDFErrorRate: 25, // %
19+
// Logging configuration
20+
enableLokiLogs: true,
21+
extraLogLabels: ['level'], // Required by logs-lib
22+
logsVolumeGroupBy: 'level',
23+
showLogsVolume: true,
2224

23-
enableLokiLogs: true,
25+
// Alerts thresholds
26+
alertsCriticalNodeHighMemoryUsage: 80, // %
27+
alertsCriticalNamespaceHighDiskUsage: 80, // %
28+
alertsCriticalUnavailablePartitions: 0, // count
29+
alertsCriticalDeadPartitions: 0, // count
30+
alertsCriticalSystemRejectingWrites: 0, // count
31+
alertsWarningHighClientReadErrorRate: 25, // %
32+
alertsWarningHighClientWriteErrorRate: 25, // %
33+
alertsWarningHighClientUDFErrorRate: 25, // %
34+
35+
// Signals configuration
36+
signals+: {
37+
overview: (import './signals/overview.libsonnet')(this),
38+
namespace: (import './signals/namespace.libsonnet')(this),
39+
instance: (import './signals/instance.libsonnet')(this),
2440
},
2541
}

0 commit comments

Comments
 (0)