Skip to content

Commit 12e1166

Browse files
committed
added old recording rules to defaults
1 parent 4bffe4b commit 12e1166

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

ansible/roles/kube_prometheus_stack/defaults/main/helm.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ kube_prometheus_stack_release_defaults:
9191
nodeSelector:
9292
clusterrole: "server"
9393

94-
additionalPrometheusRulesMap: "{{ prometheus_alert_rules }}"
94+
additionalPrometheusRulesMap: "{{ prometheus_rules }}"
9595

9696
grafana:
9797
service:

ansible/roles/kube_prometheus_stack/defaults/main/main.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,13 @@ prometheus_scrape_configs:
126126
# - prometheus/targets/*.yml
127127
# - prometheus/targets/*.json
128128

129-
prometheus_extra_alert_rules: []
129+
prometheus_extra_rules: []
130130

131-
prometheus_alert_rules:
131+
prometheus_rules:
132132
appliance-rules:
133133
groups:
134134
- name: all
135-
rules: "{{ prometheus_extra_alert_rules }}"
135+
rules: "{{ prometheus_extra_rules }}"
136136

137137
# ------------------------------------------------------------------------------------------
138138

environments/common/inventory/group_vars/all/prometheus.yml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,25 @@ prometheus_scrape_configs_default:
2323
replacement: '${1}'
2424

2525
prometheus_scrape_configs: "{{ prometheus_scrape_configs_default + (openondemand_scrape_configs if groups['openondemand'] | count > 0 else []) }}"
26-
prometheus_extra_alert_rules:
26+
prometheus_extra_rules:
2727
- alert: SlurmNodeDown
2828
annotations:
2929
message: '{% raw %}{{ $value }} Slurm nodes are in down status.{% endraw %}'
3030
summary: 'At least one Slurm node is down.'
3131
expr: "slurm_nodes_down > 0\n"
3232
labels:
3333
severity: critical
34+
- record: node_cpu_system_seconds:record
35+
expr: (100 * sum by(instance)(increase(node_cpu_seconds_total{mode="system",job="node-exporter"}[60s]))) / (sum by(instance)(increase(node_cpu_seconds_total{job="node-exporter"}[60s])))
36+
- record: node_cpu_user_seconds:record
37+
expr: (100 * sum by(instance)(increase(node_cpu_seconds_total{mode="user",job="node-exporter"}[60s]))) / (sum by(instance)(increase(node_cpu_seconds_total{job="node-exporter"}[60s])))
38+
- record: node_cpu_iowait_seconds:record
39+
expr: (100 * sum by(instance)(increase(node_cpu_seconds_total{mode="iowait",job="node-exporter"}[60s]))) / (sum by(instance)(increase(node_cpu_seconds_total{job="node-exporter"}[60s])))
40+
- record: node_cpu_other_seconds:record
41+
expr: (100 * sum by(instance)(increase(node_cpu_seconds_total{mode!="idle",mode!="user",mode!="system",mode!="iowait",job="node-exporter"}[60s]))) / (sum by(instance)(increase(node_cpu_seconds_total{job="node-exporter"}[60s])))
42+
- record: node_cpu_scaling_frequency_hertz_avg:record  # FIXME: the scaling-frequency recording rules (avg, min, max) are not working
43+
expr: avg by (instance) (node_cpu_scaling_frequency_hertz)
44+
- record: node_cpu_scaling_frequency_hertz_min:record
45+
expr: min by (instance) (node_cpu_scaling_frequency_hertz)
46+
- record: node_cpu_scaling_frequency_hertz_max:record
47+
expr: max by (instance) (node_cpu_scaling_frequency_hertz)

0 commit comments

Comments
 (0)