2
2
# Alerting rules ###############################################################################################################################
3
3
# ###############################################################################################################################################
4
4
5
- resource "aws_prometheus_rule_group_namespace" "nodenw " {
5
+ resource "aws_prometheus_rule_group_namespace" "alerting_rules " {
6
6
count = var. enable_alerting_rules ? 1 : 0
7
7
8
8
name = " nodenw-rules"
@@ -19,18 +19,6 @@ groups:
19
19
annotations:
20
20
description: Network interface "{{ $labels.device }}" changing its up status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod }}
21
21
summary: Network interface is often changing its status
22
- EOF
23
- }
24
-
25
-
26
-
27
- resource "aws_prometheus_rule_group_namespace" "nodeexporter" {
28
- count = var. enable_alerting_rules ? 1 : 0
29
-
30
- name = " nodeexporter-rules"
31
- workspace_id = var. managed_prometheus_workspace_id
32
- data = << EOF
33
- groups:
34
22
- name: nodeexp-01
35
23
rules:
36
24
- alert: NodeFilesystemSpaceFillingUp
@@ -208,16 +196,6 @@ groups:
208
196
annotations:
209
197
description: File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.
210
198
summary: Kernel is predicted to exhaust file descriptors limit soon.
211
- EOF
212
- }
213
-
214
- resource "aws_prometheus_rule_group_namespace" "kubesyschdlr" {
215
- count = var. enable_alerting_rules ? 1 : 0
216
-
217
- name = " kubesyschdlr-rules"
218
- workspace_id = var. managed_prometheus_workspace_id
219
- data = << EOF
220
- groups:
221
199
- name: kubesysschdlr-01
222
200
rules:
223
201
- alert: KubeSchedulerDown
@@ -228,16 +206,6 @@ groups:
228
206
annotations:
229
207
description: KubeScheduler has disappeared from Prometheus target discovery.
230
208
summary: Target disappeared from Prometheus target discovery.
231
- EOF
232
- }
233
-
234
- resource "aws_prometheus_rule_group_namespace" "kubesyskblt" {
235
- count = var. enable_alerting_rules ? 1 : 0
236
-
237
- name = " kubesyskblt-rules"
238
- workspace_id = var. managed_prometheus_workspace_id
239
- data = << EOF
240
- groups:
241
209
- name: kubesyskblt-01
242
210
rules:
243
211
- alert: KubeNodeNotReady
@@ -364,17 +332,6 @@ groups:
364
332
annotations:
365
333
description: Kubelet has disappeared from Prometheus target discovery.
366
334
summary: Target disappeared from Prometheus target discovery.
367
-
368
- EOF
369
- }
370
-
371
- resource "aws_prometheus_rule_group_namespace" "kubesyskbpxy" {
372
- count = var. enable_alerting_rules ? 1 : 0
373
-
374
- name = " kubesyskbpxy-rules"
375
- workspace_id = var. managed_prometheus_workspace_id
376
- data = << EOF
377
- groups:
378
335
- name: kubesyspxy-01
379
336
rules:
380
337
- alert: KubeProxyDown
@@ -385,16 +342,6 @@ groups:
385
342
annotations:
386
343
description: KubeProxy has disappeared from Prometheus target discovery.
387
344
summary: Target disappeared from Prometheus target discovery.
388
- EOF
389
- }
390
-
391
- resource "aws_prometheus_rule_group_namespace" "kubesys" {
392
- count = var. enable_alerting_rules ? 1 : 0
393
-
394
- name = " kubesys-rules"
395
- workspace_id = var. managed_prometheus_workspace_id
396
- data = << EOF
397
- groups:
398
345
- name: kubesys-01
399
346
rules:
400
347
- alert: KubeVersionMismatch
@@ -415,17 +362,6 @@ groups:
415
362
annotations:
416
363
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance }}' is experiencing {{ $value | humanizePercentage }} errors.'
417
364
summary: Kubernetes API server client is experiencing errors.
418
-
419
- EOF
420
- }
421
-
422
- resource "aws_prometheus_rule_group_namespace" "kubesyscm" {
423
- count = var. enable_alerting_rules ? 1 : 0
424
-
425
- name = " kubesyscm-rules"
426
- workspace_id = var. managed_prometheus_workspace_id
427
- data = << EOF
428
- groups:
429
365
- name: kubesyscm-01
430
366
rules:
431
367
- alert: KubeControllerManagerDown
@@ -436,16 +372,6 @@ groups:
436
372
annotations:
437
373
description: KubeControllerManager has disappeared from Prometheus target discovery.
438
374
summary: Target disappeared from Prometheus target discovery.
439
- EOF
440
- }
441
-
442
- resource "aws_prometheus_rule_group_namespace" "kubesysapi" {
443
- count = var. enable_alerting_rules ? 1 : 0
444
-
445
- name = " kubesysapi-rules"
446
- workspace_id = var. managed_prometheus_workspace_id
447
- data = << EOF
448
- groups:
449
375
- name: kubesysapi-01
450
376
rules:
451
377
- alert: KubeClientCertificateExpiration
@@ -503,18 +429,6 @@ groups:
503
429
annotations:
504
430
description: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
505
431
summary: The kubernetes apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
506
- EOF
507
- }
508
-
509
- # Default limit of 10 needs to be raised
510
-
511
- resource "aws_prometheus_rule_group_namespace" "kubestorage" {
512
- count = var. enable_alerting_rules ? 1 : 0
513
-
514
- name = " kubestorage-rules"
515
- workspace_id = var. managed_prometheus_workspace_id
516
- data = << EOF
517
- groups:
518
432
- name: kubestg-01
519
433
rules:
520
434
- alert: KubePersistentVolumeFillingUp
@@ -565,16 +479,6 @@ groups:
565
479
annotations:
566
480
description: The persistent volume {{ $labels.persistentvolume }} has status {{ $labels.phase }}.
567
481
summary: PersistentVolume is having issues with provisioning.
568
- EOF
569
- }
570
-
571
- resource "aws_prometheus_rule_group_namespace" "kuberesources" {
572
- count = var. enable_alerting_rules ? 1 : 0
573
-
574
- name = " kuberesources-rules"
575
- workspace_id = var. managed_prometheus_workspace_id
576
- data = << EOF
577
- groups:
578
482
- name: kuberes-01
579
483
rules:
580
484
- alert: KubeCPUOvercommit
@@ -655,16 +559,6 @@ groups:
655
559
annotations:
656
560
description: The {{ $value | humanizePercentage }} throttling of CPU in namespace {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod }}.
657
561
summary: Processes experience elevated CPU throttling.
658
- EOF
659
- }
660
-
661
- resource "aws_prometheus_rule_group_namespace" "kubeapps" {
662
- count = var. enable_alerting_rules ? 1 : 0
663
-
664
- name = " kubeapps-rules"
665
- workspace_id = var. managed_prometheus_workspace_id
666
- data = << EOF
667
- groups:
668
562
- name: kubeapps-01
669
563
rules:
670
564
- alert: KubePodCrashLooping
@@ -814,18 +708,6 @@ groups:
814
708
annotations:
815
709
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} has been running at max replicas for longer than 15 minutes.
816
710
summary: HPA is running at max replicas
817
- EOF
818
- }
819
-
820
-
821
-
822
- resource "aws_prometheus_rule_group_namespace" "kubestm" {
823
- count = var. enable_alerting_rules ? 1 : 0
824
-
825
- name = " kubestm-rules"
826
- workspace_id = var. managed_prometheus_workspace_id
827
- data = << EOF
828
- groups:
829
711
- name: kubestm-01
830
712
rules:
831
713
- alert: KubeStateMetricsListErrors
@@ -866,16 +748,6 @@ groups:
866
748
annotations:
867
749
description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
868
750
summary: kube-state-metrics shards are missing.
869
- EOF
870
- }
871
-
872
- resource "aws_prometheus_rule_group_namespace" "apislos" {
873
- count = var. enable_alerting_rules ? 1 : 0
874
-
875
- name = " api-slos"
876
- workspace_id = var. managed_prometheus_workspace_id
877
- data = << EOF
878
- groups:
879
751
- name: apislos-01
880
752
rules:
881
753
- alert: KubeAPIErrorBudgetBurn
@@ -924,17 +796,6 @@ groups:
924
796
annotations:
925
797
description: The API server is burning too much error budget.
926
798
summary: The API server is burning too much error budget.
927
- EOF
928
- }
929
-
930
-
931
- resource "aws_prometheus_rule_group_namespace" "generic" {
932
- count = var. enable_alerting_rules ? 1 : 0
933
-
934
- name = " generic-rules"
935
- workspace_id = var. managed_prometheus_workspace_id
936
- data = << EOF
937
- groups:
938
799
- name: general-01
939
800
rules:
940
801
- alert: TargetDown
@@ -960,16 +821,6 @@ groups:
960
821
severity: none
961
822
annotations:
962
823
description: This is an alert that is used to inhibit info alerts. By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with other alerts. This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a severity of 'warning' or 'critical' starts firing on the same namespace. This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
963
- EOF
964
- }
965
-
966
- resource "aws_prometheus_rule_group_namespace" "etcd" {
967
- count = var. enable_alerting_rules ? 1 : 0
968
-
969
- name = " etcd-rules"
970
- workspace_id = var. managed_prometheus_workspace_id
971
- data = << EOF
972
- groups:
973
824
- name: etcd-01
974
825
rules:
975
826
- alert: etcdInsufficientMembers
0 commit comments