You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboardExpand all lines: resources/php-fpm/alerts.yaml
+27-64Lines changed: 27 additions & 64 deletions
Original file line number
Diff line number
Diff line change
@@ -9,83 +9,46 @@ configurations:
9
9
- kind: Prometheus
10
10
data: |-
11
11
groups:
12
-
- name: "Php-Fpm"
13
-
groupId: "php-fpm"
14
-
description: "Php-Fpm alerts"
15
-
integrationType: "php-fpm"
16
-
scopeVariables:
17
-
- variable: cluster
18
-
label: kubernetes.cluster.name
19
-
operator: in
20
-
- variable: namespace
21
-
label: kubernetes.namespace.name
22
-
operator: in
23
-
- variable: workload
24
-
label: kubernetes.workload.name
25
-
operator: in
12
+
- name: Php-Fpm
26
13
rules:
27
-
- alert: "[Php-Fpm] Percentage of instances low"
28
-
alertId: "PercentageOfInstancesLow"
29
-
description: "Most of the instances are down"
14
+
- alert: '[Php-Fpm] Percentage of instances low'
30
15
expr: |
31
-
sum by (kube_workload_name,kube_namespace_name,kube_cluster_name)(phpfpm_up{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload})/sum by (kube_workload_name,kube_namespace_name,kube_cluster_name)(kube_workload_status_desired{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}) < 0.75
16
+
sum (phpfpm_up) / count (phpfpm_up) < 0.75 # up instances / all scraped instances; assumes phpfpm_up is a 0/1 gauge
32
17
for: 5m
33
18
labels:
34
-
severity: high
19
+
severity: critical
35
20
annotations:
36
-
summary: |
37
-
[{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}}
38
-
description: |
39
-
Most of the instances are down
40
-
- alert: "[Php-Fpm] Recently reboot"
41
-
alertId: "RecentlyReeboot"
42
-
description: "Instances have been recently reboot"
21
+
description: Less than 75% of instances are up
22
+
- alert: '[Php-Fpm] Recently reboot'
43
23
expr: |
44
-
(count by (kube_cluster_name, kube_namespace_name,kube_pod_name)(phpfpm_start_since{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload} < 1800) or vector (0))/sum by (kube_cluster_name, kube_namespace_name,kube_pod_name)(phpfpm_up{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}) > 0.4
24
+
(count (phpfpm_start_since < 1800) or vector (0))/sum (phpfpm_up) > 0.4
45
25
for: 5m
46
26
labels:
47
-
severity: high
27
+
severity: info
48
28
annotations:
49
-
summary: |
50
-
[{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}}
51
-
description: |
52
-
Instances have been recently reboot
53
-
- alert: "[Php-Fpm] Limit of child proccess exceeded"
54
-
alertId: "LimitOfChildsExceeded"
55
-
description: "Number of childs process have been exceeded"
29
+
description: Instances have been recently rebooted
30
+
- alert: '[Php-Fpm] Limit of child proccess exceeded'
56
31
expr: |
57
-
sum by (kube_cluster_name, kube_namespace_name,kube_pod_name) (rate (phpfpm_max_children_reached{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}[5m])) > 0
32
+
sum (rate (phpfpm_max_children_reached[5m])) > 0
58
33
for: 5m
59
34
labels:
60
-
severity: high
35
+
severity: critical
61
36
annotations:
62
-
summary: |
63
-
[{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}}
64
-
description: |
65
-
Number of childs process have been exceeded
66
-
- alert: "[Php-Fpm] Reaching limit of queue process"
67
-
alertId: "ReachedQueueLimit"
68
-
description: "Buffer of queue requests reaching its limit"
37
+
description: The maximum number of child processes has been reached
38
+
- alert: '[Php-Fpm] Reaching limit of queue process'
0 commit comments