|
| 1 | +apiVersion: v1 |
| 2 | +kind: Alert |
| 3 | +app: php-fpm |
| 4 | +version: 1.0.0 |
| 5 | +appVersion: |
| 6 | +- '7.2' |
| 7 | +descriptionFile: ALERTS.md |
| 8 | +configurations: |
| 9 | +- kind: Prometheus |
| 10 | + data: |- |
| 11 | + groups: |
| 12 | + - name: "Php-Fpm" |
| 13 | + groupId: "php-fpm" |
| 14 | + description: "Php-Fpm alerts" |
| 15 | + integrationType: "php-fpm" |
| 16 | + scopeVariables: |
| 17 | + - variable: cluster |
| 18 | + label: kubernetes.cluster.name |
| 19 | + operator: in |
| 20 | + - variable: namespace |
| 21 | + label: kubernetes.namespace.name |
| 22 | + operator: in |
| 23 | + - variable: workload |
| 24 | + label: kubernetes.workload.name |
| 25 | + operator: in |
| 26 | + rules: |
| 27 | + - alert: "[Php-Fpm] Percentage of instances low" |
| 28 | + alertId: "PercentageOfInstancesLow" |
| 29 | + description: "More than 25% of the desired instances are down" |
| 30 | + expr: | |
| 31 | + sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (phpfpm_up{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}) / sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (kube_workload_status_desired{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}) < 0.75 |
| 32 | + for: 5m |
| 33 | + labels: |
| 34 | + severity: high |
| 35 | + annotations: |
| 36 | + summary: | |
| 37 | + [{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}} |
| 38 | + description: | |
| 39 | + More than 25% of the desired instances are down |
| 40 | + - alert: "[Php-Fpm] Recently rebooted" |
| 41 | + alertId: "RecentlyReeboot" |
| 42 | + description: "Instances have been recently rebooted" |
| 43 | + expr: | |
| 44 | + count by (kube_cluster_name, kube_namespace_name, kube_workload_name) (phpfpm_start_since{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload} < 1800) / sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (phpfpm_up{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}) > 0.4 |
| 45 | + for: 5m |
| 46 | + labels: |
| 47 | + severity: high |
| 48 | + annotations: |
| 49 | + summary: | |
| 50 | + [{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}} |
| 51 | + description: | |
| 52 | + Instances have been recently rebooted |
| 53 | + - alert: "[Php-Fpm] Limit of child processes exceeded" |
| 54 | + alertId: "LimitOfChildsExceeded" |
| 55 | + description: "The limit of child processes has been exceeded" |
| 56 | + expr: | |
| 57 | + sum by (kube_cluster_name, kube_namespace_name, kube_workload_name, kube_pod_name) (rate(phpfpm_max_children_reached{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}[5m])) > 0 |
| 58 | + for: 5m |
| 59 | + labels: |
| 60 | + severity: high |
| 61 | + annotations: |
| 62 | + summary: | |
| 63 | + [{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}} |
| 64 | + description: | |
| 65 | + The limit of child processes has been exceeded |
| 66 | + - alert: "[Php-Fpm] Reaching limit of queue process" |
| 67 | + alertId: "ReachedQueueLimit" |
| 68 | + description: "Buffer of queue requests reaching its limit" |
| 69 | + expr: | |
| 70 | + phpfpm_listen_queue{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload} / phpfpm_listen_queue_length{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload} > 0.8 |
| 71 | + for: 5m |
| 72 | + labels: |
| 73 | + severity: medium |
| 74 | + annotations: |
| 75 | + summary: | |
| 76 | + [{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}} |
| 77 | + description: | |
| 78 | + Buffer of queue requests reaching its limit |
| 79 | + - alert: "[Php-Fpm] Requests processed reached timeout limit" |
| 80 | + alertId: "TimeoutLimitRequestReached" |
| 81 | + description: "Timeout limit reached by some of the requests" |
| 82 | + expr: | |
| 83 | + sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (rate(phpfpm_slow_requests{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}[5m])) / sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (rate(phpfpm_process_requests{kube_cluster_name=~$cluster,kube_namespace_name=~$namespace,kube_workload_name=~$workload}[5m])) > 0.75 |
| 84 | + for: 5m |
| 85 | + labels: |
| 86 | + severity: medium |
| 87 | + annotations: |
| 88 | + summary: | |
| 89 | + [{{$labels.kube_cluster_name}} > {{$labels.kube_namespace_name}} > {{$labels.kube_workload_name}}] {{__alert_name__}} is {{__alert_status__}} |
| 90 | + description: | |
| 91 | + Timeout limit reached by some of the requests |
0 commit comments