Skip to content

Commit 02e2d74

Browse files
authored
Remove certmanager alerts (#288)
Signed-off-by: ArthurSens <[email protected]> Signed-off-by: ArthurSens <[email protected]>
1 parent 1540f23 commit 02e2d74

File tree

3 files changed

+9
-51
lines changed

3 files changed

+9
-51
lines changed

lib/alert-filter.libsonnet

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ local unwatedAlerts = [
1616

1717
// From kube-prometheus
1818
'Watchdog',
19+
20+
// From certmanager
21+
'CertManagerAbsent',
22+
'CertManagerCertExpirySoon',
23+
'CertManagerCertNotReady',
24+
'CertManagerHittingRateLimits',
1925
];
2026

2127
{

monitoring-satellite/manifests/kube-prometheus-rules/rules.yaml

Lines changed: 2 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,54 +2200,6 @@ spec:
22002200
labels:
22012201
severity: warning
22022202
- name: cert-manager
2203-
rules:
2204-
- alert: CertManagerAbsent
2205-
annotations:
2206-
description: New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.
2207-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerAbsent.md
2208-
summary: Cert Manager has disappeared from Prometheus service discovery.
2209-
expr: absent(up{job="certmanager"})
2210-
for: 10m
2211-
labels:
2212-
severity: critical
2203+
rules: []
22132204
- name: certificates
2214-
rules:
2215-
- alert: CertManagerCertExpirySoon
2216-
annotations:
2217-
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
2218-
description: The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}.
2219-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerCertExpirySoon.md
2220-
summary: The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago.
2221-
expr: |
2222-
avg by (exported_namespace, namespace, name) (
2223-
certmanager_certificate_expiration_timestamp_seconds - time()
2224-
) < (7 * 24 * 3600) # 7 days in seconds
2225-
for: 1h
2226-
labels:
2227-
severity: warning
2228-
- alert: CertManagerCertNotReady
2229-
annotations:
2230-
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
2231-
description: This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead.
2232-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerCertNotReady.md
2233-
summary: The cert `{{ $labels.name }}` is not ready to serve traffic.
2234-
expr: |
2235-
max by (name, exported_namespace, namespace, condition) (
2236-
certmanager_certificate_ready_status{condition!="True"} == 1
2237-
)
2238-
for: 10m
2239-
labels:
2240-
severity: critical
2241-
- alert: CertManagerHittingRateLimits
2242-
annotations:
2243-
dashboard_url: https://grafana.example.com/d/TvuRo2iMk/cert-manager
2244-
description: Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.
2245-
runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerHittingRateLimits.md
2246-
summary: Cert manager hitting LetsEncrypt rate limits.
2247-
expr: |
2248-
sum by (host) (
2249-
rate(certmanager_http_acme_client_request_count{status="429"}[5m])
2250-
) > 0
2251-
for: 5m
2252-
labels:
2253-
severity: critical
2205+
rules: []

monitoring-satellite/manifests/rules.jsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ local rules = {
2222
monitoringSatellite.prometheusOperator.prometheusRule.spec.groups +
2323
monitoringSatellite.certmanager.prometheusRule.spec.groups,
2424
},
25-
},
25+
} + (import '../lib/alert-severity-mapper.libsonnet') + (import '../lib/alert-filter.libsonnet') + (import '../lib/alert-duration-mapper.libsonnet'),
2626
};
2727

2828
{ 'kube-prometheus-rules/rules': rules.prometheusRule }

0 commit comments

Comments
 (0)