@@ -2200,54 +2200,6 @@ spec:
2200
2200
labels :
2201
2201
severity : warning
2202
2202
- name : cert-manager
2203
- rules :
2204
- - alert : CertManagerAbsent
2205
- annotations :
2206
- description : New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.
2207
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerAbsent.md
2208
- summary : Cert Manager has disappeared from Prometheus service discovery.
2209
- expr : absent(up{job="certmanager"})
2210
- for : 10m
2211
- labels :
2212
- severity : critical
2203
+ rules : []
2213
2204
- name : certificates
2214
- rules :
2215
- - alert : CertManagerCertExpirySoon
2216
- annotations :
2217
- dashboard_url : https://grafana.example.com/d/TvuRo2iMk/cert-manager
2218
- description : The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}.
2219
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerCertExpirySoon.md
2220
- summary : The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago.
2221
- expr : |
2222
- avg by (exported_namespace, namespace, name) (
2223
- certmanager_certificate_expiration_timestamp_seconds - time()
2224
-       ) < (21 * 24 * 3600) # 21 days in seconds
2225
- for : 1h
2226
- labels :
2227
- severity : warning
2228
- - alert : CertManagerCertNotReady
2229
- annotations :
2230
- dashboard_url : https://grafana.example.com/d/TvuRo2iMk/cert-manager
2231
- description : This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead.
2232
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerCertNotReady.md
2233
- summary : The cert `{{ $labels.name }}` is not ready to serve traffic.
2234
- expr : |
2235
- max by (name, exported_namespace, namespace, condition) (
2236
- certmanager_certificate_ready_status{condition!="True"} == 1
2237
- )
2238
- for : 10m
2239
- labels :
2240
- severity : critical
2241
- - alert : CertManagerHittingRateLimits
2242
- annotations :
2243
- dashboard_url : https://grafana.example.com/d/TvuRo2iMk/cert-manager
2244
- description : Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.
2245
- runbook_url : https://github.com/gitpod-io/runbooks/blob/main/runbooks/CertManagerHittingRateLimits.md
2246
- summary : Cert manager hitting LetsEncrypt rate limits.
2247
- expr : |
2248
- sum by (host) (
2249
- rate(certmanager_http_acme_client_request_count{status="429"}[5m])
2250
- ) > 0
2251
- for : 5m
2252
- labels :
2253
- severity : critical
2205
+ rules : []
0 commit comments