diff --git a/docs/metrics/workload/statefulset-metrics.md b/docs/metrics/workload/statefulset-metrics.md index 42ac8cac0..6c952e410 100644 --- a/docs/metrics/workload/statefulset-metrics.md +++ b/docs/metrics/workload/statefulset-metrics.md @@ -2,19 +2,118 @@ | Metric name | Metric type | Description | Labels/tags | Status | | ------------------------------------------------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------ | -| kube_statefulset_annotations | Gauge | Kubernetes annotations converted to Prometheus labels controlled via [--metric-annotations-allowlist](../../developer/cli-arguments.md) | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`annotation_STATEFULSET_ANNOTATION`=<STATEFULSET_ANNOTATION> | EXPERIMENTAL | +| kube_statefulset_annotations | Gauge | Kubernetes annotations converted to Prometheus labels controlled via [--metric-annotations-allowlist](../../developer/cli-arguments.md) | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`annotation_STATEFULSET_ANNOTATION`=<STATEFULSET_ANNOTATION> | BETA | | kube_statefulset_status_replicas | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_status_replicas_current | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_status_replicas_ready | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | -| kube_statefulset_status_replicas_available | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | EXPERIMENTAL | +| kube_statefulset_status_replicas_available | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | BETA | | kube_statefulset_status_replicas_updated | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_status_observed_generation | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_replicas | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_ordinals_start | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_metadata_generation | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | -| kube_statefulset_persistentvolumeclaim_retention_policy | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`when_deleted`=<statefulset-when-deleted-pvc-policy>
`when_scaled`=<statefulset-when-scaled-pvc-policy> | EXPERIMENTAL | +| kube_statefulset_persistentvolumeclaim_retention_policy | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`when_deleted`=<statefulset-when-deleted-pvc-policy>
`when_scaled`=<statefulset-when-scaled-pvc-policy> | BETA | | kube_statefulset_created | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | STABLE | | kube_statefulset_labels | Gauge | Kubernetes labels converted to Prometheus labels controlled via [--metric-labels-allowlist](../../developer/cli-arguments.md) | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`label_STATEFULSET_LABEL`=<STATEFULSET_LABEL> | STABLE | | kube_statefulset_status_current_revision | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`revision`=<statefulset-current-revision> | STABLE | | kube_statefulset_status_update_revision | Gauge | | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace>
`revision`=<statefulset-update-revision> | STABLE | -| kube_statefulset_deletion_timestamp | Gauge | Unix deletion timestamp | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | EXPERIMENTAL | +| kube_statefulset_deletion_timestamp | Gauge | Unix deletion timestamp | `statefulset`=<statefulset-name>
`namespace`=<statefulset-namespace> | BETA | + +## Common PromQL Queries + +### StatefulSet Health Monitoring + +**Check StatefulSet rollout status:** + +```promql +# Percentage of updated replicas +(kube_statefulset_status_replicas_updated / kube_statefulset_replicas) * 100 +``` + +**Monitor unavailable replicas:** + +```promql +# Number of unavailable replicas +kube_statefulset_replicas - kube_statefulset_status_replicas_available +``` + +### Troubleshooting Queries + +**Find StatefulSets with outdated replicas:** + +```promql +# StatefulSets with replicas not yet updated +kube_statefulset_status_replicas_current != kube_statefulset_status_replicas_updated +``` + +**StatefulSets stuck during rollout:** + +```promql +# StatefulSets where observed generation is behind metadata generation +kube_statefulset_status_observed_generation < kube_statefulset_metadata_generation +``` + +**StatefulSets with scaling issues:** + +```promql +# StatefulSets where current replicas don't match desired +kube_statefulset_status_replicas_current != kube_statefulset_replicas +``` + +## Major Alerting Rules + +### Critical Alerts + +**StatefulSet is completely down:** + +```yaml +- alert: StatefulSetDown + expr: kube_statefulset_status_replicas_available == 0 and kube_statefulset_replicas > 0 + for: 5m + labels: + severity: critical + annotations: + summary: "StatefulSet {{ $labels.statefulset }} is completely down" + description: "StatefulSet {{ $labels.statefulset }} in namespace {{ $labels.namespace }} has no available replicas even though its desired replica count is greater than zero." 
+``` + +**StatefulSet rollout stuck:** + +```yaml +- alert: StatefulSetRolloutStuck + expr: kube_statefulset_status_observed_generation < kube_statefulset_metadata_generation + for: 15m + labels: + severity: critical + annotations: + summary: "StatefulSet {{ $labels.statefulset }} rollout is stuck" + description: "StatefulSet {{ $labels.statefulset }} in namespace {{ $labels.namespace }} has been stuck rolling out for more than 15 minutes." +``` + +### Warning Alerts + +**StatefulSet has unavailable replicas:** + +```yaml +- alert: StatefulSetReplicasUnavailable + expr: (kube_statefulset_replicas - kube_statefulset_status_replicas_available) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: "StatefulSet {{ $labels.statefulset }} has unavailable replicas" + description: "StatefulSet {{ $labels.statefulset }} in namespace {{ $labels.namespace }} has {{ $value }} unavailable replicas." +``` + +**StatefulSet replica count mismatch:** + +```yaml +- alert: StatefulSetReplicasMismatch + expr: kube_statefulset_status_replicas_current != kube_statefulset_replicas + for: 10m + labels: + severity: warning + annotations: + summary: "StatefulSet {{ $labels.statefulset }} replica count mismatch" + description: "StatefulSet {{ $labels.statefulset }} in namespace {{ $labels.namespace }} has {{ $value }} current replicas, which does not match the desired replica count." 
+``` diff --git a/internal/store/statefulset.go b/internal/store/statefulset.go index 77049e1a5..85653ea0e 100644 --- a/internal/store/statefulset.go +++ b/internal/store/statefulset.go @@ -82,7 +82,7 @@ func statefulSetMetricFamilies(allowAnnotationsList, allowLabelsList []string) [ "kube_statefulset_status_replicas_available", "The number of available replicas per StatefulSet.", metric.Gauge, - basemetrics.ALPHA, + basemetrics.BETA, "", wrapStatefulSetFunc(func(s *v1.StatefulSet) *metric.Family { return &metric.Family{ @@ -218,7 +218,7 @@ func statefulSetMetricFamilies(allowAnnotationsList, allowLabelsList []string) [ "kube_statefulset_persistentvolumeclaim_retention_policy", "Count of retention policy for StatefulSet template PVCs", metric.Gauge, - basemetrics.ALPHA, + basemetrics.BETA, "", wrapStatefulSetFunc(func(s *v1.StatefulSet) *metric.Family { @@ -245,7 +245,7 @@ func statefulSetMetricFamilies(allowAnnotationsList, allowLabelsList []string) [ descStatefulSetAnnotationsName, descStatefulSetAnnotationsHelp, metric.Gauge, - basemetrics.ALPHA, + basemetrics.BETA, "", wrapStatefulSetFunc(func(s *v1.StatefulSet) *metric.Family { if len(allowAnnotationsList) == 0 { @@ -325,7 +325,7 @@ func statefulSetMetricFamilies(allowAnnotationsList, allowLabelsList []string) [ "kube_statefulset_deletion_timestamp", "Unix deletion timestamp", metric.Gauge, - basemetrics.ALPHA, + basemetrics.BETA, "", wrapStatefulSetFunc(func(s *v1.StatefulSet) *metric.Family { ms := []*metric.Metric{} diff --git a/internal/store/statefulset_test.go b/internal/store/statefulset_test.go index 45d810fb2..33e356056 100644 --- a/internal/store/statefulset_test.go +++ b/internal/store/statefulset_test.go @@ -64,13 +64,13 @@ func TestStatefulSetStore(t *testing.T) { # HELP kube_statefulset_created [STABLE] Unix creation timestamp # HELP kube_statefulset_labels [STABLE] Kubernetes labels converted to Prometheus labels. 
# HELP kube_statefulset_metadata_generation [STABLE] Sequence number representing a specific generation of the desired state for the StatefulSet. - # HELP kube_statefulset_persistentvolumeclaim_retention_policy Count of retention policy for StatefulSet template PVCs + # HELP kube_statefulset_persistentvolumeclaim_retention_policy [BETA] Count of retention policy for StatefulSet template PVCs # HELP kube_statefulset_replicas [STABLE] Number of desired pods for a StatefulSet. # HELP kube_statefulset_ordinals_start [STABLE] Start ordinal of the StatefulSet. # HELP kube_statefulset_status_current_revision [STABLE] Indicates the version of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). # HELP kube_statefulset_status_observed_generation [STABLE] The generation observed by the StatefulSet controller. # HELP kube_statefulset_status_replicas [STABLE] The number of replicas per StatefulSet. - # HELP kube_statefulset_status_replicas_available The number of available replicas per StatefulSet. + # HELP kube_statefulset_status_replicas_available [BETA] The number of available replicas per StatefulSet. # HELP kube_statefulset_status_replicas_current [STABLE] The number of current replicas per StatefulSet. # HELP kube_statefulset_status_replicas_ready [STABLE] The number of ready replicas per StatefulSet. # HELP kube_statefulset_status_replicas_updated [STABLE] The number of updated replicas per StatefulSet. @@ -146,12 +146,12 @@ func TestStatefulSetStore(t *testing.T) { Want: ` # HELP kube_statefulset_labels [STABLE] Kubernetes labels converted to Prometheus labels. # HELP kube_statefulset_metadata_generation [STABLE] Sequence number representing a specific generation of the desired state for the StatefulSet. 
- # HELP kube_statefulset_persistentvolumeclaim_retention_policy Count of retention policy for StatefulSet template PVCs + # HELP kube_statefulset_persistentvolumeclaim_retention_policy [BETA] Count of retention policy for StatefulSet template PVCs # HELP kube_statefulset_replicas [STABLE] Number of desired pods for a StatefulSet. # HELP kube_statefulset_status_current_revision [STABLE] Indicates the version of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). # HELP kube_statefulset_status_observed_generation [STABLE] The generation observed by the StatefulSet controller. # HELP kube_statefulset_status_replicas [STABLE] The number of replicas per StatefulSet. - # HELP kube_statefulset_status_replicas_available The number of available replicas per StatefulSet. + # HELP kube_statefulset_status_replicas_available [BETA] The number of available replicas per StatefulSet. # HELP kube_statefulset_status_replicas_current [STABLE] The number of current replicas per StatefulSet. # HELP kube_statefulset_status_replicas_ready [STABLE] The number of ready replicas per StatefulSet. # HELP kube_statefulset_status_replicas_updated [STABLE] The number of updated replicas per StatefulSet. @@ -218,11 +218,11 @@ func TestStatefulSetStore(t *testing.T) { Want: ` # HELP kube_statefulset_labels [STABLE] Kubernetes labels converted to Prometheus labels. # HELP kube_statefulset_metadata_generation [STABLE] Sequence number representing a specific generation of the desired state for the StatefulSet. - # HELP kube_statefulset_persistentvolumeclaim_retention_policy Count of retention policy for StatefulSet template PVCs + # HELP kube_statefulset_persistentvolumeclaim_retention_policy [BETA] Count of retention policy for StatefulSet template PVCs # HELP kube_statefulset_replicas [STABLE] Number of desired pods for a StatefulSet. 
# HELP kube_statefulset_status_current_revision [STABLE] Indicates the version of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). # HELP kube_statefulset_status_replicas [STABLE] The number of replicas per StatefulSet. - # HELP kube_statefulset_status_replicas_available The number of available replicas per StatefulSet. + # HELP kube_statefulset_status_replicas_available [BETA] The number of available replicas per StatefulSet. # HELP kube_statefulset_status_replicas_current [STABLE] The number of current replicas per StatefulSet. # HELP kube_statefulset_status_replicas_ready [STABLE] The number of ready replicas per StatefulSet. # HELP kube_statefulset_status_replicas_updated [STABLE] The number of updated replicas per StatefulSet. @@ -290,11 +290,11 @@ func TestStatefulSetStore(t *testing.T) { Want: ` # HELP kube_statefulset_labels [STABLE] Kubernetes labels converted to Prometheus labels. # HELP kube_statefulset_metadata_generation [STABLE] Sequence number representing a specific generation of the desired state for the StatefulSet. - # HELP kube_statefulset_persistentvolumeclaim_retention_policy Count of retention policy for StatefulSet template PVCs + # HELP kube_statefulset_persistentvolumeclaim_retention_policy [BETA] Count of retention policy for StatefulSet template PVCs # HELP kube_statefulset_replicas [STABLE] Number of desired pods for a StatefulSet. # HELP kube_statefulset_status_current_revision [STABLE] Indicates the version of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). # HELP kube_statefulset_status_replicas [STABLE] The number of replicas per StatefulSet. - # HELP kube_statefulset_status_replicas_available The number of available replicas per StatefulSet. + # HELP kube_statefulset_status_replicas_available [BETA] The number of available replicas per StatefulSet. # HELP kube_statefulset_status_replicas_current [STABLE] The number of current replicas per StatefulSet. 
# HELP kube_statefulset_status_replicas_ready [STABLE] The number of ready replicas per StatefulSet. # HELP kube_statefulset_status_replicas_updated [STABLE] The number of updated replicas per StatefulSet. @@ -363,12 +363,12 @@ func TestStatefulSetStore(t *testing.T) { Want: ` # HELP kube_statefulset_labels [STABLE] Kubernetes labels converted to Prometheus labels. # HELP kube_statefulset_metadata_generation [STABLE] Sequence number representing a specific generation of the desired state for the StatefulSet. - # HELP kube_statefulset_persistentvolumeclaim_retention_policy Count of retention policy for StatefulSet template PVCs + # HELP kube_statefulset_persistentvolumeclaim_retention_policy [BETA] Count of retention policy for StatefulSet template PVCs # HELP kube_statefulset_replicas [STABLE] Number of desired pods for a StatefulSet. # HELP kube_statefulset_ordinals_start [STABLE] Start ordinal of the StatefulSet. # HELP kube_statefulset_status_current_revision [STABLE] Indicates the version of the StatefulSet used to generate Pods in the sequence [0,currentReplicas). # HELP kube_statefulset_status_replicas [STABLE] The number of replicas per StatefulSet. - # HELP kube_statefulset_status_replicas_available The number of available replicas per StatefulSet. + # HELP kube_statefulset_status_replicas_available [BETA] The number of available replicas per StatefulSet. # HELP kube_statefulset_status_replicas_current [STABLE] The number of current replicas per StatefulSet. # HELP kube_statefulset_status_replicas_ready [STABLE] The number of ready replicas per StatefulSet. # HELP kube_statefulset_status_replicas_updated [STABLE] The number of updated replicas per StatefulSet. 
@@ -432,7 +432,7 @@ func TestStatefulSetStore(t *testing.T) { }, }, Want: ` - # HELP kube_statefulset_deletion_timestamp Unix deletion timestamp + # HELP kube_statefulset_deletion_timestamp [BETA] Unix deletion timestamp # TYPE kube_statefulset_deletion_timestamp gauge kube_statefulset_deletion_timestamp{statefulset="statefulset6",namespace="ns6"} 1.8e+09 `,