Skip to content

Commit 3c44e11

Browse files
kubelet: Record preemptions similarly to evictions
A preemption is a disruption event that should have a metric so that the rate of preemption can be assessed. Nodes that are under heavy preemption may have conflicting workloads or otherwise need attention. A sudden burst of preemption on a cluster in steady state could indicate pathological conditions within the scheduler or workload controllers.
1 parent 9c25b16 commit 3c44e11

File tree

3 files changed

+24
-2
lines changed

3 files changed

+24
-2
lines changed

pkg/kubelet/metrics/metrics.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ package metrics
1818

1919
import (
2020
"fmt"
21-
"k8s.io/component-base/metrics"
22-
"k8s.io/component-base/metrics/legacyregistry"
2321
"sync"
2422
"time"
2523

24+
"k8s.io/component-base/metrics"
25+
"k8s.io/component-base/metrics/legacyregistry"
26+
2627
corev1 "k8s.io/api/core/v1"
2728
"k8s.io/apimachinery/pkg/types"
2829
utilfeature "k8s.io/apiserver/pkg/util/feature"
@@ -45,6 +46,7 @@ const (
4546
PLEGRelistIntervalKey = "pleg_relist_interval_seconds"
4647
EvictionsKey = "evictions"
4748
EvictionStatsAgeKey = "eviction_stats_age_seconds"
49+
PreemptionsKey = "preemptions"
4850
DeprecatedPodWorkerLatencyKey = "pod_worker_latency_microseconds"
4951
DeprecatedPodStartLatencyKey = "pod_start_latency_microseconds"
5052
DeprecatedCgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
@@ -242,6 +244,18 @@ var (
242244
},
243245
[]string{"eviction_signal"},
244246
)
247+
// Preemptions is a Counter that tracks the cumulative number of pod preemptions initiated by the kubelet.
248+
// Broken down by preemption signal. A preemption is only recorded for one resource, so the sum of all signals
249+
// is the number of preemptions on the given node.
250+
Preemptions = metrics.NewCounterVec(
251+
&metrics.CounterOpts{
252+
Subsystem: KubeletSubsystem,
253+
Name: PreemptionsKey,
254+
Help: "Cumulative number of pod preemptions by preemption resource",
255+
StabilityLevel: metrics.ALPHA,
256+
},
257+
[]string{"preemption_signal"},
258+
)
245259
// DevicePluginRegistrationCount is a Counter that tracks the cumulative number of device plugin registrations.
246260
// Broken down by resource name.
247261
DevicePluginRegistrationCount = metrics.NewCounterVec(
@@ -502,6 +516,7 @@ func Register(containerCache kubecontainer.RuntimeCache, collectors ...metrics.C
502516
legacyregistry.MustRegister(RuntimeOperationsErrors)
503517
legacyregistry.MustRegister(Evictions)
504518
legacyregistry.MustRegister(EvictionStatsAge)
519+
legacyregistry.MustRegister(Preemptions)
505520
legacyregistry.MustRegister(DevicePluginRegistrationCount)
506521
legacyregistry.MustRegister(DevicePluginAllocationDuration)
507522
legacyregistry.MustRegister(DeprecatedPodWorkerLatency)

pkg/kubelet/preemption/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ go_library(
1616
"//pkg/kubelet/events:go_default_library",
1717
"//pkg/kubelet/eviction:go_default_library",
1818
"//pkg/kubelet/lifecycle:go_default_library",
19+
"//pkg/kubelet/metrics:go_default_library",
1920
"//pkg/kubelet/types:go_default_library",
2021
"//pkg/kubelet/util/format:go_default_library",
2122
"//pkg/scheduler/algorithm/predicates:go_default_library",

pkg/kubelet/preemption/preemption.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"k8s.io/kubernetes/pkg/kubelet/events"
2929
"k8s.io/kubernetes/pkg/kubelet/eviction"
3030
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
31+
"k8s.io/kubernetes/pkg/kubelet/metrics"
3132
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
3233
"k8s.io/kubernetes/pkg/kubelet/util/format"
3334
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
@@ -111,6 +112,11 @@ func (c *CriticalPodAdmissionHandler) evictPodsToFreeRequests(admitPod *v1.Pod,
111112
// In future syncPod loops, the kubelet will retry the pod deletion steps that it was stuck on.
112113
continue
113114
}
115+
if len(insufficientResources) > 0 {
116+
metrics.Preemptions.WithLabelValues(insufficientResources[0].resourceName.String()).Inc()
117+
} else {
118+
metrics.Preemptions.WithLabelValues("").Inc()
119+
}
114120
klog.Infof("preemption: pod %s evicted successfully", format.Pod(pod))
115121
}
116122
return nil

0 commit comments

Comments
 (0)