Skip to content

Commit 15a9430

Browse files
authored
Merge pull request kubernetes#92650 from ahg-g/ahg-attempts
breakdown PodSchedulingDuration by number of attempts
2 parents d71a092 + d1ea49b commit 15a9430

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

pkg/scheduler/metrics/metrics.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,16 @@ var (
154154
StabilityLevel: metrics.ALPHA,
155155
}, []string{"work"})
156156

157-
PodSchedulingDuration = metrics.NewHistogram(
157+
PodSchedulingDuration = metrics.NewHistogramVec(
158158
&metrics.HistogramOpts{
159159
Subsystem: SchedulerSubsystem,
160160
Name: "pod_scheduling_duration_seconds",
161161
Help: "E2e latency for a pod being scheduled which may include multiple scheduling attempts.",
162162
// Start with 1ms with the last bucket being [~16s, Inf)
163163
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
164164
StabilityLevel: metrics.ALPHA,
165-
})
165+
},
166+
[]string{"attempts"})
166167

167168
PodSchedulingAttempts = metrics.NewHistogram(
168169
&metrics.HistogramOpts{

pkg/scheduler/scheduler.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,17 +612,25 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
612612
if klog.V(2).Enabled() {
613613
klog.InfoS("Successfully bound pod to node", "pod", klog.KObj(pod), "node", scheduleResult.SuggestedHost, "evaluatedNodes", scheduleResult.EvaluatedNodes, "feasibleNodes", scheduleResult.FeasibleNodes)
614614
}
615-
616615
metrics.PodScheduled(prof.Name, metrics.SinceInSeconds(start))
617616
metrics.PodSchedulingAttempts.Observe(float64(podInfo.Attempts))
618-
metrics.PodSchedulingDuration.Observe(metrics.SinceInSeconds(podInfo.InitialAttemptTimestamp))
617+
metrics.PodSchedulingDuration.WithLabelValues(getAttemptsLabel(podInfo)).Observe(metrics.SinceInSeconds(podInfo.InitialAttemptTimestamp))
619618

620619
// Run "postbind" plugins.
621620
prof.RunPostBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
622621
}
623622
}()
624623
}
625624

625+
func getAttemptsLabel(p *framework.QueuedPodInfo) string {
626+
// We breakdown the pod scheduling duration by attempts capped to a limit
627+
// to avoid ending up with a high cardinality metric.
628+
if p.Attempts >= 15 {
629+
return "15+"
630+
}
631+
return string(p.Attempts)
632+
}
633+
626634
func (sched *Scheduler) profileForPod(pod *v1.Pod) (*profile.Profile, error) {
627635
prof, ok := sched.Profiles[pod.Spec.SchedulerName]
628636
if !ok {

0 commit comments

Comments
 (0)