Skip to content

Commit 79a7088

Browse files
authored
Merge pull request kubernetes#92202 from alculquicondor/profile-metrics
Add profile label to schedule_attempts_total metric and e2e_scheduling_duration_seconds
2 parents a7e4973 + eb9711d commit 79a7088

File tree

6 files changed

+74
-34
lines changed

6 files changed

+74
-34
lines changed

pkg/scheduler/metrics/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ go_library(
77
srcs = [
88
"metric_recorder.go",
99
"metrics.go",
10+
"profile_metrics.go",
1011
],
1112
importpath = "k8s.io/kubernetes/pkg/scheduler/metrics",
1213
deps = [

pkg/scheduler/metrics/metrics.go

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,7 @@ var (
5454
Name: "schedule_attempts_total",
5555
Help: "Number of attempts to schedule pods, by the result. 'unschedulable' means a pod could not be scheduled, while 'error' means an internal scheduler problem.",
5656
StabilityLevel: metrics.ALPHA,
57-
}, []string{"result"})
58-
// PodScheduleSuccesses counts how many pods were scheduled.
59-
// This metric will be initialized again in Register() to assure the metric is not no-op metric.
60-
PodScheduleSuccesses = scheduleAttempts.With(metrics.Labels{"result": "scheduled"})
61-
// PodScheduleFailures counts how many pods could not be scheduled.
62-
// This metric will be initialized again in Register() to assure the metric is not no-op metric.
63-
PodScheduleFailures = scheduleAttempts.With(metrics.Labels{"result": "unschedulable"})
64-
// PodScheduleErrors counts how many pods could not be scheduled due to a scheduler error.
65-
// This metric will be initialized again in Register() to assure the metric is not no-op metric.
66-
PodScheduleErrors = scheduleAttempts.With(metrics.Labels{"result": "error"})
57+
}, []string{"result", "profile"})
6758
DeprecatedSchedulingDuration = metrics.NewSummaryVec(
6859
&metrics.SummaryOpts{
6960
Subsystem: SchedulerSubsystem,
@@ -77,15 +68,14 @@ var (
7768
},
7869
[]string{OperationLabel},
7970
)
80-
E2eSchedulingLatency = metrics.NewHistogram(
71+
e2eSchedulingLatency = metrics.NewHistogramVec(
8172
&metrics.HistogramOpts{
8273
Subsystem: SchedulerSubsystem,
8374
Name: "e2e_scheduling_duration_seconds",
8475
Help: "E2e scheduling latency in seconds (scheduling algorithm + binding)",
8576
Buckets: metrics.ExponentialBuckets(0.001, 2, 15),
8677
StabilityLevel: metrics.ALPHA,
87-
},
88-
)
78+
}, []string{"result", "profile"})
8979
SchedulingAlgorithmLatency = metrics.NewHistogram(
9080
&metrics.HistogramOpts{
9181
Subsystem: SchedulerSubsystem,
@@ -235,7 +225,7 @@ var (
235225
metricsList = []metrics.Registerable{
236226
scheduleAttempts,
237227
DeprecatedSchedulingDuration,
238-
E2eSchedulingLatency,
228+
e2eSchedulingLatency,
239229
SchedulingAlgorithmLatency,
240230
BindingLatency,
241231
DeprecatedSchedulingAlgorithmPredicateEvaluationSecondsDuration,
@@ -263,9 +253,6 @@ func Register() {
263253
registerMetrics.Do(func() {
264254
RegisterMetrics(metricsList...)
265255
volumeschedulingmetrics.RegisterVolumeSchedulingMetrics()
266-
PodScheduleSuccesses = scheduleAttempts.With(metrics.Labels{"result": "scheduled"})
267-
PodScheduleFailures = scheduleAttempts.With(metrics.Labels{"result": "unschedulable"})
268-
PodScheduleErrors = scheduleAttempts.With(metrics.Labels{"result": "error"})
269256
})
270257
}
271258

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
// This file contains helpers for metrics that are associated with a profile.
20+
21+
var (
22+
scheduledResult = "scheduled"
23+
unschedulableResult = "unschedulable"
24+
errorResult = "error"
25+
)
26+
27+
// PodScheduled records a successful scheduling attempt and its duration, in
28+
// seconds, labeled with the given profile.
29+
func PodScheduled(profile string, duration float64) {
30+
observeScheduleAttemptAndLatency(scheduledResult, profile, duration)
31+
}
32+
33+
// PodUnschedulable records a scheduling attempt for an unschedulable pod and
34+
// its duration, in seconds, labeled with the given profile.
35+
func PodUnschedulable(profile string, duration float64) {
36+
observeScheduleAttemptAndLatency(unschedulableResult, profile, duration)
37+
}
38+
39+
// PodScheduleError records a scheduling attempt that had an error and its
40+
// duration, in seconds, labeled with the given profile.
41+
func PodScheduleError(profile string, duration float64) {
42+
observeScheduleAttemptAndLatency(errorResult, profile, duration)
43+
}
44+
45+
func observeScheduleAttemptAndLatency(result, profile string, duration float64) {
46+
e2eSchedulingLatency.WithLabelValues(result, profile).Observe(duration)
47+
scheduleAttempts.WithLabelValues(result, profile).Inc()
48+
}

pkg/scheduler/profile/profile.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,22 @@ type FrameworkFactory func(config.KubeSchedulerProfile, ...frameworkruntime.Opti
4040
type Profile struct {
4141
framework.Framework
4242
Recorder events.EventRecorder
43+
Name string
4344
}
4445

4546
// NewProfile builds a Profile for the given configuration.
4647
func NewProfile(cfg config.KubeSchedulerProfile, frameworkFact FrameworkFactory, recorderFact RecorderFactory,
4748
opts ...frameworkruntime.Option) (*Profile, error) {
48-
r := recorderFact(cfg.SchedulerName)
49-
f, err := frameworkFact(cfg, append(opts, frameworkruntime.WithEventRecorder(r))...)
49+
recorder := recorderFact(cfg.SchedulerName)
50+
opts = append(opts, frameworkruntime.WithEventRecorder(recorder))
51+
fwk, err := frameworkFact(cfg, opts...)
5052
if err != nil {
5153
return nil, err
5254
}
5355
return &Profile{
54-
Framework: f,
55-
Recorder: r,
56+
Name: cfg.SchedulerName,
57+
Framework: fwk,
58+
Recorder: recorder,
5659
}, nil
5760
}
5861

pkg/scheduler/scheduler.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -506,13 +506,13 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
506506
// Pod did not fit anywhere, so it is counted as a failure. If preemption
507507
// succeeds, the pod should get counted as a success the next time we try to
508508
// schedule it. (hopefully)
509-
metrics.PodScheduleFailures.Inc()
509+
metrics.PodUnschedulable(prof.Name, metrics.SinceInSeconds(start))
510510
} else if err == core.ErrNoNodesAvailable {
511511
// No nodes available is counted as unschedulable rather than an error.
512-
metrics.PodScheduleFailures.Inc()
512+
metrics.PodUnschedulable(prof.Name, metrics.SinceInSeconds(start))
513513
} else {
514514
klog.ErrorS(err, "Error selecting node for pod", "pod", klog.KObj(pod))
515-
metrics.PodScheduleErrors.Inc()
515+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
516516
}
517517
sched.recordSchedulingFailure(prof, podInfo, err, v1.PodReasonUnschedulable, nominatedNode)
518518
return
@@ -526,7 +526,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
526526
// Run "reserve" plugins.
527527
if sts := prof.RunReservePlugins(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost); !sts.IsSuccess() {
528528
sched.recordSchedulingFailure(prof, assumedPodInfo, sts.AsError(), SchedulerError, "")
529-
metrics.PodScheduleErrors.Inc()
529+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
530530
return
531531
}
532532

@@ -539,7 +539,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
539539
// to a node and if so will not add it back to the unscheduled pods queue
540540
// (otherwise this would cause an infinite loop).
541541
sched.recordSchedulingFailure(prof, assumedPodInfo, err, SchedulerError, "")
542-
metrics.PodScheduleErrors.Inc()
542+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
543543
// trigger un-reserve plugins to clean up state associated with the reserved Pod
544544
prof.RunUnreservePlugins(schedulingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
545545
return
@@ -550,10 +550,10 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
550550
if runPermitStatus.Code() != framework.Wait && !runPermitStatus.IsSuccess() {
551551
var reason string
552552
if runPermitStatus.IsUnschedulable() {
553-
metrics.PodScheduleFailures.Inc()
553+
metrics.PodUnschedulable(prof.Name, metrics.SinceInSeconds(start))
554554
reason = v1.PodReasonUnschedulable
555555
} else {
556-
metrics.PodScheduleErrors.Inc()
556+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
557557
reason = SchedulerError
558558
}
559559
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
@@ -576,10 +576,10 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
576576
if !waitOnPermitStatus.IsSuccess() {
577577
var reason string
578578
if waitOnPermitStatus.IsUnschedulable() {
579-
metrics.PodScheduleFailures.Inc()
579+
metrics.PodUnschedulable(prof.Name, metrics.SinceInSeconds(start))
580580
reason = v1.PodReasonUnschedulable
581581
} else {
582-
metrics.PodScheduleErrors.Inc()
582+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
583583
reason = SchedulerError
584584
}
585585
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
@@ -595,7 +595,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
595595
preBindStatus := prof.RunPreBindPlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
596596
if !preBindStatus.IsSuccess() {
597597
var reason string
598-
metrics.PodScheduleErrors.Inc()
598+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
599599
reason = SchedulerError
600600
if forgetErr := sched.Cache().ForgetPod(assumedPod); forgetErr != nil {
601601
klog.Errorf("scheduler cache ForgetPod failed: %v", forgetErr)
@@ -607,9 +607,8 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
607607
}
608608

609609
err := sched.bind(bindingCycleCtx, prof, assumedPod, scheduleResult.SuggestedHost, state)
610-
metrics.E2eSchedulingLatency.Observe(metrics.SinceInSeconds(start))
611610
if err != nil {
612-
metrics.PodScheduleErrors.Inc()
611+
metrics.PodScheduleError(prof.Name, metrics.SinceInSeconds(start))
613612
// trigger un-reserve plugins to clean up state associated with the reserved Pod
614613
prof.RunUnreservePlugins(bindingCycleCtx, state, assumedPod, scheduleResult.SuggestedHost)
615614
sched.recordSchedulingFailure(prof, assumedPodInfo, fmt.Errorf("Binding rejected: %v", err), SchedulerError, "")
@@ -619,7 +618,7 @@ func (sched *Scheduler) scheduleOne(ctx context.Context) {
619618
klog.InfoS("Successfully bound pod to node", "pod", klog.KObj(pod), "node", scheduleResult.SuggestedHost, "evaluatedNodes", scheduleResult.EvaluatedNodes, "feasibleNodes", scheduleResult.FeasibleNodes)
620619
}
621620

622-
metrics.PodScheduleSuccesses.Inc()
621+
metrics.PodScheduled(prof.Name, metrics.SinceInSeconds(start))
623622
metrics.PodSchedulingAttempts.Observe(float64(podInfo.Attempts))
624623
metrics.PodSchedulingDuration.Observe(metrics.SinceInSeconds(podInfo.InitialAttemptTimestamp))
625624

pkg/scheduler/scheduler_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ func TestSchedulerScheduleOne(t *testing.T) {
319319
testSchedulerName: &profile.Profile{
320320
Framework: fwk,
321321
Recorder: eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName),
322+
Name: testSchedulerName,
322323
},
323324
},
324325
}
@@ -770,6 +771,7 @@ func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache internalcache.C
770771
prof := &profile.Profile{
771772
Framework: fwk,
772773
Recorder: &events.FakeRecorder{},
774+
Name: testSchedulerName,
773775
}
774776
if broadcaster != nil {
775777
prof.Recorder = broadcaster.NewRecorder(scheme.Scheme, testSchedulerName)

0 commit comments

Comments
 (0)