Skip to content

Commit abe5903

Browse files
committed
implement a PrometheusSchedulingMetrics Measurement and Gatherer
1 parent 03038c5 commit abe5903

File tree

3 files changed

+229
-85
lines changed

3 files changed

+229
-85
lines changed
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package common
18+
19+
import (
20+
"fmt"
21+
"time"
22+
23+
"k8s.io/klog/v2"
24+
"k8s.io/perf-tests/clusterloader2/pkg/measurement"
25+
measurementutil "k8s.io/perf-tests/clusterloader2/pkg/measurement/util"
26+
"k8s.io/perf-tests/clusterloader2/pkg/util"
27+
)
28+
29+
const (
	// prometheusSchedulingMetricsMeasurementName is the name under which this
	// measurement is registered and referenced from ClusterLoader2 configs.
	// It deliberately differs from schedulerLatencyMetricName so users can pick
	// either implementation (direct kube-scheduler scrape vs Prometheus-backed)
	// while the emitted summary payload stays the same.
	prometheusSchedulingMetricsMeasurementName = "PrometheusSchedulingMetrics"
)
35+
36+
func init() {
37+
create := func() measurement.Measurement {
38+
return CreatePrometheusMeasurement(&prometheusSchedulerLatencyGatherer{})
39+
}
40+
if err := measurement.Register(prometheusSchedulingMetricsMeasurementName, create); err != nil {
41+
klog.Fatalf("Cannot register %s: %v", prometheusSchedulingMetricsMeasurementName, err)
42+
}
43+
}
44+
45+
// prometheusSchedulerLatencyGatherer is a Gatherer that produces the same JSON
// summary as scheduler_latency.go, but sources the histogram data from
// Prometheus rather than scraping kube-scheduler directly.
//
// Gather uses Prometheus' increase() function to compute per-bucket increments
// over the test window [startTime, endTime). The increments are converted into
// the same Histogram structures the in-cluster scrape implementation builds,
// and the existing helper code is then reused to derive the 50th/90th/99th
// percentiles.
//
// No parameters are required — all buckets of the relevant scheduler
// histograms are queried automatically.
//
// NOTE: this assumes the kube-scheduler metrics endpoint is scraped by
// Prometheus and that the metric names match the upstream defaults; if either
// assumption fails, the queries return no samples or errors.
type prometheusSchedulerLatencyGatherer struct{}
61+
62+
func (g *prometheusSchedulerLatencyGatherer) Configure(_ *measurement.Config) error { return nil }
63+
func (g *prometheusSchedulerLatencyGatherer) IsEnabled(_ *measurement.Config) bool { return true }
64+
func (g *prometheusSchedulerLatencyGatherer) String() string {
65+
return prometheusSchedulingMetricsMeasurementName
66+
}
67+
68+
func (g *prometheusSchedulerLatencyGatherer) Gather(executor QueryExecutor, startTime, endTime time.Time, config *measurement.Config) ([]measurement.Summary, error) {
69+
window := endTime.Sub(startTime)
70+
promWindow := measurementutil.ToPrometheusTime(window)
71+
72+
queryHistogram := func(metric string, labelFilter string) (*measurementutil.Histogram, error) {
73+
var query string
74+
if labelFilter != "" {
75+
query = fmt.Sprintf(`sum(increase(%s{%s}[%s])) by (le)`, metric, labelFilter, promWindow)
76+
} else {
77+
query = fmt.Sprintf(`sum(increase(%s[%s])) by (le)`, metric, promWindow)
78+
}
79+
80+
samples, err := executor.Query(query, endTime)
81+
if err != nil {
82+
return nil, fmt.Errorf("failed to execute query %q: %w", query, err)
83+
}
84+
85+
hist := measurementutil.NewHistogram(nil)
86+
for _, s := range samples {
87+
measurementutil.ConvertSampleToHistogram(s, hist)
88+
}
89+
return hist, nil
90+
}
91+
92+
metrics := schedulerLatencyMetrics{
93+
e2eSchedulingDurationHist: measurementutil.NewHistogram(nil),
94+
schedulingAlgorithmDurationHist: measurementutil.NewHistogram(nil),
95+
preemptionEvaluationHist: measurementutil.NewHistogram(nil),
96+
frameworkExtensionPointDurationHist: make(map[string]*measurementutil.Histogram),
97+
}
98+
99+
for _, ep := range extentionsPoints {
100+
metrics.frameworkExtensionPointDurationHist[ep] = measurementutil.NewHistogram(nil)
101+
}
102+
103+
var errList []error
104+
105+
if h, err := queryHistogram(string(e2eSchedulingDurationMetricName), ""); err != nil {
106+
errList = append(errList, err)
107+
} else {
108+
metrics.e2eSchedulingDurationHist = h
109+
}
110+
111+
if h, err := queryHistogram(string(schedulingAlgorithmDurationMetricName), ""); err != nil {
112+
errList = append(errList, err)
113+
} else {
114+
metrics.schedulingAlgorithmDurationHist = h
115+
}
116+
117+
if h, err := queryHistogram(string(preemptionEvaluationMetricName), ""); err != nil {
118+
errList = append(errList, err)
119+
} else {
120+
metrics.preemptionEvaluationHist = h
121+
}
122+
123+
for _, ep := range extentionsPoints {
124+
labelSel := fmt.Sprintf(`extension_point="%s"`, ep)
125+
h, err := queryHistogram(string(frameworkExtensionPointDurationMetricName), labelSel)
126+
if err != nil {
127+
errList = append(errList, err)
128+
continue
129+
}
130+
metrics.frameworkExtensionPointDurationHist[ep] = h
131+
}
132+
133+
if len(errList) > 0 {
134+
return nil, fmt.Errorf("prometheus scheduler latency gathering errors: %v", errList)
135+
}
136+
137+
slm := &schedulerLatencyMeasurement{}
138+
result, err := slm.setQuantiles(metrics)
139+
if err != nil {
140+
return nil, fmt.Errorf("failed to compute quantiles: %w", err)
141+
}
142+
143+
content, err := util.PrettyPrintJSON(result)
144+
if err != nil {
145+
return nil, err
146+
}
147+
148+
summaries := []measurement.Summary{
149+
measurement.CreateSummary(config.Identifier+"_"+prometheusSchedulingMetricsMeasurementName, "json", content),
150+
}
151+
return summaries, nil
152+
}

clusterloader2/testing/dra/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ export CL2_FILL_PERCENTAGE=90 # Cluster fill percentage
2222

2323
2. Run the test with:
2424
```
25+
# Make sure a Prometheus stack is deployed (pass --enable-prometheus-server=true below) so that Prometheus-based measurements such as PrometheusSchedulingMetrics work.
26+
2527
./run-e2e.sh cluster-loader2 \
2628
--provider=kind \
2729
--kubeconfig=/root/.kube/config \
2830
--report-dir=/tmp/clusterloader2-results \
2931
--testconfig=testing/dra/config.yaml \
32+
--enable-prometheus-server=true \
3033
--nodes=5
3134
```
3235

clusterloader2/testing/dra/config.yaml

Lines changed: 74 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -45,71 +45,64 @@ dependencies:
4545
steps:
4646
- name: Start measurements
4747
measurements:
48-
- Identifier: WaitForFinishedJobs
49-
Method: WaitForFinishedJobs
50-
Params:
51-
action: start
52-
labelSelector: job-type = short-lived
53-
- Identifier: WaitForControlledPodsRunning
54-
Method: WaitForControlledPodsRunning
55-
Params:
56-
action: start
57-
apiVersion: batch/v1
58-
kind: Job
59-
labelSelector: job-type = long-running
60-
operationTimeout: 120s
61-
- Identifier: FastFillSchedulingMetrics
62-
Method: SchedulingMetrics
63-
Params:
64-
action: start
65-
- Identifier: FastFillPodStartupLatency
66-
Method: PodStartupLatency
67-
Params:
68-
action: start
69-
labelSelector: job-type = long-running
70-
threshold: 20s
71-
- name: Clearing SchedulingMetrics
72-
measurements:
73-
- Identifier: FastFillSchedulingMetrics
74-
Method: SchedulingMetrics
75-
Params:
76-
action: reset
48+
- Identifier: WaitForFinishedJobs
49+
Method: WaitForFinishedJobs
50+
Params:
51+
action: start
52+
labelSelector: job-type = short-lived
53+
- Identifier: WaitForControlledPodsRunning
54+
Method: WaitForControlledPodsRunning
55+
Params:
56+
action: start
57+
apiVersion: batch/v1
58+
kind: Job
59+
labelSelector: job-type = long-running
60+
operationTimeout: 120s
61+
- Identifier: FastFillPodStartupLatency
62+
Method: PodStartupLatency
63+
Params:
64+
action: start
65+
labelSelector: job-type = long-running
66+
- Identifier: FastFillSchedulingMetrics
67+
Method: PrometheusSchedulingMetrics
68+
Params:
69+
action: start
7770
- name: Create ResourceClaimTemplates in namespaces
7871
phases:
79-
- namespaceRange:
80-
min: 1
81-
max: {{$namespaces}}
82-
replicasPerNamespace: 1
83-
tuningSet: FastFill
84-
objectBundle:
85-
- basename: single-gpu
86-
objectTemplatePath: "resourceclaimtemplate.yaml"
72+
- namespaceRange:
73+
min: 1
74+
max: {{$namespaces}}
75+
replicasPerNamespace: 1
76+
tuningSet: FastFill
77+
objectBundle:
78+
- basename: single-gpu
79+
objectTemplatePath: "resourceclaimtemplate.yaml"
8780
- name: Fill cluster to {{$fillPercentage}}% utilization
8881
phases:
89-
- namespaceRange:
90-
min: 1
91-
max: {{$namespaces}}
92-
replicasPerNamespace: {{$fillPodsPerNamespace}}
93-
tuningSet: FastFill
94-
objectBundle:
95-
- basename: long-running
96-
objectTemplatePath: "long-running-job.yaml"
97-
templateFillMap:
98-
Replicas: {{$longJobSize}}
99-
Mode: {{$MODE}}
100-
Sleep: {{$longJobRunningTime}}
82+
- namespaceRange:
83+
min: 1
84+
max: {{$namespaces}}
85+
replicasPerNamespace: {{$fillPodsPerNamespace}}
86+
tuningSet: FastFill
87+
objectBundle:
88+
- basename: long-running
89+
objectTemplatePath: "long-running-job.yaml"
90+
templateFillMap:
91+
Replicas: {{$longJobSize}}
92+
Mode: {{$MODE}}
93+
Sleep: {{$longJobRunningTime}}
10194
- name: Wait for fill pods to be running
10295
measurements:
103-
- Identifier: WaitForControlledPodsRunning
104-
Method: WaitForControlledPodsRunning
105-
Params:
106-
action: gather
107-
labelSelector: job-type = long-running
108-
timeout: 15m
96+
- Identifier: WaitForControlledPodsRunning
97+
Method: WaitForControlledPodsRunning
98+
Params:
99+
action: gather
100+
labelSelector: job-type = long-running
101+
timeout: 15m
109102
- name: Gather measurements for long running pods
110103
measurements:
111104
- Identifier: FastFillSchedulingMetrics
112-
Method: SchedulingMetrics
105+
Method: PrometheusSchedulingMetrics
113106
Params:
114107
action: gather
115108
- Identifier: FastFillPodStartupLatency
@@ -119,13 +112,9 @@ steps:
119112
- name: reset metrics for steady state churn
120113
measurements:
121114
- Identifier: ChurnSchedulingMetrics
122-
Method: SchedulingMetrics
115+
Method: PrometheusSchedulingMetrics
123116
Params:
124117
action: start
125-
- Identifier: ChurnSchedulingMetrics
126-
Method: SchedulingMetrics
127-
Params:
128-
action: reset
129118
- Identifier: ChurnPodStartupLatency
130119
Method: PodStartupLatency
131120
Params:
@@ -136,19 +125,19 @@ steps:
136125
perc99Threshold: 80s
137126
- name: Create steady state {{$MODE}} jobs
138127
phases:
139-
- namespaceRange:
140-
min: 1
141-
max: {{$namespaces}}
142-
replicasPerNamespace: {{$smallJobsPerNamespace}}
143-
tuningSet: SteadyState
144-
objectBundle:
145-
- basename: small
146-
objectTemplatePath: "job.yaml"
147-
templateFillMap:
148-
Replicas: {{$smallJobSize}}
149-
CompletionReplicas: {{$smallJobCompletions}}
150-
Mode: {{$MODE}}
151-
Sleep: {{$jobRunningTime}}
128+
- namespaceRange:
129+
min: 1
130+
max: {{$namespaces}}
131+
replicasPerNamespace: {{$smallJobsPerNamespace}}
132+
tuningSet: SteadyState
133+
objectBundle:
134+
- basename: small
135+
objectTemplatePath: "job.yaml"
136+
templateFillMap:
137+
Replicas: {{$smallJobSize}}
138+
CompletionReplicas: {{$smallJobCompletions}}
139+
Mode: {{$MODE}}
140+
Sleep: {{$jobRunningTime}}
152141
- name: Wait for short-lived jobs to finish
153142
measurements:
154143
- Identifier: WaitForFinishedJobs
@@ -159,14 +148,14 @@ steps:
159148
timeout: 15m
160149
- name: Measure scheduler metrics
161150
measurements:
162-
- Identifier: ChurnSchedulingMetrics
163-
Method: SchedulingMetrics
164-
Params:
165-
action: gather
166-
- Identifier: ChurnPodStartupLatency
167-
Method: PodStartupLatency
168-
Params:
169-
action: gather
170-
perc50Threshold: 40s
171-
perc90Threshold: 60s
172-
perc99Threshold: 80s
151+
- Identifier: ChurnSchedulingMetrics
152+
Method: PrometheusSchedulingMetrics
153+
Params:
154+
action: gather
155+
- Identifier: ChurnPodStartupLatency
156+
Method: PodStartupLatency
157+
Params:
158+
action: gather
159+
perc50Threshold: 40s
160+
perc90Threshold: 60s
161+
perc99Threshold: 80s

0 commit comments

Comments
 (0)