Skip to content

Commit 0c5e832

Browse files
authored
Merge pull request kubernetes#127154 from macsko/check_if_inflight_events_empty_in_testcase_end_scheduler_perf
Check if InFlightEvents is empty after scheduler_perf workload
2 parents 0d86c02 + 7d4c713 commit 0c5e832

File tree

4 files changed

+96
-12
lines changed

4 files changed

+96
-12
lines changed

pkg/scheduler/framework/events.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,42 @@ var (
105105
WildCardEvent = ClusterEvent{Resource: WildCard, ActionType: All, Label: "WildCardEvent"}
106106
// UnschedulableTimeout is the event when a pod stays in unschedulable for longer than timeout.
107107
UnschedulableTimeout = ClusterEvent{Resource: WildCard, ActionType: All, Label: "UnschedulableTimeout"}
108+
// AllEvents contains all events defined above.
109+
AllEvents = []ClusterEvent{
110+
AssignedPodAdd,
111+
NodeAdd,
112+
NodeDelete,
113+
AssignedPodUpdate,
114+
UnscheduledPodAdd,
115+
UnscheduledPodUpdate,
116+
UnscheduledPodDelete,
117+
assignedPodOtherUpdate,
118+
AssignedPodDelete,
119+
PodRequestScaledDown,
120+
PodLabelChange,
121+
PodTolerationChange,
122+
PodSchedulingGateEliminatedChange,
123+
NodeSpecUnschedulableChange,
124+
NodeAllocatableChange,
125+
NodeLabelChange,
126+
NodeAnnotationChange,
127+
NodeTaintChange,
128+
NodeConditionChange,
129+
PvAdd,
130+
PvUpdate,
131+
PvcAdd,
132+
PvcUpdate,
133+
StorageClassAdd,
134+
StorageClassUpdate,
135+
CSINodeAdd,
136+
CSINodeUpdate,
137+
CSIDriverAdd,
138+
CSIDriverUpdate,
139+
CSIStorageCapacityAdd,
140+
CSIStorageCapacityUpdate,
141+
WildCardEvent,
142+
UnschedulableTimeout,
143+
}
108144
)
109145

110146
// PodSchedulingPropertiesChange interprets the update of a pod and returns corresponding UpdatePodXYZ event(s).

test/integration/scheduler_perf/scheduler_perf.go

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,13 @@ import (
5252
featuregatetesting "k8s.io/component-base/featuregate/testing"
5353
logsapi "k8s.io/component-base/logs/api/v1"
5454
"k8s.io/component-base/metrics/legacyregistry"
55+
"k8s.io/component-base/metrics/testutil"
5556
"k8s.io/klog/v2"
57+
"k8s.io/kubernetes/pkg/features"
5658
"k8s.io/kubernetes/pkg/scheduler/apis/config"
5759
"k8s.io/kubernetes/pkg/scheduler/apis/config/scheme"
5860
"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
61+
schedframework "k8s.io/kubernetes/pkg/scheduler/framework"
5962
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
6063
frameworkruntime "k8s.io/kubernetes/pkg/scheduler/framework/runtime"
6164
"k8s.io/kubernetes/pkg/scheduler/metrics"
@@ -927,6 +930,13 @@ func RunBenchmarkPerfScheduling(b *testing.B, outOfTreePluginRegistry frameworkr
927930
}
928931
}
929932

933+
if tc.FeatureGates[features.SchedulerQueueingHints] {
934+
// In any case, we should make sure InFlightEvents is empty after running the scenario.
935+
if err = checkEmptyInFlightEvents(); err != nil {
936+
tCtx.Errorf("%s: %s", w.Name, err)
937+
}
938+
}
939+
930940
// Reset metrics to prevent metrics generated in the current workload from
931941
// being carried over to the next workload.
932942
legacyregistry.Reset()
@@ -1027,6 +1037,23 @@ func compareMetricWithThreshold(items []DataItem, threshold float64, metricSelec
10271037
return nil
10281038
}
10291039

1040+
func checkEmptyInFlightEvents() error {
1041+
labels := []string{metrics.PodPoppedInFlightEvent}
1042+
for _, event := range schedframework.AllEvents {
1043+
labels = append(labels, event.Label)
1044+
}
1045+
for _, label := range labels {
1046+
value, err := testutil.GetGaugeMetricValue(metrics.InFlightEvents.WithLabelValues(label))
1047+
if err != nil {
1048+
return fmt.Errorf("failed to get InFlightEvents metric for label %s", label)
1049+
}
1050+
if value > 0 {
1051+
return fmt.Errorf("InFlightEvents for label %s should be empty, but has %v items", label, value)
1052+
}
1053+
}
1054+
return nil
1055+
}
1056+
10301057
func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFactory informers.SharedInformerFactory) []DataItem {
10311058
b, benchmarking := tCtx.TB().(*testing.B)
10321059
if benchmarking {
@@ -1139,7 +1166,10 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
11391166
for _, collector := range collectors {
11401167
// Need loop-local variable for function below.
11411168
collector := collector
1142-
collector.init()
1169+
err = collector.init()
1170+
if err != nil {
1171+
tCtx.Fatalf("op %d: Failed to initialize data collector: %v", opIndex, err)
1172+
}
11431173
collectorWG.Add(1)
11441174
go func() {
11451175
defer collectorWG.Done()
@@ -1205,13 +1235,6 @@ func runWorkload(tCtx ktesting.TContext, tc *testCase, w *workload, informerFact
12051235
}()
12061236
}
12071237

1208-
if !concreteOp.SkipWaitToCompletion {
1209-
// SkipWaitToCompletion=false indicates this step has waited for the Pods to be scheduled.
1210-
// So we reset the metrics in global registry; otherwise metrics gathered in this step
1211-
// will be carried over to next step.
1212-
legacyregistry.Reset()
1213-
}
1214-
12151238
case *churnOp:
12161239
var namespace string
12171240
if concreteOp.Namespace != nil {
@@ -1376,7 +1399,7 @@ func createNamespaceIfNotPresent(tCtx ktesting.TContext, namespace string, podsP
13761399
}
13771400

13781401
type testDataCollector interface {
1379-
init()
1402+
init() error
13801403
run(tCtx ktesting.TContext)
13811404
collect() []DataItem
13821405
}

test/integration/scheduler_perf/scheduler_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ package benchmark
1818

1919
import (
2020
"testing"
21+
22+
"k8s.io/component-base/metrics/legacyregistry"
23+
"k8s.io/kubernetes/pkg/features"
2124
)
2225

2326
func TestScheduling(t *testing.T) {
@@ -43,6 +46,17 @@ func TestScheduling(t *testing.T) {
4346
informerFactory, tCtx := setupTestCase(t, tc, nil, nil)
4447

4548
runWorkload(tCtx, tc, w, informerFactory)
49+
50+
if tc.FeatureGates[features.SchedulerQueueingHints] {
51+
// In any case, we should make sure InFlightEvents is empty after running the scenario.
52+
if err = checkEmptyInFlightEvents(); err != nil {
53+
tCtx.Errorf("%s: %s", w.Name, err)
54+
}
55+
}
56+
57+
// Reset metrics to prevent metrics generated in the current workload from
58+
// being carried over to the next workload.
59+
legacyregistry.Reset()
4660
})
4761
}
4862
})

test/integration/scheduler_perf/util.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,19 @@ func newMetricsCollector(config *metricsCollectorConfig, labels map[string]strin
263263
}
264264
}
265265

266-
func (mc *metricsCollector) init() {
266+
func (mc *metricsCollector) init() error {
267267
// Reset the metrics so that the measurements do not interfere with those collected during the previous steps.
268-
legacyregistry.Reset()
268+
m, err := legacyregistry.DefaultGatherer.Gather()
269+
if err != nil {
270+
return fmt.Errorf("failed to gather metrics to reset: %w", err)
271+
}
272+
for _, mFamily := range m {
273+
// Reset only metrics defined in the collector.
274+
if _, ok := mc.Metrics[mFamily.GetName()]; ok {
275+
mFamily.Reset()
276+
}
277+
}
278+
return nil
269279
}
270280

271281
func (*metricsCollector) run(tCtx ktesting.TContext) {
@@ -381,7 +391,8 @@ func newThroughputCollector(podInformer coreinformers.PodInformer, labels map[st
381391
}
382392
}
383393

384-
func (tc *throughputCollector) init() {
394+
func (tc *throughputCollector) init() error {
395+
return nil
385396
}
386397

387398
func (tc *throughputCollector) run(tCtx ktesting.TContext) {

0 commit comments

Comments
 (0)