yongruilin
diff --git a/‎staging/src/k8s.io/component-base/metrics/testutil/metrics.go‎
Lines changed: 46 additions & 6 deletions b/‎staging/src/k8s.io/component-base/metrics/testutil/metrics.go‎
Lines changed: 46 additions & 6 deletions
diff --git a/‎staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go‎
Lines changed: 102 additions & 0 deletions b/‎staging/src/k8s.io/component-base/metrics/testutil/metrics_test.go‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎test/integration/scheduler_perf/README.md‎
Lines changed: 19 additions & 0 deletions b/‎test/integration/scheduler_perf/README.md‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎test/integration/scheduler_perf/config/performance-config.yaml‎
Lines changed: 122 additions & 1 deletion b/‎test/integration/scheduler_perf/config/performance-config.yaml‎
Lines changed: 122 additions & 1 deletion
diff --git a/‎test/integration/scheduler_perf/create.go‎
Lines changed: 0 additions & 3 deletions b/‎test/integration/scheduler_perf/create.go‎
Lines changed: 0 additions & 3 deletions
@@ -258,12 +258,8 @@ func GetHistogramVecFromGatherer(gatherer metrics.Gatherer, metricName string, l
 	if err != nil {
 		return nil, err
 	}
-	for _, mFamily := range m {
-		if mFamily.GetName() == metricName {
-			metricFamily = mFamily
-			break
-		}
-	}
+
+	metricFamily = findMetricFamily(m, metricName)
 
 	if metricFamily == nil {
 		return nil, fmt.Errorf("metric %q not found", metricName)
@@ -433,3 +429,47 @@ func LabelsMatch(metric *dto.Metric, labelFilter map[string]string) bool {
 
 	return true
 }
+
+// GetCounterValuesFromGatherer collects a counter that matches the given name
+// from a gatherer implementing k8s.io/component-base/metrics.Gatherer interface.
+// It returns all counter values that had a label with a certain name in a map
+// that uses the label value as keys.
+//
+// Used only for testing purposes where we need to gather metrics directly from a running binary (without metrics endpoint).
+func GetCounterValuesFromGatherer(gatherer metrics.Gatherer, metricName string, lvMap map[string]string, labelName string) (map[string]float64, error) {
+	m, err := gatherer.Gather()
+	if err != nil {
+		return nil, err
+	}
+
+	metricFamily := findMetricFamily(m, metricName)
+	if metricFamily == nil {
+		return nil, fmt.Errorf("metric %q not found", metricName)
+	}
+	if len(metricFamily.GetMetric()) == 0 {
+		return nil, fmt.Errorf("metric %q is empty", metricName)
+	}
+
+	values := make(map[string]float64)
+	for _, metric := range metricFamily.GetMetric() {
+		if LabelsMatch(metric, lvMap) {
+			if counter := metric.GetCounter(); counter != nil {
+				for _, labelPair := range metric.Label {
+					if labelPair.GetName() == labelName {
+						values[labelPair.GetValue()] = counter.GetValue()
+					}
+				}
+			}
+		}
+	}
+	return values, nil
+}
+
+func findMetricFamily(metricFamilies []*dto.MetricFamily, metricName string) *dto.MetricFamily {
+	for _, mFamily := range metricFamilies {
+		if mFamily.GetName() == metricName {
+			return mFamily
+		}
+	}
+	return nil
+}
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"math"
 	"reflect"
+	"strings"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
@@ -591,3 +592,104 @@ func TestGetHistogramVecFromGatherer(t *testing.T) {
 		})
 	}
 }
+
+func TestGetCounterValuesFromGatherer(t *testing.T) {
+	namespace := "namespace"
+	subsystem := "subsystem"
+	name := "metric_test_name"
+	metricName := fmt.Sprintf("%s_%s_%s", namespace, subsystem, name)
+
+	tests := map[string]struct {
+		metricName string // Empty is replaced with valid name.
+		lvMap      map[string]string
+		labelName  string
+
+		wantCounterValues map[string]float64
+		wantErr           string
+	}{
+		"wrong-metric": {
+			metricName: "no-such-metric",
+			wantErr:    `metric "no-such-metric" not found`,
+		},
+
+		"none": {
+			metricName: metricName,
+			lvMap:      map[string]string{"no-such-label": "a"},
+
+			wantCounterValues: map[string]float64{},
+		},
+
+		"value1-0": {
+			metricName: metricName,
+			lvMap:      map[string]string{"label1": "value1-0"},
+			labelName:  "label2",
+
+			wantCounterValues: map[string]float64{"value2-0": 1.5, "value2-1": 2.5},
+		},
+
+		"value1-1": {
+			metricName: metricName,
+			lvMap:      map[string]string{"label1": "value1-1"},
+			labelName:  "label2",
+
+			wantCounterValues: map[string]float64{"value2-0": 3.5, "value2-1": 4.5},
+		},
+
+		"value1-1-value2-0-none": {
+			metricName: metricName,
+			lvMap:      map[string]string{"label1": "value1-1", "label2": "value2-0"},
+			labelName:  "none",
+
+			wantCounterValues: map[string]float64{},
+		},
+
+		"value1-0-value2-0-one": {
+			metricName: metricName,
+			lvMap:      map[string]string{"label1": "value1-0", "label2": "value2-0"},
+			labelName:  "label2",
+
+			wantCounterValues: map[string]float64{"value2-0": 1.5},
+		},
+	}
+	for name, tt := range tests {
+		t.Run(name, func(t *testing.T) {
+			// CounterVec has two labels defined.
+			labels := []string{"label1", "label2"}
+			counterOpts := &metrics.CounterOpts{
+				Namespace: "namespace",
+				Name:      "metric_test_name",
+				Subsystem: "subsystem",
+				Help:      "counter help message",
+			}
+			vec := metrics.NewCounterVec(counterOpts, labels)
+			// Use local registry
+			var registry = metrics.NewKubeRegistry()
+			var gather metrics.Gatherer = registry
+			registry.MustRegister(vec)
+			// Increment four counters: each of the two label1 values is combined with two different label2 values.
+			vec.WithLabelValues("value1-0", "value2-0").Add(1.5)
+			vec.WithLabelValues("value1-0", "value2-1").Add(2.5)
+			vec.WithLabelValues("value1-1", "value2-0").Add(3.5)
+			vec.WithLabelValues("value1-1", "value2-1").Add(4.5)
+
+			// The check for empty metric apparently cannot be tested: registering
+			// a NewCounterVec with no values has the effect that it doesn't get
+			// returned, leading to "not found".
+
+			counterValues, err := GetCounterValuesFromGatherer(gather, tt.metricName, tt.lvMap, tt.labelName)
+			if err != nil {
+				if tt.wantErr != "" && !strings.Contains(err.Error(), tt.wantErr) {
+					t.Errorf("expected error %q, got instead: %v", tt.wantErr, err)
+				}
+				return
+			}
+			if tt.wantErr != "" {
+				t.Fatalf("expected error %q, got none", tt.wantErr)
+			}
+
+			if diff := cmp.Diff(tt.wantCounterValues, counterValues); diff != "" {
+				t.Errorf("Got unexpected HistogramVec (-want +got):\n%s", diff)
+			}
+		})
+	}
+}
@@ -175,3 +175,22 @@ the ci-benchmark-scheduler-perf periodic job will fail with an error log such as
 This allows to analyze which workload failed. Make sure that the failure is not an outlier 
 by checking multiple runs of the job. If the failures are not related to any regression, 
 but to an incorrect threshold setting, it is reasonable to decrease it.
+
+### Visualization
+
+Some support for visualizing progress over time is built into the
+benchmarks. The measurement operation which creates pods writes .dat files like
+this:
+
+     test/integration/scheduler_perf/SchedulingBasic_5000Nodes_2023-03-17T14:52:09Z.dat
+
+This file is in a text format that [gnuplot](http://www.gnuplot.info/) can
+read. A wrapper script selects some suitable parameters:
+
+     test/integration/scheduler_perf/gnuplot.sh test/integration/scheduler_perf/*.dat
+
+It plots in an interactive window by default. To write into a file, use
+
+    test/integration/scheduler_perf/gnuplot.sh \
+       -e 'set term png; set output "<output>.png"' \
+       test/integration/scheduler_perf/*.dat
@@ -1167,7 +1167,9 @@
       maxClaimsPerNode: 20
 
 # SchedulingWithResourceClaimTemplateStructured uses a ResourceClaimTemplate
-# and dynamically creates ResourceClaim instances for each pod.
+# and dynamically creates ResourceClaim instances for each pod. Node, pod and
+# device counts are chosen so that the cluster gets filled up completely.
+#
 # The driver uses structured parameters.
 - name: SchedulingWithResourceClaimTemplateStructured
   featureGates:
@@ -1234,6 +1236,125 @@
       measurePods: 2500
       maxClaimsPerNode: 10
 
+# SteadyStateClusterResourceClaimTemplateStructured uses a ResourceClaimTemplate
+# and dynamically creates ResourceClaim instances for each pod. It creates ten
+# pods, waits for them to be scheduled, deletes them, and starts again,
+# so the cluster remains at the same level of utilization.
+#
+# The number of already allocated claims can be varied, thus simulating
+# various degrees of pre-existing resource utilization.
+#
+# The driver uses structured parameters.
+- name: SteadyStateClusterResourceClaimTemplateStructured
+  featureGates:
+    DynamicResourceAllocation: true
+    # SchedulerQueueingHints: true
+  workloadTemplate:
+  - opcode: createNodes
+    countParam: $nodesWithoutDRA
+  - opcode: createNodes
+    nodeTemplatePath: config/dra/node-with-dra-test-driver.yaml
+    countParam: $nodesWithDRA
+  - opcode: createResourceDriver
+    driverName: test-driver.cdi.k8s.io
+    nodes: scheduler-perf-dra-*
+    maxClaimsPerNodeParam: $maxClaimsPerNode
+    structuredParameters: true
+  - opcode: createAny
+    templatePath: config/dra/deviceclass-structured.yaml
+  - opcode: createAny
+    templatePath: config/dra/resourceclaim-structured.yaml
+    countParam: $initClaims
+    namespace: init
+  - opcode: allocResourceClaims
+    namespace: init
+  - opcode: createAny
+    templatePath: config/dra/resourceclaimtemplate-structured.yaml
+    namespace: test
+  - opcode: createPods
+    namespace: test
+    count: 10
+    steadyState: true
+    durationParam: $duration
+    podTemplatePath: config/dra/pod-with-claim-template.yaml
+    collectMetrics: true
+  workloads:
+  - name: fast
+    labels: [integration-test, fast, short]
+    params:
+      # This testcase runs through all code paths without
+      # taking too long overall.
+      nodesWithDRA: 1
+      nodesWithoutDRA: 1
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 2s
+  - name: empty_100nodes
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: empty_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: empty_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initClaims: 0
+      maxClaimsPerNode: 10
+      duration: 10s
+  # In the "half" scenarios, half of the devices are in use.
+  - name: half_100nodes
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initClaims: 500
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: half_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initClaims: 1000
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: half_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initClaims: 2500
+      maxClaimsPerNode: 10
+      duration: 10s
+  # In the "full" scenarios, the cluster can accommodate exactly 10 additional pods.
+  - name: full_100nodes
+    params:
+      nodesWithDRA: 100
+      nodesWithoutDRA: 0
+      initClaims: 990
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: full_200nodes
+    params:
+      nodesWithDRA: 200
+      nodesWithoutDRA: 0
+      initClaims: 1990
+      maxClaimsPerNode: 10
+      duration: 10s
+  - name: full_500nodes
+    params:
+      nodesWithDRA: 500
+      nodesWithoutDRA: 0
+      initClaims: 4990
+      maxClaimsPerNode: 10
+      duration: 10s
+
 # SchedulingWithResourceClaimTemplate uses ResourceClaims
 # with deterministic names that are shared between pods.
 # There is a fixed ratio of 1:5 between claims and pods.
 
@@ -56,9 +56,6 @@ type createAny struct {
 var _ runnableOp = &createAny{}
 
 func (c *createAny) isValid(allowParameterization bool) error {
-	if c.Opcode != createAnyOpcode {
-		return fmt.Errorf("invalid opcode %q; expected %q", c.Opcode, createAnyOpcode)
-	}
 	if c.TemplatePath == "" {
 		return fmt.Errorf("TemplatePath must be set")
 	}
Original file line number	Diff line number	Diff line change
`@@ -56,9 +56,6 @@ type createAny struct {`
`56`	`56`	`var _ runnableOp = &createAny{}`
`57`	`57`
`58`	`58`	`func (c *createAny) isValid(allowParameterization bool) error {`
`59`		`- if c.Opcode != createAnyOpcode {`
`60`		`- return fmt.Errorf("invalid opcode %q; expected %q", c.Opcode, createAnyOpcode)`
`61`		`- }`
`62`	`59`	`if c.TemplatePath == "" {`
`63`	`60`	`return fmt.Errorf("TemplatePath must be set")`
`64`	`61`	`}`