Commit 38ecb30

Revert "Collect some of scheduling metrics and scheduling throughput"
1 parent 6858c25 commit 38ecb30

3 files changed: 2 additions and 314 deletions

test/integration/scheduler_perf/BUILD

Lines changed: 0 additions & 3 deletions
@@ -20,10 +20,7 @@ go_library(
         "//staging/src/k8s.io/client-go/informers/core/v1:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/client-go/rest:go_default_library",
-        "//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
         "//test/integration/util:go_default_library",
-        "//vendor/github.com/prometheus/client_model/go:go_default_library",
-        "//vendor/k8s.io/klog:go_default_library",
     ],
 )

test/integration/scheduler_perf/scheduler_perf_test.go

Lines changed: 2 additions & 45 deletions
@@ -38,15 +38,6 @@ const (
     configFile = "config/performance-config.yaml"
 )
 
-var (
-    defaultMetrics = []string{
-        "scheduler_scheduling_algorithm_predicate_evaluation_seconds",
-        "scheduler_scheduling_algorithm_priority_evaluation_seconds",
-        "scheduler_binding_duration_seconds",
-        "scheduler_e2e_scheduling_duration_seconds",
-    }
-)
-
 // testCase configures a test case to run the scheduler performance test. Users should be able to
 // provide this via a YAML file.
 //
@@ -101,7 +92,6 @@ type testParams struct {
 }
 
 func BenchmarkPerfScheduling(b *testing.B) {
-    dataItems := DataItems{Version: "v1"}
     tests := getSimpleTestCases(configFile)
 
     for _, test := range tests {
@@ -110,15 +100,12 @@ func BenchmarkPerfScheduling(b *testing.B) {
             for feature, flag := range test.FeatureGates {
                 defer featuregatetesting.SetFeatureGateDuringTest(b, utilfeature.DefaultFeatureGate, feature, flag)()
             }
-            dataItems.DataItems = append(dataItems.DataItems, perfScheduling(test, b)...)
+            perfScheduling(test, b)
         })
     }
-    if err := dataItems2JSONFile(dataItems, b.Name()); err != nil {
-        klog.Fatalf("%v: unable to write measured data: %v", b.Name(), err)
-    }
 }
 
-func perfScheduling(test testCase, b *testing.B) []DataItem {
+func perfScheduling(test testCase, b *testing.B) {
     var nodeStrategy testutils.PrepareNodeStrategy = &testutils.TrivialNodePrepareStrategy{}
     if test.Nodes.NodeAllocatableStrategy != nil {
         nodeStrategy = test.Nodes.NodeAllocatableStrategy
@@ -193,45 +180,15 @@ func perfScheduling(test testCase, b *testing.B) []DataItem {
 
     // start benchmark
     b.ResetTimer()
-
-    // Start measuring throughput
-    stopCh := make(chan struct{})
-    throughputCollector := newThroughputCollector(podInformer)
-    go throughputCollector.run(stopCh)
-
-    // Scheduling the main workload
     config = testutils.NewTestPodCreatorConfig()
     config.AddStrategy(testNamespace, test.PodsToSchedule.Num, testPodStrategy)
     podCreator = testutils.NewTestPodCreator(clientset, config)
     podCreator.CreatePods()
 
     <-completedCh
-    close(stopCh)
 
     // Note: without this line we're taking the overhead of defer() into account.
     b.StopTimer()
-
-    setNameLabel := func(dataItem *DataItem) DataItem {
-        if dataItem.Labels == nil {
-            dataItem.Labels = map[string]string{}
-        }
-        dataItem.Labels["Name"] = b.Name()
-        return *dataItem
-    }
-
-    dataItems := []DataItem{
-        setNameLabel(throughputCollector.collect()),
-    }
-
-    for _, metric := range defaultMetrics {
-        dataItem := newPrometheusCollector(metric).collect()
-        if dataItem == nil {
-            continue
-        }
-        dataItems = append(dataItems, setNameLabel(dataItem))
-    }
-
-    return dataItems
 }
 
 func getPodStrategy(pc podCase) testutils.TestPodCreateStrategy {
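For context, the reverted instrumentation tied this benchmark to the collectors that the commit also deletes from util.go (next file). The sketch below condenses that flow into a single hypothetical helper, `collectPerfData`, a name introduced here only for illustration; it relies on the deleted identifiers (`newThroughputCollector`, `newPrometheusCollector`, `defaultMetrics`, `DataItem`), so it does not compile against the tree after this revert.

```go
// Hypothetical condensation of the reverted flow, assembled from the deleted
// lines above and below; not code that exists in the repository.
func collectPerfData(b *testing.B, podInformer coreinformers.PodInformer, runWorkload func()) []DataItem {
    // Sample scheduling throughput in the background while the workload runs.
    stopCh := make(chan struct{})
    tc := newThroughputCollector(podInformer)
    go tc.run(stopCh)

    runWorkload() // create the pods and block until all of them are scheduled
    close(stopCh)
    b.StopTimer()

    // Summarize throughput plus the histogram-backed scheduler metrics.
    items := []DataItem{*tc.collect()}
    for _, metric := range defaultMetrics {
        if di := newPrometheusCollector(metric).collect(); di != nil {
            items = append(items, *di)
        }
    }
    return items
}
```

In the reverted BenchmarkPerfScheduling, the collected items were additionally labelled with b.Name() via setNameLabel and written out once per benchmark run through dataItems2JSONFile.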

test/integration/scheduler_perf/util.go

Lines changed: 0 additions & 266 deletions
@@ -17,34 +17,15 @@ limitations under the License.
 package benchmark
 
 import (
-    "encoding/json"
-    "flag"
-    "fmt"
-    "io/ioutil"
-    "math"
-    "path"
-    "sort"
-    "time"
-
-    dto "github.com/prometheus/client_model/go"
     v1 "k8s.io/api/core/v1"
     "k8s.io/apimachinery/pkg/labels"
     "k8s.io/apimachinery/pkg/runtime/schema"
     coreinformers "k8s.io/client-go/informers/core/v1"
     clientset "k8s.io/client-go/kubernetes"
     restclient "k8s.io/client-go/rest"
-    "k8s.io/component-base/metrics/legacyregistry"
-    "k8s.io/klog"
     "k8s.io/kubernetes/test/integration/util"
 )
 
-const (
-    dateFormat                = "2006-01-02T15:04:05Z"
-    throughputSampleFrequency = time.Second
-)
-
-var dataItemsDir = flag.String("data-items-dir", "", "destination directory for storing generated data items for perf dashboard")
-
 // mustSetupScheduler starts the following components:
 // - k8s api server (a.k.a. master)
 // - scheduler
@@ -85,250 +66,3 @@ func getScheduledPods(podInformer coreinformers.PodInformer) ([]*v1.Pod, error)
     }
     return scheduled, nil
 }
-
-// DataItem is the data point.
-type DataItem struct {
-    // Data is a map from bucket to real data point (e.g. "Perc90" -> 23.5). Notice
-    // that all data items with the same label combination should have the same buckets.
-    Data map[string]float64 `json:"data"`
-    // Unit is the data unit. Notice that all data items with the same label combination
-    // should have the same unit.
-    Unit string `json:"unit"`
-    // Labels is the labels of the data item.
-    Labels map[string]string `json:"labels,omitempty"`
-}
-
-// DataItems is the data point set. It is the struct that perf dashboard expects.
-type DataItems struct {
-    Version   string     `json:"version"`
-    DataItems []DataItem `json:"dataItems"`
-}
-
-func dataItems2JSONFile(dataItems DataItems, namePrefix string) error {
-    b, err := json.Marshal(dataItems)
-    if err != nil {
-        return err
-    }
-
-    destFile := fmt.Sprintf("%v_%v.json", namePrefix, time.Now().Format(dateFormat))
-    if *dataItemsDir != "" {
-        destFile = path.Join(*dataItemsDir, destFile)
-    }
-
-    return ioutil.WriteFile(destFile, b, 0644)
-}
-
-// prometheusCollector collects metrics from legacyregistry.DefaultGatherer.Gather() endpoint.
-// Currently only Histrogram metrics are supported.
-type prometheusCollector struct {
-    metric string
-    cache  *dto.MetricFamily
-}
-
-func newPrometheusCollector(metric string) *prometheusCollector {
-    return &prometheusCollector{
-        metric: metric,
-    }
-}
-
-func (pc *prometheusCollector) collect() *DataItem {
-    var metricFamily *dto.MetricFamily
-    m, err := legacyregistry.DefaultGatherer.Gather()
-    if err != nil {
-        klog.Error(err)
-        return nil
-    }
-    for _, mFamily := range m {
-        if mFamily.Name != nil && *mFamily.Name == pc.metric {
-            metricFamily = mFamily
-            break
-        }
-    }
-
-    if metricFamily == nil {
-        klog.Infof("Metric %q not found", pc.metric)
-        return nil
-    }
-
-    if metricFamily.GetMetric() == nil {
-        klog.Infof("Metric %q is empty", pc.metric)
-        return nil
-    }
-
-    if len(metricFamily.GetMetric()) == 0 {
-        klog.Infof("Metric %q is empty", pc.metric)
-        return nil
-    }
-
-    // Histograms are stored under the first index (based on observation).
-    // Given there's only one histogram registered per each metric name, accessaing
-    // the first index is sufficient.
-    dataItem := pc.promHist2Summary(metricFamily.GetMetric()[0].GetHistogram())
-    if dataItem.Data == nil {
-        return nil
-    }
-
-    // clear the metrics so that next test always starts with empty prometheus
-    // metrics (since the metrics are shared among all tests run inside the same binary)
-    clearPromHistogram(metricFamily.GetMetric()[0].GetHistogram())
-
-    return dataItem
-}
-
-// Bucket of a histogram
-type bucket struct {
-    upperBound float64
-    count      float64
-}
-
-func bucketQuantile(q float64, buckets []bucket) float64 {
-    if q < 0 {
-        return math.Inf(-1)
-    }
-    if q > 1 {
-        return math.Inf(+1)
-    }
-
-    if len(buckets) < 2 {
-        return math.NaN()
-    }
-
-    rank := q * buckets[len(buckets)-1].count
-    b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
-
-    if b == 0 {
-        return buckets[0].upperBound * (rank / buckets[0].count)
-    }
-
-    // linear approximation of b-th bucket
-    brank := rank - buckets[b-1].count
-    bSize := buckets[b].upperBound - buckets[b-1].upperBound
-    bCount := buckets[b].count - buckets[b-1].count
-
-    return buckets[b-1].upperBound + bSize*(brank/bCount)
-}
-
-func (pc *prometheusCollector) promHist2Summary(hist *dto.Histogram) *DataItem {
-    buckets := []bucket{}
-
-    if hist.SampleCount == nil || *hist.SampleCount == 0 {
-        return &DataItem{}
-    }
-
-    if hist.SampleSum == nil || *hist.SampleSum == 0 {
-        return &DataItem{}
-    }
-
-    for _, bckt := range hist.Bucket {
-        if bckt == nil {
-            return &DataItem{}
-        }
-        if bckt.UpperBound == nil || *bckt.UpperBound < 0 {
-            return &DataItem{}
-        }
-        buckets = append(buckets, bucket{
-            count:      float64(*bckt.CumulativeCount),
-            upperBound: *bckt.UpperBound,
-        })
-    }
-
-    // bucketQuantile expects the upper bound of the last bucket to be +inf
-    buckets[len(buckets)-1].upperBound = math.Inf(+1)
-
-    q50 := bucketQuantile(0.50, buckets)
-    q90 := bucketQuantile(0.90, buckets)
-    q99 := bucketQuantile(0.95, buckets)
-
-    msFactor := float64(time.Second) / float64(time.Millisecond)
-
-    return &DataItem{
-        Labels: map[string]string{
-            "Metric": pc.metric,
-        },
-        Data: map[string]float64{
-            "Perc50":  q50 * msFactor,
-            "Perc90":  q90 * msFactor,
-            "Perc99":  q99 * msFactor,
-            "Average": (*hist.SampleSum / float64(*hist.SampleCount)) * msFactor,
-        },
-        Unit: "ms",
-    }
-}
-
-func clearPromHistogram(hist *dto.Histogram) {
-    if hist.SampleCount != nil {
-        *hist.SampleCount = 0
-    }
-    if hist.SampleSum != nil {
-        *hist.SampleSum = 0
-    }
-    for _, b := range hist.Bucket {
-        if b.CumulativeCount != nil {
-            *b.CumulativeCount = 0
-        }
-        if b.UpperBound != nil {
-            *b.UpperBound = 0
-        }
-    }
-}
-
-type throughputCollector struct {
-    podInformer           coreinformers.PodInformer
-    schedulingThroughputs []float64
-}
-
-func newThroughputCollector(podInformer coreinformers.PodInformer) *throughputCollector {
-    return &throughputCollector{
-        podInformer: podInformer,
-    }
-}
-
-func (tc *throughputCollector) run(stopCh chan struct{}) {
-    podsScheduled, err := getScheduledPods(tc.podInformer)
-    if err != nil {
-        klog.Fatalf("%v", err)
-    }
-    lastScheduledCount := len(podsScheduled)
-    for {
-        select {
-        case <-stopCh:
-            return
-        case <-time.After(throughputSampleFrequency):
-            podsScheduled, err := getScheduledPods(tc.podInformer)
-            if err != nil {
-                klog.Fatalf("%v", err)
-            }
-
-            scheduled := len(podsScheduled)
-            samplingRatioSeconds := float64(throughputSampleFrequency) / float64(time.Second)
-            throughput := float64(scheduled-lastScheduledCount) / samplingRatioSeconds
-            tc.schedulingThroughputs = append(tc.schedulingThroughputs, throughput)
-            lastScheduledCount = scheduled
-
-            klog.Infof("%d pods scheduled", lastScheduledCount)
-        }
-    }
-}
-
-func (tc *throughputCollector) collect() *DataItem {
-    throughputSummary := &DataItem{}
-    if length := len(tc.schedulingThroughputs); length > 0 {
-        sort.Float64s(tc.schedulingThroughputs)
-        sum := 0.0
-        for i := range tc.schedulingThroughputs {
-            sum += tc.schedulingThroughputs[i]
-        }
-
-        throughputSummary.Labels = map[string]string{
-            "Metric": "SchedulingThroughput",
-        }
-        throughputSummary.Data = map[string]float64{
-            "Average": sum / float64(length),
-            "Perc50":  tc.schedulingThroughputs[int(math.Ceil(float64(length*50)/100))-1],
-            "Perc90":  tc.schedulingThroughputs[int(math.Ceil(float64(length*90)/100))-1],
-            "Perc99":  tc.schedulingThroughputs[int(math.Ceil(float64(length*99)/100))-1],
-        }
-        throughputSummary.Unit = "pods/s"
-    }
-    return throughputSummary
-}
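The deleted bucketQuantile helper estimates a quantile from a histogram's cumulative buckets by finding the bucket whose cumulative count reaches the target rank and interpolating linearly inside it. The following self-contained sketch repeats that core logic (trimmed of the deleted function's range checks) on made-up bucket values to show the arithmetic:

```go
package main

import (
    "fmt"
    "math"
    "sort"
)

// bucket mirrors the deleted type: a cumulative sample count up to upperBound.
type bucket struct {
    upperBound float64
    count      float64
}

// quantile repeats the deleted bucketQuantile interpolation, without its
// q-range and length checks.
func quantile(q float64, buckets []bucket) float64 {
    rank := q * buckets[len(buckets)-1].count
    b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
    if b == 0 {
        return buckets[0].upperBound * (rank / buckets[0].count)
    }
    // Linear approximation inside the b-th bucket.
    brank := rank - buckets[b-1].count
    bSize := buckets[b].upperBound - buckets[b-1].upperBound
    bCount := buckets[b].count - buckets[b-1].count
    return buckets[b-1].upperBound + bSize*(brank/bCount)
}

func main() {
    // Invented cumulative buckets: 10 samples <= 0.1s, 90 samples <= 0.5s, 100 in total.
    // The last upper bound is +Inf, as promHist2Summary enforced before calling the helper.
    buckets := []bucket{
        {upperBound: 0.1, count: 10},
        {upperBound: 0.5, count: 90},
        {upperBound: math.Inf(+1), count: 100},
    }
    // rank = 0.9*100 = 90 lands at the end of the second bucket, so the
    // interpolation 0.1 + (0.5-0.1)*((90-10)/(90-10)) gives 0.5 seconds.
    fmt.Println(quantile(0.90, buckets))
}
```

promHist2Summary then scaled such values from seconds to milliseconds via msFactor before reporting them as Perc50, Perc90, Perc99, and Average.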

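The deleted DataItem and DataItems types, together with dataItems2JSONFile, defined the JSON that the perf dashboard consumes: one file per benchmark, named with the benchmark name plus a timestamp and optionally placed under the directory given by the (also deleted) -data-items-dir flag. Below is a minimal, self-contained sketch of that output shape; the type definitions are copied from the deletion above, and the numbers are invented:

```go
package main

import (
    "encoding/json"
    "fmt"
)

// Copies of the deleted types, reproduced only to show the serialized shape.
type DataItem struct {
    Data   map[string]float64 `json:"data"`
    Unit   string             `json:"unit"`
    Labels map[string]string  `json:"labels,omitempty"`
}

type DataItems struct {
    Version   string     `json:"version"`
    DataItems []DataItem `json:"dataItems"`
}

func main() {
    items := DataItems{
        Version: "v1",
        DataItems: []DataItem{{
            Labels: map[string]string{"Metric": "SchedulingThroughput", "Name": "BenchmarkPerfScheduling"},
            Data:   map[string]float64{"Average": 98.3, "Perc50": 97.1, "Perc90": 104.6, "Perc99": 111.9},
            Unit:   "pods/s",
        }},
    }
    // dataItems2JSONFile marshalled a struct like this and wrote it to disk.
    out, err := json.MarshalIndent(items, "", "  ")
    if err != nil {
        panic(err)
    }
    fmt.Println(string(out))
}
```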