
Commit ec86baf

Merge pull request kubernetes#84987 from RainbowMango/pr_migrate_custom_collector_kubelet_part2
migrate kubelet custom metrics to stability framework part 2
2 parents b3dde20 + 159f56b

8 files changed: +296 additions, −293 deletions
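For orientation, the change that drives most of this diff is the switch from prometheus.NewDesc to the component-base metrics.NewDesc, which attaches an explicit stability level and a deprecated-since version to every descriptor. A minimal sketch, not part of this commit, showing the two constructors side by side (signatures as they appear in the hunks below):

package example

import (
    "github.com/prometheus/client_golang/prometheus"
    "k8s.io/component-base/metrics"
)

var (
    // Old style: a plain Prometheus descriptor with no stability metadata.
    legacyDesc = prometheus.NewDesc(
        "node_cpu_usage_seconds_total",
        "Cumulative cpu time consumed by the node in core-seconds",
        nil, nil)

    // New style: a component-base descriptor carrying a stability level
    // (ALPHA here) and a deprecated-since version ("" means not deprecated).
    stableDesc = metrics.NewDesc(
        "node_cpu_usage_seconds_total",
        "Cumulative cpu time consumed by the node in core-seconds",
        nil, nil,
        metrics.ALPHA,
        "")
)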

pkg/kubelet/apis/resourcemetrics/v1alpha1/BUILD

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ go_library(
     deps = [
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/server/stats:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
     ],
 )

pkg/kubelet/apis/resourcemetrics/v1alpha1/config.go

Lines changed: 75 additions & 36 deletions
@@ -19,62 +19,101 @@ package v1alpha1
 import (
     "time"
 
+    "k8s.io/component-base/metrics"
     summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
     "k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
 
 // Version is the string representation of the version of this configuration
 const Version = "v1alpha1"
 
+var (
+    nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+        "Cumulative cpu time consumed by the node in core-seconds",
+        nil,
+        nil,
+        metrics.ALPHA,
+        "")
+
+    nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+        "Current working set of the node in bytes",
+        nil,
+        nil,
+        metrics.ALPHA,
+        "")
+
+    containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+        "Cumulative cpu time consumed by the container in core-seconds",
+        []string{"container", "pod", "namespace"},
+        nil,
+        metrics.ALPHA,
+        "")
+
+    containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+        "Current working set of the container in bytes",
+        []string{"container", "pod", "namespace"},
+        nil,
+        metrics.ALPHA,
+        "")
+)
+
+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+    if s.CPU == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+    return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+    if s.Memory == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.Memory.WorkingSetBytes)
+    return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+    if s.CPU == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+    return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+    if s.Memory == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.Memory.WorkingSetBytes)
+    return &v, s.Memory.Time.Time
+}
+
 // Config is the v1alpha1 resource metrics definition
 func Config() stats.ResourceMetricsConfig {
     return stats.ResourceMetricsConfig{
         NodeMetrics: []stats.NodeResourceMetric{
             {
-                Name: "node_cpu_usage_seconds_total",
-                Description: "Cumulative cpu time consumed by the node in core-seconds",
-                ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-                    if s.CPU == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-                    return &v, s.CPU.Time.Time
-                },
+                Desc:    nodeCPUUsageDesc,
+                ValueFn: getNodeCPUMetrics,
             },
             {
-                Name: "node_memory_working_set_bytes",
-                Description: "Current working set of the node in bytes",
-                ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-                    if s.Memory == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.Memory.WorkingSetBytes)
-                    return &v, s.Memory.Time.Time
-                },
+                Desc:    nodeMemoryUsageDesc,
+                ValueFn: getNodeMemoryMetrics,
             },
         },
         ContainerMetrics: []stats.ContainerResourceMetric{
             {
-                Name: "container_cpu_usage_seconds_total",
-                Description: "Cumulative cpu time consumed by the container in core-seconds",
-                ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-                    if s.CPU == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-                    return &v, s.CPU.Time.Time
-                },
+                Desc:    containerCPUUsageDesc,
+                ValueFn: getContainerCPUMetrics,
             },
             {
-                Name: "container_memory_working_set_bytes",
-                Description: "Current working set of the container in bytes",
-                ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-                    if s.Memory == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.Memory.WorkingSetBytes)
-                    return &v, s.Memory.Time.Time
-                },
+                Desc:    containerMemoryUsageDesc,
+                ValueFn: getContainerMemoryMetrics,
             },
         },
     }
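As a hedged illustration of how the pieces above fit together, here is a short sketch (not part of this commit) that could sit alongside config.go in package v1alpha1; the helper name is hypothetical. It turns the node CPU descriptor and its ValueFn into one timestamped sample, mirroring what the collector in pkg/kubelet/server/stats (later in this diff) does:

package v1alpha1

import (
    "k8s.io/component-base/metrics"
    summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// sampleNodeCPU is a hypothetical helper, shown only to illustrate how a
// *metrics.Desc and a ValueFn combine into a single emitted sample.
func sampleNodeCPU(node summary.NodeStats) metrics.Metric {
    // getNodeCPUMetrics converts cumulative nanosecond CPU usage into core-seconds.
    value, timestamp := getNodeCPUMetrics(node)
    if value == nil {
        // No CPU stats reported for this node yet.
        return nil
    }
    // The sample carries the stats timestamp rather than the scrape time.
    return metrics.NewLazyMetricWithTimestamp(timestamp,
        metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.GaugeValue, *value))
}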

pkg/kubelet/server/BUILD

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ go_library(
         "//vendor/github.com/google/cadvisor/container:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
         "//vendor/github.com/google/cadvisor/metrics:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],

pkg/kubelet/server/server.go

Lines changed: 4 additions & 5 deletions
@@ -36,7 +36,6 @@ import (
     cadvisormetrics "github.com/google/cadvisor/container"
     cadvisorapi "github.com/google/cadvisor/info/v1"
     "github.com/google/cadvisor/metrics"
-    "github.com/prometheus/client_golang/prometheus"
     "google.golang.org/grpc"
     "k8s.io/klog"
 
@@ -302,7 +301,7 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
     s.restfulCont.Handle(metricsPath, legacyregistry.Handler())
 
     // cAdvisor metrics are exposed under the secured handler as well
-    r := prometheus.NewRegistry()
+    r := compbasemetrics.NewKubeRegistry()
 
     includedMetrics := cadvisormetrics.MetricSet{
         cadvisormetrics.CpuUsageMetrics: struct{}{},
@@ -315,13 +314,13 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
         cadvisormetrics.AppMetrics:     struct{}{},
         cadvisormetrics.ProcessMetrics: struct{}{},
     }
-    r.MustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
+    r.RawMustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
     s.restfulCont.Handle(cadvisorMetricsPath,
         compbasemetrics.HandlerFor(r, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
     )
 
-    v1alpha1ResourceRegistry := prometheus.NewRegistry()
-    v1alpha1ResourceRegistry.MustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
+    v1alpha1ResourceRegistry := compbasemetrics.NewKubeRegistry()
+    v1alpha1ResourceRegistry.CustomMustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
     s.restfulCont.Handle(path.Join(resourceMetricsPathPrefix, v1alpha1.Version),
         compbasemetrics.HandlerFor(v1alpha1ResourceRegistry, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
     )
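The server.go hunks above replace plain Prometheus registries with component-base KubeRegistry instances. A minimal standalone sketch, assuming only the component-base calls already used in this diff, of that registration pattern; the function name and signature are illustrative and not part of the kubelet:

package example

import (
    "net/http"

    compbasemetrics "k8s.io/component-base/metrics"
)

// serveResourceMetrics shows the pattern used above: register a hand-written
// StableCollector on a KubeRegistry and expose it over HTTP.
func serveResourceMetrics(collector compbasemetrics.StableCollector) http.Handler {
    // KubeRegistry understands stability metadata; CustomMustRegister is the
    // entry point for custom (non-instrument) collectors.
    registry := compbasemetrics.NewKubeRegistry()
    registry.CustomMustRegister(collector)

    // ContinueOnError keeps the endpoint serving whatever metrics it can,
    // even if a collector fails partway through a scrape.
    return compbasemetrics.HandlerFor(registry,
        compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError})
}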

pkg/kubelet/server/stats/BUILD

Lines changed: 3 additions & 3 deletions
@@ -26,9 +26,9 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
         "//vendor/github.com/emicklei/go-restful:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
 )
@@ -49,8 +49,8 @@ go_test(
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
-        "//vendor/github.com/prometheus/client_model/go:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
         "//vendor/github.com/stretchr/testify/mock:go_default_library",
     ] + select({

pkg/kubelet/server/stats/prometheus_resource_metrics.go

Lines changed: 38 additions & 34 deletions
@@ -19,32 +19,29 @@ package stats
 import (
     "time"
 
+    "k8s.io/component-base/metrics"
     "k8s.io/klog"
     stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
-
-    "github.com/prometheus/client_golang/prometheus"
 )
 
 // NodeResourceMetric describes a metric for the node
 type NodeResourceMetric struct {
-    Name string
-    Description string
-    ValueFn func(stats.NodeStats) (*float64, time.Time)
+    Desc    *metrics.Desc
+    ValueFn func(stats.NodeStats) (*float64, time.Time)
 }
 
-func (n *NodeResourceMetric) desc() *prometheus.Desc {
-    return prometheus.NewDesc(n.Name, n.Description, []string{}, nil)
+func (n *NodeResourceMetric) desc() *metrics.Desc {
+    return n.Desc
 }
 
 // ContainerResourceMetric describes a metric for containers
 type ContainerResourceMetric struct {
-    Name string
-    Description string
-    ValueFn func(stats.ContainerStats) (*float64, time.Time)
+    Desc    *metrics.Desc
+    ValueFn func(stats.ContainerStats) (*float64, time.Time)
 }
 
-func (n *ContainerResourceMetric) desc() *prometheus.Desc {
-    return prometheus.NewDesc(n.Name, n.Description, []string{"container", "pod", "namespace"}, nil)
+func (n *ContainerResourceMetric) desc() *metrics.Desc {
+    return n.Desc
 }
 
 // ResourceMetricsConfig specifies which metrics to collect and export
@@ -53,29 +50,34 @@ type ResourceMetricsConfig struct {
     ContainerMetrics []ContainerResourceMetric
 }
 
-// NewPrometheusResourceMetricCollector returns a prometheus.Collector which exports resource metrics
-func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) prometheus.Collector {
+// NewPrometheusResourceMetricCollector returns a metrics.StableCollector which exports resource metrics
+func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) metrics.StableCollector {
     return &resourceMetricCollector{
         provider: provider,
         config: config,
-        errors: prometheus.NewGauge(prometheus.GaugeOpts{
-            Name: "scrape_error",
-            Help: "1 if there was an error while getting container metrics, 0 otherwise",
-        }),
+        errors: metrics.NewDesc("scrape_error",
+            "1 if there was an error while getting container metrics, 0 otherwise",
+            nil,
+            nil,
+            metrics.ALPHA,
+            ""),
     }
 }
 
 type resourceMetricCollector struct {
+    metrics.BaseStableCollector
+
     provider SummaryProvider
     config ResourceMetricsConfig
-    errors prometheus.Gauge
+    errors *metrics.Desc
 }
 
-var _ prometheus.Collector = &resourceMetricCollector{}
+var _ metrics.StableCollector = &resourceMetricCollector{}
+
+// DescribeWithStability implements metrics.StableCollector
+func (rc *resourceMetricCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
+    ch <- rc.errors
 
-// Describe implements prometheus.Collector
-func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
-    rc.errors.Describe(ch)
     for _, metric := range rc.config.NodeMetrics {
         ch <- metric.desc()
     }
@@ -84,33 +86,35 @@ func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
     }
 }
 
-// Collect implements prometheus.Collector
-// Since new containers are frequently created and removed, using the prometheus.Gauge Collector would
+// CollectWithStability implements metrics.StableCollector
+// Since new containers are frequently created and removed, using the Gauge would
 // leak metric collectors for containers or pods that no longer exist. Instead, implement
-// prometheus.Collector in a way that only collects metrics for active containers.
-func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
-    rc.errors.Set(0)
-    defer rc.errors.Collect(ch)
+// custom collector in a way that only collects metrics for active containers.
+func (rc *resourceMetricCollector) CollectWithStability(ch chan<- metrics.Metric) {
+    var errorCount float64
+    defer func() {
+        ch <- metrics.NewLazyConstMetric(rc.errors, metrics.GaugeValue, errorCount)
+    }()
     summary, err := rc.provider.GetCPUAndMemoryStats()
     if err != nil {
-        rc.errors.Set(1)
+        errorCount = 1
         klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
         return
     }
 
     for _, metric := range rc.config.NodeMetrics {
         if value, timestamp := metric.ValueFn(summary.Node); value != nil {
-            ch <- prometheus.NewMetricWithTimestamp(timestamp,
-                prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value))
+            ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+                metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value))
         }
     }
 
     for _, pod := range summary.Pods {
         for _, container := range pod.Containers {
             for _, metric := range rc.config.ContainerMetrics {
                 if value, timestamp := metric.ValueFn(container); value != nil {
-                    ch <- prometheus.NewMetricWithTimestamp(timestamp,
-                        prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
+                    ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+                        metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
                 }
             }
         }
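To summarize the collector pattern introduced above: embed metrics.BaseStableCollector, implement DescribeWithStability and CollectWithStability, and emit const metrics so nothing is retained between scrapes. A small self-contained sketch under those assumptions; the uptime metric and collector are invented for illustration and are not part of this commit:

package example

import (
    "time"

    "k8s.io/component-base/metrics"
)

// uptimeDesc is a hypothetical descriptor, shown only to illustrate the pattern.
var uptimeDesc = metrics.NewDesc("example_uptime_seconds",
    "Seconds since this process started (illustrative metric only)",
    nil, nil, metrics.ALPHA, "")

type uptimeCollector struct {
    metrics.BaseStableCollector
    startTime time.Time
}

var _ metrics.StableCollector = &uptimeCollector{}

// DescribeWithStability reports every descriptor this collector can emit.
func (c *uptimeCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
    ch <- uptimeDesc
}

// CollectWithStability builds const metrics fresh on every scrape, so no state
// accumulates between scrapes -- the same property the kubelet collector relies
// on to avoid leaking per-container series.
func (c *uptimeCollector) CollectWithStability(ch chan<- metrics.Metric) {
    ch <- metrics.NewLazyConstMetric(uptimeDesc, metrics.GaugeValue,
        time.Since(c.startTime).Seconds())
}

Registering such a collector would follow the same route as in server.go above: registry.CustomMustRegister(&uptimeCollector{startTime: time.Now()}).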
