
Commit ec86baf

Merge pull request kubernetes#84987 from RainbowMango/pr_migrate_custom_collector_kubelet_part2
migrate kubelet custom metrics to stability framework part 2
2 parents b3dde20 + 159f56b

8 files changed: +296 additions, −293 deletions
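For orientation, the change that drives most of this diff is the switch from prometheus.NewDesc to the component-base metrics.NewDesc, which attaches an explicit stability level and a deprecated-since version to every descriptor. A minimal sketch, not part of this commit, showing the two constructors side by side (signatures as they appear in the hunks below):

package example

import (
    "github.com/prometheus/client_golang/prometheus"
    "k8s.io/component-base/metrics"
)

var (
    // Old style: a plain Prometheus descriptor with no stability metadata.
    legacyDesc = prometheus.NewDesc(
        "node_cpu_usage_seconds_total",
        "Cumulative cpu time consumed by the node in core-seconds",
        nil, nil)

    // New style: a component-base descriptor carrying a stability level
    // (ALPHA here) and a deprecated-since version ("" means not deprecated).
    stableDesc = metrics.NewDesc(
        "node_cpu_usage_seconds_total",
        "Cumulative cpu time consumed by the node in core-seconds",
        nil, nil,
        metrics.ALPHA,
        "")
)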

pkg/kubelet/apis/resourcemetrics/v1alpha1/BUILD

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ go_library(
     deps = [
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/server/stats:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
     ],
 )

pkg/kubelet/apis/resourcemetrics/v1alpha1/config.go

Lines changed: 75 additions & 36 deletions
@@ -19,62 +19,101 @@ package v1alpha1
 import (
     "time"
 
+    "k8s.io/component-base/metrics"
     summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
     "k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
 
 // Version is the string representation of the version of this configuration
 const Version = "v1alpha1"
 
+var (
+    nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds_total",
+        "Cumulative cpu time consumed by the node in core-seconds",
+        nil,
+        nil,
+        metrics.ALPHA,
+        "")
+
+    nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
+        "Current working set of the node in bytes",
+        nil,
+        nil,
+        metrics.ALPHA,
+        "")
+
+    containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
+        "Cumulative cpu time consumed by the container in core-seconds",
+        []string{"container", "pod", "namespace"},
+        nil,
+        metrics.ALPHA,
+        "")
+
+    containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
+        "Current working set of the container in bytes",
+        []string{"container", "pod", "namespace"},
+        nil,
+        metrics.ALPHA,
+        "")
+)
+
+// getNodeCPUMetrics returns CPU utilization of a node.
+func getNodeCPUMetrics(s summary.NodeStats) (*float64, time.Time) {
+    if s.CPU == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+    return &v, s.CPU.Time.Time
+}
+
+// getNodeMemoryMetrics returns memory utilization of a node.
+func getNodeMemoryMetrics(s summary.NodeStats) (*float64, time.Time) {
+    if s.Memory == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.Memory.WorkingSetBytes)
+    return &v, s.Memory.Time.Time
+}
+
+// getContainerCPUMetrics returns CPU utilization of a container.
+func getContainerCPUMetrics(s summary.ContainerStats) (*float64, time.Time) {
+    if s.CPU == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
+    return &v, s.CPU.Time.Time
+}
+
+// getContainerMemoryMetrics returns memory utilization of a container.
+func getContainerMemoryMetrics(s summary.ContainerStats) (*float64, time.Time) {
+    if s.Memory == nil {
+        return nil, time.Time{}
+    }
+    v := float64(*s.Memory.WorkingSetBytes)
+    return &v, s.Memory.Time.Time
+}
+
 // Config is the v1alpha1 resource metrics definition
 func Config() stats.ResourceMetricsConfig {
     return stats.ResourceMetricsConfig{
         NodeMetrics: []stats.NodeResourceMetric{
             {
-                Name: "node_cpu_usage_seconds_total",
-                Description: "Cumulative cpu time consumed by the node in core-seconds",
-                ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-                    if s.CPU == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-                    return &v, s.CPU.Time.Time
-                },
+                Desc:    nodeCPUUsageDesc,
+                ValueFn: getNodeCPUMetrics,
             },
             {
-                Name: "node_memory_working_set_bytes",
-                Description: "Current working set of the node in bytes",
-                ValueFn: func(s summary.NodeStats) (*float64, time.Time) {
-                    if s.Memory == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.Memory.WorkingSetBytes)
-                    return &v, s.Memory.Time.Time
-                },
+                Desc:    nodeMemoryUsageDesc,
+                ValueFn: getNodeMemoryMetrics,
             },
         },
         ContainerMetrics: []stats.ContainerResourceMetric{
             {
-                Name: "container_cpu_usage_seconds_total",
-                Description: "Cumulative cpu time consumed by the container in core-seconds",
-                ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-                    if s.CPU == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.CPU.UsageCoreNanoSeconds) / float64(time.Second)
-                    return &v, s.CPU.Time.Time
-                },
+                Desc:    containerCPUUsageDesc,
+                ValueFn: getContainerCPUMetrics,
             },
             {
-                Name: "container_memory_working_set_bytes",
-                Description: "Current working set of the container in bytes",
-                ValueFn: func(s summary.ContainerStats) (*float64, time.Time) {
-                    if s.Memory == nil {
-                        return nil, time.Time{}
-                    }
-                    v := float64(*s.Memory.WorkingSetBytes)
-                    return &v, s.Memory.Time.Time
-                },
+                Desc:    containerMemoryUsageDesc,
+                ValueFn: getContainerMemoryMetrics,
             },
         },
     }
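As a hedged illustration of how the pieces above fit together, here is a short sketch (not part of this commit) that could sit alongside config.go in package v1alpha1; the helper name is hypothetical. It turns the node CPU descriptor and its ValueFn into one timestamped sample, mirroring what the collector in pkg/kubelet/server/stats (later in this diff) does:

package v1alpha1

import (
    "k8s.io/component-base/metrics"
    summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
)

// sampleNodeCPU is a hypothetical helper, shown only to illustrate how a
// *metrics.Desc and a ValueFn combine into a single emitted sample.
func sampleNodeCPU(node summary.NodeStats) metrics.Metric {
    // getNodeCPUMetrics converts cumulative nanosecond CPU usage into core-seconds.
    value, timestamp := getNodeCPUMetrics(node)
    if value == nil {
        // No CPU stats reported for this node yet.
        return nil
    }
    // The sample carries the stats timestamp rather than the scrape time.
    return metrics.NewLazyMetricWithTimestamp(timestamp,
        metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.GaugeValue, *value))
}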

pkg/kubelet/server/BUILD

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ go_library(
         "//vendor/github.com/google/cadvisor/container:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
         "//vendor/github.com/google/cadvisor/metrics:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/google.golang.org/grpc:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],

pkg/kubelet/server/server.go

Lines changed: 4 additions & 5 deletions
@@ -36,7 +36,6 @@ import (
     cadvisormetrics "github.com/google/cadvisor/container"
     cadvisorapi "github.com/google/cadvisor/info/v1"
     "github.com/google/cadvisor/metrics"
-    "github.com/prometheus/client_golang/prometheus"
     "google.golang.org/grpc"
     "k8s.io/klog"
 
@@ -302,7 +301,7 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
     s.restfulCont.Handle(metricsPath, legacyregistry.Handler())
 
     // cAdvisor metrics are exposed under the secured handler as well
-    r := prometheus.NewRegistry()
+    r := compbasemetrics.NewKubeRegistry()
 
     includedMetrics := cadvisormetrics.MetricSet{
         cadvisormetrics.CpuUsageMetrics: struct{}{},
@@ -315,13 +314,13 @@ func (s *Server) InstallDefaultHandlers(enableCAdvisorJSONEndpoints bool) {
         cadvisormetrics.AppMetrics:     struct{}{},
         cadvisormetrics.ProcessMetrics: struct{}{},
     }
-    r.MustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
+    r.RawMustRegister(metrics.NewPrometheusCollector(prometheusHostAdapter{s.host}, containerPrometheusLabelsFunc(s.host), includedMetrics))
     s.restfulCont.Handle(cadvisorMetricsPath,
         compbasemetrics.HandlerFor(r, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
     )
 
-    v1alpha1ResourceRegistry := prometheus.NewRegistry()
-    v1alpha1ResourceRegistry.MustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
+    v1alpha1ResourceRegistry := compbasemetrics.NewKubeRegistry()
+    v1alpha1ResourceRegistry.CustomMustRegister(stats.NewPrometheusResourceMetricCollector(s.resourceAnalyzer, v1alpha1.Config()))
     s.restfulCont.Handle(path.Join(resourceMetricsPathPrefix, v1alpha1.Version),
         compbasemetrics.HandlerFor(v1alpha1ResourceRegistry, compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError}),
     )
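The server.go hunks above replace plain Prometheus registries with component-base KubeRegistry instances. A minimal standalone sketch, assuming only the component-base calls already used in this diff, of that registration pattern; the function name and signature are illustrative and not part of the kubelet:

package example

import (
    "net/http"

    compbasemetrics "k8s.io/component-base/metrics"
)

// serveResourceMetrics shows the pattern used above: register a hand-written
// StableCollector on a KubeRegistry and expose it over HTTP.
func serveResourceMetrics(collector compbasemetrics.StableCollector) http.Handler {
    // KubeRegistry understands stability metadata; CustomMustRegister is the
    // entry point for custom (non-instrument) collectors.
    registry := compbasemetrics.NewKubeRegistry()
    registry.CustomMustRegister(collector)

    // ContinueOnError keeps the endpoint serving whatever metrics it can,
    // even if a collector fails partway through a scrape.
    return compbasemetrics.HandlerFor(registry,
        compbasemetrics.HandlerOpts{ErrorHandling: compbasemetrics.ContinueOnError})
}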

pkg/kubelet/server/stats/BUILD

Lines changed: 3 additions & 3 deletions
@@ -26,9 +26,9 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
         "//vendor/github.com/emicklei/go-restful:go_default_library",
         "//vendor/github.com/google/cadvisor/info/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
         "//vendor/k8s.io/klog:go_default_library",
     ],
 )
@@ -49,8 +49,8 @@ go_test(
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
-        "//vendor/github.com/prometheus/client_model/go:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics:go_default_library",
+        "//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
         "//vendor/github.com/stretchr/testify/assert:go_default_library",
         "//vendor/github.com/stretchr/testify/mock:go_default_library",
     ] + select({

pkg/kubelet/server/stats/prometheus_resource_metrics.go

Lines changed: 38 additions & 34 deletions
@@ -19,32 +19,29 @@ package stats
 import (
     "time"
 
+    "k8s.io/component-base/metrics"
     "k8s.io/klog"
     stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
-
-    "github.com/prometheus/client_golang/prometheus"
 )
 
 // NodeResourceMetric describes a metric for the node
 type NodeResourceMetric struct {
-    Name string
-    Description string
-    ValueFn func(stats.NodeStats) (*float64, time.Time)
+    Desc    *metrics.Desc
+    ValueFn func(stats.NodeStats) (*float64, time.Time)
 }
 
-func (n *NodeResourceMetric) desc() *prometheus.Desc {
-    return prometheus.NewDesc(n.Name, n.Description, []string{}, nil)
+func (n *NodeResourceMetric) desc() *metrics.Desc {
+    return n.Desc
 }
 
 // ContainerResourceMetric describes a metric for containers
 type ContainerResourceMetric struct {
-    Name string
-    Description string
-    ValueFn func(stats.ContainerStats) (*float64, time.Time)
+    Desc    *metrics.Desc
+    ValueFn func(stats.ContainerStats) (*float64, time.Time)
 }
 
-func (n *ContainerResourceMetric) desc() *prometheus.Desc {
-    return prometheus.NewDesc(n.Name, n.Description, []string{"container", "pod", "namespace"}, nil)
+func (n *ContainerResourceMetric) desc() *metrics.Desc {
+    return n.Desc
 }
 
 // ResourceMetricsConfig specifies which metrics to collect and export
@@ -53,29 +50,34 @@ type ResourceMetricsConfig struct {
     ContainerMetrics []ContainerResourceMetric
 }
 
-// NewPrometheusResourceMetricCollector returns a prometheus.Collector which exports resource metrics
-func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) prometheus.Collector {
+// NewPrometheusResourceMetricCollector returns a metrics.StableCollector which exports resource metrics
+func NewPrometheusResourceMetricCollector(provider SummaryProvider, config ResourceMetricsConfig) metrics.StableCollector {
     return &resourceMetricCollector{
         provider: provider,
         config: config,
-        errors: prometheus.NewGauge(prometheus.GaugeOpts{
-            Name: "scrape_error",
-            Help: "1 if there was an error while getting container metrics, 0 otherwise",
-        }),
+        errors: metrics.NewDesc("scrape_error",
+            "1 if there was an error while getting container metrics, 0 otherwise",
+            nil,
+            nil,
+            metrics.ALPHA,
+            ""),
     }
 }
 
 type resourceMetricCollector struct {
+    metrics.BaseStableCollector
+
     provider SummaryProvider
     config ResourceMetricsConfig
-    errors prometheus.Gauge
+    errors *metrics.Desc
 }
 
-var _ prometheus.Collector = &resourceMetricCollector{}
+var _ metrics.StableCollector = &resourceMetricCollector{}
+
+// DescribeWithStability implements metrics.StableCollector
+func (rc *resourceMetricCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
+    ch <- rc.errors
 
-// Describe implements prometheus.Collector
-func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
-    rc.errors.Describe(ch)
     for _, metric := range rc.config.NodeMetrics {
         ch <- metric.desc()
     }
@@ -84,33 +86,35 @@ func (rc *resourceMetricCollector) Describe(ch chan<- *prometheus.Desc) {
     }
 }
 
-// Collect implements prometheus.Collector
-// Since new containers are frequently created and removed, using the prometheus.Gauge Collector would
+// CollectWithStability implements metrics.StableCollector
+// Since new containers are frequently created and removed, using the Gauge would
 // leak metric collectors for containers or pods that no longer exist. Instead, implement
-// prometheus.Collector in a way that only collects metrics for active containers.
-func (rc *resourceMetricCollector) Collect(ch chan<- prometheus.Metric) {
-    rc.errors.Set(0)
-    defer rc.errors.Collect(ch)
+// custom collector in a way that only collects metrics for active containers.
+func (rc *resourceMetricCollector) CollectWithStability(ch chan<- metrics.Metric) {
+    var errorCount float64
+    defer func() {
+        ch <- metrics.NewLazyConstMetric(rc.errors, metrics.GaugeValue, errorCount)
+    }()
     summary, err := rc.provider.GetCPUAndMemoryStats()
     if err != nil {
-        rc.errors.Set(1)
+        errorCount = 1
         klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
         return
     }
 
     for _, metric := range rc.config.NodeMetrics {
         if value, timestamp := metric.ValueFn(summary.Node); value != nil {
-            ch <- prometheus.NewMetricWithTimestamp(timestamp,
-                prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value))
+            ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+                metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value))
         }
     }
 
     for _, pod := range summary.Pods {
         for _, container := range pod.Containers {
             for _, metric := range rc.config.ContainerMetrics {
                 if value, timestamp := metric.ValueFn(container); value != nil {
-                    ch <- prometheus.NewMetricWithTimestamp(timestamp,
-                        prometheus.MustNewConstMetric(metric.desc(), prometheus.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
+                    ch <- metrics.NewLazyMetricWithTimestamp(timestamp,
+                        metrics.NewLazyConstMetric(metric.desc(), metrics.GaugeValue, *value, container.Name, pod.PodRef.Name, pod.PodRef.Namespace))
                 }
             }
         }
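To summarize the collector pattern introduced above: embed metrics.BaseStableCollector, implement DescribeWithStability and CollectWithStability, and emit const metrics so nothing is retained between scrapes. A small self-contained sketch under those assumptions; the uptime metric and collector are invented for illustration and are not part of this commit:

package example

import (
    "time"

    "k8s.io/component-base/metrics"
)

// uptimeDesc is a hypothetical descriptor, shown only to illustrate the pattern.
var uptimeDesc = metrics.NewDesc("example_uptime_seconds",
    "Seconds since this process started (illustrative metric only)",
    nil, nil, metrics.ALPHA, "")

type uptimeCollector struct {
    metrics.BaseStableCollector
    startTime time.Time
}

var _ metrics.StableCollector = &uptimeCollector{}

// DescribeWithStability reports every descriptor this collector can emit.
func (c *uptimeCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
    ch <- uptimeDesc
}

// CollectWithStability builds const metrics fresh on every scrape, so no state
// accumulates between scrapes -- the same property the kubelet collector relies
// on to avoid leaking per-container series.
func (c *uptimeCollector) CollectWithStability(ch chan<- metrics.Metric) {
    ch <- metrics.NewLazyConstMetric(uptimeDesc, metrics.GaugeValue,
        time.Since(c.startTime).Seconds())
}

Registering such a collector would follow the same route as in server.go above: registry.CustomMustRegister(&uptimeCollector{startTime: time.Now()}).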
