Skip to content

Commit be26fbc

Browse files
authored
Merge pull request kubernetes#86282 from RainbowMango/pr_refactor_resource_endpoint
Refactor kubelet resource metrics
2 parents fa6b8e8 + c394d82 commit be26fbc

File tree

8 files changed

+373
-13
lines changed

8 files changed

+373
-13
lines changed

pkg/kubelet/apis/resourcemetrics/v1alpha1/config.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ import (
2424
"k8s.io/kubernetes/pkg/kubelet/server/stats"
2525
)
2626

27+
// This file contains a series of deprecated metrics which are emitted by the endpoint `/metrics/resource/v1alpha1`.
28+
// These metrics have been adapted to the new endpoint `/metrics/resource`, along with new `Desc`s.
29+
// In general, we don't need to maintain these deprecated metrics any more.
30+
// TODO(RainbowMango): Remove this file in release 1.20.0+.
31+
2732
// Version is the string representation of the version of this configuration
2833
const Version = "v1alpha1"
2934

@@ -33,28 +38,28 @@ var (
3338
nil,
3439
nil,
3540
metrics.ALPHA,
36-
"")
41+
"1.18.0")
3742

3843
nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
3944
"Current working set of the node in bytes",
4045
nil,
4146
nil,
4247
metrics.ALPHA,
43-
"")
48+
"1.18.0")
4449

4550
containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds_total",
4651
"Cumulative cpu time consumed by the container in core-seconds",
4752
[]string{"container", "pod", "namespace"},
4853
nil,
4954
metrics.ALPHA,
50-
"")
55+
"1.18.0")
5156

5257
containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
5358
"Current working set of the container in bytes",
5459
[]string{"container", "pod", "namespace"},
5560
nil,
5661
metrics.ALPHA,
57-
"")
62+
"1.18.0")
5863
)
5964

6065
// getNodeCPUMetrics returns CPU utilization of a node.

pkg/kubelet/metrics/collectors/BUILD

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go_library(
44
name = "go_default_library",
55
srcs = [
66
"log_metrics.go",
7+
"resource_metrics.go",
78
"volume_stats.go",
89
],
910
importpath = "k8s.io/kubernetes/pkg/kubelet/metrics/collectors",
@@ -22,6 +23,7 @@ go_test(
2223
name = "go_default_test",
2324
srcs = [
2425
"log_metrics_test.go",
26+
"resource_metrics_test.go",
2527
"volume_stats_test.go",
2628
],
2729
embed = [":go_default_library"],
@@ -30,6 +32,7 @@ go_test(
3032
"//pkg/kubelet/server/stats/testing:go_default_library",
3133
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
3234
"//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
35+
"//vendor/github.com/stretchr/testify/mock:go_default_library",
3336
],
3437
)
3538

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
Copyright 2019 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package collectors
18+
19+
import (
20+
"time"
21+
22+
"k8s.io/component-base/metrics"
23+
"k8s.io/klog"
24+
summary "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
25+
"k8s.io/kubernetes/pkg/kubelet/server/stats"
26+
)
27+
28+
// Metric descriptors emitted by resourceMetricsCollector. Every descriptor is
// registered at ALPHA stability. The trailing "" argument is presumably the
// deprecated-version field of metrics.NewDesc, left empty because these
// metrics are not deprecated — confirm against the component-base docs.
var (
29+
// nodeCPUUsageDesc describes the node's cumulative CPU time in core-seconds; it carries no labels.
nodeCPUUsageDesc = metrics.NewDesc("node_cpu_usage_seconds",
30+
"Cumulative cpu time consumed by the node in core-seconds",
31+
nil,
32+
nil,
33+
metrics.ALPHA,
34+
"")
35+
36+
// nodeMemoryUsageDesc describes the node's current working set in bytes; it carries no labels.
nodeMemoryUsageDesc = metrics.NewDesc("node_memory_working_set_bytes",
37+
"Current working set of the node in bytes",
38+
nil,
39+
nil,
40+
metrics.ALPHA,
41+
"")
42+
43+
// containerCPUUsageDesc describes a container's cumulative CPU time in core-seconds,
// labelled by container, pod and namespace.
containerCPUUsageDesc = metrics.NewDesc("container_cpu_usage_seconds",
44+
"Cumulative cpu time consumed by the container in core-seconds",
45+
[]string{"container", "pod", "namespace"},
46+
nil,
47+
metrics.ALPHA,
48+
"")
49+
50+
// containerMemoryUsageDesc describes a container's current working set in bytes,
// labelled by container, pod and namespace.
containerMemoryUsageDesc = metrics.NewDesc("container_memory_working_set_bytes",
51+
"Current working set of the container in bytes",
52+
[]string{"container", "pod", "namespace"},
53+
nil,
54+
metrics.ALPHA,
55+
"")
56+
57+
// resouceScrapeResultDesc describes the scrape status gauge: CollectWithStability
// reports 1 when fetching the stats summary failed and 0 otherwise.
// NOTE(review): the identifier is misspelled ("resouce"); a rename must update
// every use in this file at the same time.
resouceScrapeResultDesc = metrics.NewDesc("scrape_error",
58+
"1 if there was an error while getting container metrics, 0 otherwise",
59+
nil,
60+
nil,
61+
metrics.ALPHA,
62+
"")
63+
)
64+
65+
// NewResourceMetricsCollector returns a metrics.StableCollector which exports resource metrics
66+
func NewResourceMetricsCollector(provider stats.SummaryProvider) metrics.StableCollector {
67+
return &resourceMetricsCollector{
68+
provider: provider,
69+
}
70+
}
71+
72+
type resourceMetricsCollector struct {
73+
metrics.BaseStableCollector
74+
75+
provider stats.SummaryProvider
76+
}
77+
78+
// Check if resourceMetricsCollector implements necessary interface
79+
var _ metrics.StableCollector = &resourceMetricsCollector{}
80+
81+
// DescribeWithStability implements metrics.StableCollector
82+
func (rc *resourceMetricsCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
83+
ch <- nodeCPUUsageDesc
84+
ch <- nodeMemoryUsageDesc
85+
ch <- containerCPUUsageDesc
86+
ch <- containerMemoryUsageDesc
87+
ch <- resouceScrapeResultDesc
88+
}
89+
90+
// CollectWithStability implements metrics.StableCollector
91+
// Since new containers are frequently created and removed, using the Gauge would
92+
// leak metric collectors for containers or pods that no longer exist. Instead, implement
93+
// custom collector in a way that only collects metrics for active containers.
94+
func (rc *resourceMetricsCollector) CollectWithStability(ch chan<- metrics.Metric) {
95+
var errorCount float64
96+
defer func() {
97+
ch <- metrics.NewLazyConstMetric(resouceScrapeResultDesc, metrics.GaugeValue, errorCount)
98+
}()
99+
statsSummary, err := rc.provider.GetCPUAndMemoryStats()
100+
if err != nil {
101+
errorCount = 1
102+
klog.Warningf("Error getting summary for resourceMetric prometheus endpoint: %v", err)
103+
return
104+
}
105+
106+
rc.collectNodeCPUMetrics(ch, statsSummary.Node)
107+
rc.collectNodeMemoryMetrics(ch, statsSummary.Node)
108+
109+
for _, pod := range statsSummary.Pods {
110+
for _, container := range pod.Containers {
111+
rc.collectContainerCPUMetrics(ch, pod, container)
112+
rc.collectContainerMemoryMetrics(ch, pod, container)
113+
}
114+
}
115+
}
116+
117+
func (rc *resourceMetricsCollector) collectNodeCPUMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
118+
if s.CPU == nil {
119+
return
120+
}
121+
122+
ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
123+
metrics.NewLazyConstMetric(nodeCPUUsageDesc, metrics.GaugeValue, float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second)))
124+
}
125+
126+
func (rc *resourceMetricsCollector) collectNodeMemoryMetrics(ch chan<- metrics.Metric, s summary.NodeStats) {
127+
if s.Memory == nil {
128+
return
129+
}
130+
131+
ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
132+
metrics.NewLazyConstMetric(nodeMemoryUsageDesc, metrics.GaugeValue, float64(*s.Memory.WorkingSetBytes)))
133+
}
134+
135+
func (rc *resourceMetricsCollector) collectContainerCPUMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
136+
if s.CPU == nil {
137+
return
138+
}
139+
140+
ch <- metrics.NewLazyMetricWithTimestamp(s.CPU.Time.Time,
141+
metrics.NewLazyConstMetric(containerCPUUsageDesc, metrics.GaugeValue,
142+
float64(*s.CPU.UsageCoreNanoSeconds)/float64(time.Second), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
143+
}
144+
145+
func (rc *resourceMetricsCollector) collectContainerMemoryMetrics(ch chan<- metrics.Metric, pod summary.PodStats, s summary.ContainerStats) {
146+
if s.Memory == nil {
147+
return
148+
}
149+
150+
ch <- metrics.NewLazyMetricWithTimestamp(s.Memory.Time.Time,
151+
metrics.NewLazyConstMetric(containerMemoryUsageDesc, metrics.GaugeValue,
152+
float64(*s.Memory.WorkingSetBytes), s.Name, pod.PodRef.Name, pod.PodRef.Namespace))
153+
}

0 commit comments

Comments
 (0)