Skip to content

Commit cd83a52

Browse files
committed
integration: add Prometheus metrics endpoint tests
Add comprehensive integration tests for the /metrics Prometheus endpoint:
- Add MetricsClient to framework for fetching and parsing Prometheus metrics
- Add core Prometheus endpoint tests (format, types, labels, help text)
- Add Docker container metrics tests (CPU, memory, network, labels, etc.)
- Add containerd container metrics tests with API-based container discovery
- Update build scripts to compile and run metrics tests
- Fix containerd cleanup to handle already-exited containers gracefully
This improves test coverage for the Prometheus metrics exposition, which was previously not validated in integration tests. Signed-off-by: Davanum Srinivas <[email protected]>
1 parent 1b3e95c commit cd83a52

File tree

8 files changed

+1425
-7
lines changed

8 files changed

+1425
-7
lines changed

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ test-integration:
3939
GO_FLAGS=$(or $(GO_FLAGS),-race) ./build/build.sh
4040
$(GO_TEST) -c github.com/google/cadvisor/integration/tests/api
4141
$(GO_TEST) -c github.com/google/cadvisor/integration/tests/common
42+
$(GO_TEST) -c github.com/google/cadvisor/integration/tests/metrics
4243
@./build/integration.sh
4344

4445
docker-test-integration:

build/integration-in-docker.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ function run_tests() {
3333
# Add safe.directory as workaround for https://github.com/actions/runner/issues/2033
3434
BUILD_CMD="git config --global safe.directory /go/src/github.com/google/cadvisor && env GOOS=linux GOARCH=amd64 GO_FLAGS='$GO_FLAGS' ./build/build.sh && \
3535
env GOOS=linux GOFLAGS='$GO_FLAGS' go test -c github.com/google/cadvisor/integration/tests/api && \
36-
env GOOS=linux GOFLAGS='$GO_FLAGS' go test -c github.com/google/cadvisor/integration/tests/common"
36+
env GOOS=linux GOFLAGS='$GO_FLAGS' go test -c github.com/google/cadvisor/integration/tests/common && \
37+
env GOOS=linux GOFLAGS='$GO_FLAGS' go test -c github.com/google/cadvisor/integration/tests/metrics"
3738

3839
if [ "$BUILD_PACKAGES" != "" ]; then
3940
BUILD_CMD="apt update && apt install -y $BUILD_PACKAGES && \

build/integration.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,10 +132,11 @@ if [[ "${DOCKER_IN_DOCKER_ENABLED:-}" == "true" ]]; then
132132
fi
133133

134134
echo ">> running integration tests against local cAdvisor"
135-
if ! [ -f ./api.test ] || ! [ -f ./common.test ]; then
136-
echo You must compile the ./api.test binary and ./common.test binary before
137-
echo running the integration tests.
135+
if ! [ -f ./api.test ] || ! [ -f ./common.test ] || ! [ -f ./metrics.test ]; then
136+
echo You must compile the ./api.test, ./common.test, and ./metrics.test binaries
137+
echo before running the integration tests.
138138
exit 1
139139
fi
140140
./api.test --vmodule=*=2 -test.v
141141
./common.test -test.v
142+
./metrics.test -test.v

integration/framework/framework.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -668,22 +668,24 @@ func (a *containerdActions) Run(args ContainerdRunArgs, cmd ...string) string {
668668
klog.Infof("Created containerd container with ID: %s", containerID)
669669

670670
// Register cleanup function
671+
// Use RunStress for cleanup commands to avoid test failures when containers have already exited
671672
a.fm.cleanups = append(a.fm.cleanups, func() {
672673
klog.Infof("Cleaning up containerd container %s", containerID)
673674
// Kill the task with SIGKILL to ensure it stops immediately
675+
// Use RunStress so we don't fail if the task has already exited
674676
killArgs := append([]string{"ctr", "--address", a.socket, "--namespace", a.namespace},
675677
"task", "kill", "--signal", "SIGKILL", containerID)
676-
a.fm.Shell().Run("sudo", killArgs...)
678+
a.fm.Shell().RunStress("sudo", killArgs...)
677679
// Wait a moment for the task to stop
678680
time.Sleep(500 * time.Millisecond)
679681
// Delete the task (with force flag)
680682
deleteTaskArgs := append([]string{"ctr", "--address", a.socket, "--namespace", a.namespace},
681683
"task", "delete", "-f", containerID)
682-
a.fm.Shell().Run("sudo", deleteTaskArgs...)
684+
a.fm.Shell().RunStress("sudo", deleteTaskArgs...)
683685
// Delete the container
684686
deleteArgs := append([]string{"ctr", "--address", a.socket, "--namespace", a.namespace},
685687
"container", "delete", containerID)
686-
a.fm.Shell().Run("sudo", deleteArgs...)
688+
a.fm.Shell().RunStress("sudo", deleteArgs...)
687689
})
688690

689691
return containerID

integration/framework/metrics.go

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
// Copyright 2024 Google Inc. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package framework
16+
17+
import (
18+
"fmt"
19+
"io"
20+
"net/http"
21+
"strings"
22+
"time"
23+
24+
dto "github.com/prometheus/client_model/go"
25+
"github.com/prometheus/common/expfmt"
26+
)
27+
28+
// MetricsClient fetches cAdvisor's /metrics endpoint over HTTP and parses the
// Prometheus text exposition it returns.
type MetricsClient struct {
	// baseURL is the prefix to which "metrics" is appended when building the
	// request URL.
	baseURL string
	// httpClient is the client used for every request issued by this value.
	httpClient *http.Client
}
34+
35+
// NewMetricsClient creates a new client for the /metrics endpoint.
36+
func NewMetricsClient(hostname HostnameInfo) *MetricsClient {
37+
return &MetricsClient{
38+
baseURL: hostname.FullHostname(),
39+
httpClient: &http.Client{
40+
Timeout: 30 * time.Second,
41+
},
42+
}
43+
}
44+
45+
// Fetch retrieves raw metrics text from the /metrics endpoint.
46+
func (m *MetricsClient) Fetch() (string, error) {
47+
return m.FetchWithParams("")
48+
}
49+
50+
// FetchWithParams retrieves metrics with optional query parameters.
51+
// Parameters can be "type=docker" or "type=name" to filter containers.
52+
func (m *MetricsClient) FetchWithParams(params string) (string, error) {
53+
url := m.baseURL + "metrics"
54+
if params != "" {
55+
url += "?" + params
56+
}
57+
58+
resp, err := m.httpClient.Get(url)
59+
if err != nil {
60+
return "", fmt.Errorf("failed to fetch metrics: %w", err)
61+
}
62+
defer resp.Body.Close()
63+
64+
if resp.StatusCode != http.StatusOK {
65+
body, _ := io.ReadAll(resp.Body)
66+
return "", fmt.Errorf("metrics endpoint returned %d: %s", resp.StatusCode, string(body))
67+
}
68+
69+
body, err := io.ReadAll(resp.Body)
70+
if err != nil {
71+
return "", fmt.Errorf("failed to read response: %w", err)
72+
}
73+
74+
return string(body), nil
75+
}
76+
77+
// Parse converts Prometheus text format to metric families.
78+
func (m *MetricsClient) Parse(metricsText string) (map[string]*dto.MetricFamily, error) {
79+
parser := expfmt.TextParser{}
80+
return parser.TextToMetricFamilies(strings.NewReader(metricsText))
81+
}
82+
83+
// FetchAndParse combines Fetch and Parse into one call.
84+
func (m *MetricsClient) FetchAndParse() (map[string]*dto.MetricFamily, error) {
85+
text, err := m.Fetch()
86+
if err != nil {
87+
return nil, err
88+
}
89+
return m.Parse(text)
90+
}
91+
92+
// HasMetric checks if a metric family exists by name.
93+
func HasMetric(families map[string]*dto.MetricFamily, name string) bool {
94+
_, ok := families[name]
95+
return ok
96+
}
97+
98+
// GetMetricFamily returns a specific metric family by name.
99+
func GetMetricFamily(families map[string]*dto.MetricFamily, name string) (*dto.MetricFamily, bool) {
100+
mf, ok := families[name]
101+
return mf, ok
102+
}
103+
104+
// FindMetricWithLabels finds a metric matching all specified labels.
105+
// Returns nil if no matching metric is found.
106+
func FindMetricWithLabels(mf *dto.MetricFamily, labels map[string]string) *dto.Metric {
107+
if mf == nil {
108+
return nil
109+
}
110+
for _, metric := range mf.GetMetric() {
111+
if matchesLabels(metric, labels) {
112+
return metric
113+
}
114+
}
115+
return nil
116+
}
117+
118+
// FindMetricsWithLabelSubstring finds all metrics where the specified label
119+
// contains the given substring.
120+
func FindMetricsWithLabelSubstring(mf *dto.MetricFamily, labelName, substring string) []*dto.Metric {
121+
if mf == nil {
122+
return nil
123+
}
124+
var result []*dto.Metric
125+
for _, metric := range mf.GetMetric() {
126+
for _, lp := range metric.GetLabel() {
127+
if lp.GetName() == labelName && strings.Contains(lp.GetValue(), substring) {
128+
result = append(result, metric)
129+
break
130+
}
131+
}
132+
}
133+
return result
134+
}
135+
136+
// GetGaugeValue extracts the value from a gauge metric.
137+
func GetGaugeValue(metric *dto.Metric) float64 {
138+
if metric == nil || metric.GetGauge() == nil {
139+
return 0
140+
}
141+
return metric.GetGauge().GetValue()
142+
}
143+
144+
// GetCounterValue extracts the value from a counter metric.
145+
func GetCounterValue(metric *dto.Metric) float64 {
146+
if metric == nil || metric.GetCounter() == nil {
147+
return 0
148+
}
149+
return metric.GetCounter().GetValue()
150+
}
151+
152+
// GetLabelValue returns the value of a specific label from a metric.
153+
// Returns empty string if label is not found.
154+
func GetLabelValue(metric *dto.Metric, labelName string) string {
155+
if metric == nil {
156+
return ""
157+
}
158+
for _, lp := range metric.GetLabel() {
159+
if lp.GetName() == labelName {
160+
return lp.GetValue()
161+
}
162+
}
163+
return ""
164+
}
165+
166+
// ContainsLabelValue checks if any metric in the family has the label
167+
// containing the given substring.
168+
func ContainsLabelValue(mf *dto.MetricFamily, labelName, substring string) bool {
169+
if mf == nil {
170+
return false
171+
}
172+
for _, metric := range mf.GetMetric() {
173+
for _, lp := range metric.GetLabel() {
174+
if lp.GetName() == labelName && strings.Contains(lp.GetValue(), substring) {
175+
return true
176+
}
177+
}
178+
}
179+
return false
180+
}
181+
182+
// GetMetricType returns the type of a metric family as a string.
183+
func GetMetricType(mf *dto.MetricFamily) string {
184+
if mf == nil {
185+
return "unknown"
186+
}
187+
return mf.GetType().String()
188+
}
189+
190+
// matchesLabels checks if a metric has all the specified labels with exact values.
191+
func matchesLabels(metric *dto.Metric, targetLabels map[string]string) bool {
192+
if metric == nil {
193+
return false
194+
}
195+
labelMap := make(map[string]string)
196+
for _, lp := range metric.GetLabel() {
197+
labelMap[lp.GetName()] = lp.GetValue()
198+
}
199+
for k, v := range targetLabels {
200+
if labelMap[k] != v {
201+
return false
202+
}
203+
}
204+
return true
205+
}
206+
207+
// CountMetrics returns the number of metric samples in a metric family.
208+
func CountMetrics(mf *dto.MetricFamily) int {
209+
if mf == nil {
210+
return 0
211+
}
212+
return len(mf.GetMetric())
213+
}
214+
215+
// GetAllLabelValues returns all unique values for a given label name across
216+
// all metrics in the family.
217+
func GetAllLabelValues(mf *dto.MetricFamily, labelName string) []string {
218+
if mf == nil {
219+
return nil
220+
}
221+
seen := make(map[string]bool)
222+
var values []string
223+
for _, metric := range mf.GetMetric() {
224+
for _, lp := range metric.GetLabel() {
225+
if lp.GetName() == labelName {
226+
val := lp.GetValue()
227+
if !seen[val] {
228+
seen[val] = true
229+
values = append(values, val)
230+
}
231+
}
232+
}
233+
}
234+
return values
235+
}

0 commit comments

Comments
 (0)