Skip to content

Commit 0969e66

Browse files
authored
Merge pull request kubernetes#93066 from AnishShah/network-plugin-metrics
kubelet: add operations count and error count metrics to network plugin manager
2 parents 64f8bf2 + 1b9885d commit 0969e66

File tree

4 files changed

+121
-5
lines changed

4 files changed

+121
-5
lines changed

pkg/kubelet/dockershim/network/BUILD

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
load("@io_bazel_rules_go//go:def.bzl", "go_library")
1+
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
22

33
go_library(
44
name = "go_default_library",
@@ -46,3 +46,14 @@ filegroup(
4646
tags = ["automanaged"],
4747
visibility = ["//visibility:public"],
4848
)
49+
50+
go_test(
51+
name = "go_default_test",
52+
srcs = ["plugins_test.go"],
53+
embed = [":go_default_library"],
54+
deps = [
55+
"//pkg/kubelet/dockershim/network/metrics:go_default_library",
56+
"//staging/src/k8s.io/component-base/metrics/legacyregistry:go_default_library",
57+
"//staging/src/k8s.io/component-base/metrics/testutil:go_default_library",
58+
],
59+
)

pkg/kubelet/dockershim/network/metrics/metrics.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ import (
2828

2929
const (
3030
// NetworkPluginOperationsKey is the key for operation count metrics.
31-
NetworkPluginOperationsKey = "network_plugin_operations"
31+
NetworkPluginOperationsKey = "network_plugin_operations_total"
3232
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
3333
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
34+
// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
35+
NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"
3436

3537
// Keep the "kubelet" subsystem for backward compatibility.
3638
kubeletSubsystem = "kubelet"
@@ -49,6 +51,28 @@ var (
4951
},
5052
[]string{"operation_type"},
5153
)
54+
55+
// NetworkPluginOperations collects operation counts by operation type.
56+
NetworkPluginOperations = metrics.NewCounterVec(
57+
&metrics.CounterOpts{
58+
Subsystem: kubeletSubsystem,
59+
Name: NetworkPluginOperationsKey,
60+
Help: "Cumulative number of network plugin operations by operation type.",
61+
StabilityLevel: metrics.ALPHA,
62+
},
63+
[]string{"operation_type"},
64+
)
65+
66+
// NetworkPluginOperationsErrors collects operation errors by operation type.
67+
NetworkPluginOperationsErrors = metrics.NewCounterVec(
68+
&metrics.CounterOpts{
69+
Subsystem: kubeletSubsystem,
70+
Name: NetworkPluginOperationsErrorsKey,
71+
Help: "Cumulative number of network plugin operation errors by operation type.",
72+
StabilityLevel: metrics.ALPHA,
73+
},
74+
[]string{"operation_type"},
75+
)
5276
)
5377

5478
var registerMetrics sync.Once
@@ -57,6 +81,8 @@ var registerMetrics sync.Once
5781
func Register() {
5882
registerMetrics.Do(func() {
5983
legacyregistry.MustRegister(NetworkPluginOperationsLatency)
84+
legacyregistry.MustRegister(NetworkPluginOperations)
85+
legacyregistry.MustRegister(NetworkPluginOperationsErrors)
6086
})
6187
}
6288

pkg/kubelet/dockershim/network/plugins.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,45 +382,57 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
382382

383383
// recordOperation records operation and duration
384384
func recordOperation(operation string, start time.Time) {
385+
metrics.NetworkPluginOperations.WithLabelValues(operation).Inc()
385386
metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start))
386387
}
387388

389+
// recordError records errors for metric.
390+
func recordError(operation string) {
391+
metrics.NetworkPluginOperationsErrors.WithLabelValues(operation).Inc()
392+
}
393+
388394
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
389-
defer recordOperation("get_pod_network_status", time.Now())
395+
const operation = "get_pod_network_status"
396+
defer recordOperation(operation, time.Now())
390397
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
391398
pm.podLock(fullPodName).Lock()
392399
defer pm.podUnlock(fullPodName)
393400

394401
netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
395402
if err != nil {
403+
recordError(operation)
396404
return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
397405
}
398406

399407
return netStatus, nil
400408
}
401409

402410
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
403-
defer recordOperation("set_up_pod", time.Now())
411+
const operation = "set_up_pod"
412+
defer recordOperation(operation, time.Now())
404413
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
405414
pm.podLock(fullPodName).Lock()
406415
defer pm.podUnlock(fullPodName)
407416

408417
klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
409418
if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
419+
recordError(operation)
410420
return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
411421
}
412422

413423
return nil
414424
}
415425

416426
func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
417-
defer recordOperation("tear_down_pod", time.Now())
427+
const operation = "tear_down_pod"
428+
defer recordOperation(operation, time.Now())
418429
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
419430
pm.podLock(fullPodName).Lock()
420431
defer pm.podUnlock(fullPodName)
421432

422433
klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
423434
if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
435+
recordError(operation)
424436
return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
425437
}
426438

pkg/kubelet/dockershim/network/plugins_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
// +build !dockerless
2+
3+
/*
4+
Copyright 2020 The Kubernetes Authors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package network
20+
21+
import (
22+
"strings"
23+
"testing"
24+
"time"
25+
26+
"k8s.io/component-base/metrics/legacyregistry"
27+
"k8s.io/component-base/metrics/testutil"
28+
"k8s.io/kubernetes/pkg/kubelet/dockershim/network/metrics"
29+
)
30+
31+
func TestNetworkPluginManagerMetrics(t *testing.T) {
32+
metrics.Register()
33+
34+
operation := "test_operation"
35+
recordOperation(operation, time.Now())
36+
recordError(operation)
37+
38+
cases := []struct {
39+
metricName string
40+
want string
41+
}{
42+
{
43+
metricName: "kubelet_network_plugin_operations_total",
44+
want: `
45+
# HELP kubelet_network_plugin_operations_total [ALPHA] Cumulative number of network plugin operations by operation type.
46+
# TYPE kubelet_network_plugin_operations_total counter
47+
kubelet_network_plugin_operations_total{operation_type="test_operation"} 1
48+
`,
49+
},
50+
{
51+
metricName: "kubelet_network_plugin_operations_errors_total",
52+
want: `
53+
# HELP kubelet_network_plugin_operations_errors_total [ALPHA] Cumulative number of network plugin operation errors by operation type.
54+
# TYPE kubelet_network_plugin_operations_errors_total counter
55+
kubelet_network_plugin_operations_errors_total{operation_type="test_operation"} 1
56+
`,
57+
},
58+
}
59+
60+
for _, tc := range cases {
61+
t.Run(tc.metricName, func(t *testing.T) {
62+
if err := testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(tc.want), tc.metricName); err != nil {
63+
t.Fatal(err)
64+
}
65+
})
66+
}
67+
}

0 commit comments

Comments
 (0)