Skip to content

Commit 0ffe89e

Browse files
committed
kubelet: add operations count and error count metrics to network plugin manager
1 parent 6079ceb commit 0ffe89e

File tree

2 files changed

+42
-4
lines changed

2 files changed

+42
-4
lines changed

pkg/kubelet/dockershim/network/metrics/metrics.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@ import (
2828

2929
const (
3030
// NetworkPluginOperationsKey is the key for operation count metrics.
31-
NetworkPluginOperationsKey = "network_plugin_operations"
31+
NetworkPluginOperationsKey = "network_plugin_operations_total"
3232
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
3333
NetworkPluginOperationsLatencyKey = "network_plugin_operations_duration_seconds"
34+
// NetworkPluginOperationsErrorsKey is the key for the operations error metrics.
35+
NetworkPluginOperationsErrorsKey = "network_plugin_operations_errors_total"
3436

3537
// Keep the "kubelet" subsystem for backward compatibility.
3638
kubeletSubsystem = "kubelet"
@@ -49,6 +51,28 @@ var (
4951
},
5052
[]string{"operation_type"},
5153
)
54+
55+
// NetworkPluginOperations collects operation counts by operation type.
56+
NetworkPluginOperations = metrics.NewCounterVec(
57+
&metrics.CounterOpts{
58+
Subsystem: kubeletSubsystem,
59+
Name: NetworkPluginOperationsKey,
60+
Help: "Cumulative number of network plugin operations by operation type.",
61+
StabilityLevel: metrics.ALPHA,
62+
},
63+
[]string{"operation_type"},
64+
)
65+
66+
// NetworkPluginOperationsErrors collects operation errors by operation type.
67+
NetworkPluginOperationsErrors = metrics.NewCounterVec(
68+
&metrics.CounterOpts{
69+
Subsystem: kubeletSubsystem,
70+
Name: NetworkPluginOperationsErrorsKey,
71+
Help: "Cumulative number of network plugin operation errors by operation type.",
72+
StabilityLevel: metrics.ALPHA,
73+
},
74+
[]string{"operation_type"},
75+
)
5276
)
5377

5478
var registerMetrics sync.Once
@@ -57,6 +81,8 @@ var registerMetrics sync.Once
5781
func Register() {
5882
registerMetrics.Do(func() {
5983
legacyregistry.MustRegister(NetworkPluginOperationsLatency)
84+
legacyregistry.MustRegister(NetworkPluginOperations)
85+
legacyregistry.MustRegister(NetworkPluginOperationsErrors)
6086
})
6187
}
6288

pkg/kubelet/dockershim/network/plugins.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,45 +382,57 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
382382

383383
// recordOperation increments the operation counter for the given operation and observes the time elapsed since start in the latency metric.
384384
func recordOperation(operation string, start time.Time) {
385+
metrics.NetworkPluginOperations.WithLabelValues(operation).Inc()
385386
metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInSeconds(start))
386387
}
387388

389+
// recordError increments the error counter for the given network plugin operation.
390+
func recordError(operation string) {
391+
metrics.NetworkPluginOperationsErrors.WithLabelValues(operation).Inc()
392+
}
393+
388394
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
389-
defer recordOperation("get_pod_network_status", time.Now())
395+
const operation = "get_pod_network_status"
396+
defer recordOperation(operation, time.Now())
390397
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
391398
pm.podLock(fullPodName).Lock()
392399
defer pm.podUnlock(fullPodName)
393400

394401
netStatus, err := pm.plugin.GetPodNetworkStatus(podNamespace, podName, id)
395402
if err != nil {
403+
recordError(operation)
396404
return nil, fmt.Errorf("networkPlugin %s failed on the status hook for pod %q: %v", pm.plugin.Name(), fullPodName, err)
397405
}
398406

399407
return netStatus, nil
400408
}
401409

402410
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations, options map[string]string) error {
403-
defer recordOperation("set_up_pod", time.Now())
411+
const operation = "set_up_pod"
412+
defer recordOperation(operation, time.Now())
404413
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
405414
pm.podLock(fullPodName).Lock()
406415
defer pm.podUnlock(fullPodName)
407416

408417
klog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
409418
if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations, options); err != nil {
419+
recordError(operation)
410420
return fmt.Errorf("networkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
411421
}
412422

413423
return nil
414424
}
415425

416426
func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
417-
defer recordOperation("tear_down_pod", time.Now())
427+
const operation = "tear_down_pod"
428+
defer recordOperation(operation, time.Now())
418429
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
419430
pm.podLock(fullPodName).Lock()
420431
defer pm.podUnlock(fullPodName)
421432

422433
klog.V(3).Infof("Calling network plugin %s to tear down pod %q", pm.plugin.Name(), fullPodName)
423434
if err := pm.plugin.TearDownPod(podNamespace, podName, id); err != nil {
435+
recordError(operation)
424436
return fmt.Errorf("networkPlugin %s failed to teardown pod %q network: %v", pm.plugin.Name(), fullPodName, err)
425437
}
426438

0 commit comments

Comments
 (0)