Skip to content

Commit d5f134d

Browse files
feat: [NPM] perf metrics for pod/ns/policy CRUD (#1220)
* add dataplane health metrics
* change counters to CounterVecs
* wip
* uncomment metrics.ReinitializeAll()
* add comment about ReinitializeAll
* restructure prometheus-metrics.go, address comments, and finish unit tests for v1
* properly record exec times and include error labels
* add error label to add_policy_exec_time
* add v2 unit tests, test NoOp, and address comment
* resolve lints
1 parent 6631ba2 commit d5f134d

25 files changed

+876
-294
lines changed

npm/http/server/server.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ func NPMRestServerListenAndServe(config npmconfig.Config, npmEncoder json.Marsha
2828

2929
// prometheus handlers
3030
if config.Toggles.EnablePrometheusMetrics {
31-
rs.router.Handle(api.NodeMetricsPath, metrics.GetHandler(true))
32-
rs.router.Handle(api.ClusterMetricsPath, metrics.GetHandler(false))
31+
rs.router.Handle(api.NodeMetricsPath, metrics.GetHandler(metrics.NodeMetrics))
32+
rs.router.Handle(api.ClusterMetricsPath, metrics.GetHandler(metrics.ClusterMetrics))
3333
}
3434

3535
if config.Toggles.EnableHTTPDebugAPI {

npm/metrics/ipsets.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import (
55
"github.com/prometheus/client_golang/prometheus"
66
)
77

8-
var ipsetInventoryMap = make(map[string]int)
8+
var ipsetInventoryMap map[string]int
99

1010
// IncNumIPSets increments the number of IPSets.
1111
func IncNumIPSets() {

npm/metrics/namespaces.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package metrics
2+
3+
// RecordControllerNamespaceExecTime adds an observation of namespace exec time for the specified operation (unless the operation is NoOp).
4+
// The execution time is from the timer's start until now.
5+
func RecordControllerNamespaceExecTime(timer *Timer, op OperationKind, hadError bool) {
6+
timer.stopAndRecordCRUDExecTime(controllerNamespaceExecTime, op, hadError)
7+
}
8+
9+
// GetControllerNamespaceExecCount returns the number of observations for namespace exec time for the specified operation and error status.
10+
// This function is slow.
11+
func GetControllerNamespaceExecCount(op OperationKind, hadError bool) (int, error) {
12+
return getCountVecValue(controllerNamespaceExecTime, getCRUDExecTimeLabels(op, hadError))
13+
}

npm/metrics/namespaces_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package metrics
2+
3+
import "testing"
4+
5+
func TestRecordControllerNamespaceExecTime(t *testing.T) {
6+
testStopAndRecordCRUDExecTime(t, &crudExecMetric{
7+
RecordControllerNamespaceExecTime,
8+
GetControllerNamespaceExecCount,
9+
})
10+
}

npm/metrics/pods.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package metrics
2+
3+
// RecordControllerPodExecTime adds an observation of pod exec time for the specified operation (unless the operation is NoOp).
4+
// The execution time is from the timer's start until now.
5+
func RecordControllerPodExecTime(timer *Timer, op OperationKind, hadError bool) {
6+
timer.stopAndRecordCRUDExecTime(controllerPodExecTime, op, hadError)
7+
}
8+
9+
// GetControllerPodExecCount returns the number of observations for pod exec time for the specified operation and error status.
10+
// This function is slow.
11+
func GetControllerPodExecCount(op OperationKind, hadError bool) (int, error) {
12+
return getCountVecValue(controllerPodExecTime, getCRUDExecTimeLabels(op, hadError))
13+
}

npm/metrics/pods_test.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
package metrics
2+
3+
import "testing"
4+
5+
func TestRecordControllerPodExecTime(t *testing.T) {
6+
testStopAndRecordCRUDExecTime(t, &crudExecMetric{
7+
RecordControllerPodExecTime,
8+
GetControllerPodExecCount,
9+
})
10+
}

npm/metrics/policies.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@ func ResetNumPolicies() {
1515
numPolicies.Set(0)
1616
}
1717

18-
// RecordPolicyExecTime adds an observation of execution time for adding a policy.
18+
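// RecordControllerPolicyExecTime adds an observation of policy exec time for the specified operation (unless the operation is NoOp); create operations are recorded in the add-policy exec time metric instead of the controller policy exec time metric.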
1919
// The execution time is from the timer's start until now.
20-
func RecordPolicyExecTime(timer *Timer) {
21-
timer.stopAndRecord(addPolicyExecTime)
20+
func RecordControllerPolicyExecTime(timer *Timer, op OperationKind, hadError bool) {
21+
if op == CreateOp {
22+
timer.stopAndRecordExecTimeWithError(addPolicyExecTime, hadError)
23+
} else {
24+
timer.stopAndRecordCRUDExecTime(controllerPolicyExecTime, op, hadError)
25+
}
2226
}
2327

2428
// GetNumPolicies returns the number of policies.
@@ -27,8 +31,11 @@ func GetNumPolicies() (int, error) {
2731
return getValue(numPolicies)
2832
}
2933

30-
// GetPolicyExecCount returns the number of observations for execution time of adding policies.
34+
// GetControllerPolicyExecCount returns the number of observations for policy exec time for the specified operation and error status; create operations are counted from the add-policy exec time metric.
3135
// This function is slow.
32-
func GetPolicyExecCount() (int, error) {
33-
return getCountValue(addPolicyExecTime)
36+
func GetControllerPolicyExecCount(op OperationKind, hadError bool) (int, error) {
37+
if op == CreateOp {
38+
return getCountVecValue(addPolicyExecTime, getErrorLabels(hadError))
39+
}
40+
return getCountVecValue(controllerPolicyExecTime, getCRUDExecTimeLabels(op, hadError))
3441
}

npm/metrics/policies_test.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ package metrics
22

33
import "testing"
44

5-
var (
6-
numPoliciesMetric = &basicMetric{ResetNumPolicies, IncNumPolicies, DecNumPolicies, GetNumPolicies}
7-
policyExecMetric = &recordingMetric{RecordPolicyExecTime, GetPolicyExecCount}
8-
)
5+
var numPoliciesMetric = &basicMetric{ResetNumPolicies, IncNumPolicies, DecNumPolicies, GetNumPolicies}
96

10-
func TestRecordPolicyExecTime(t *testing.T) {
11-
testStopAndRecord(t, policyExecMetric)
7+
func TestRecordControllerPolicyExecTime(t *testing.T) {
8+
testStopAndRecordCRUDExecTime(t, &crudExecMetric{
9+
RecordControllerPolicyExecTime,
10+
GetControllerPolicyExecCount,
11+
})
1212
}
1313

1414
func TestIncNumPolicies(t *testing.T) {

0 commit comments

Comments
 (0)