Skip to content

Commit 41f451a

Browse files
authored
Added IP Usage metrics at Rest server. (#1932)
* Added IP Usage metrics at Rest server.
1 parent 66acf01 commit 41f451a

File tree

4 files changed

+166
-45
lines changed

4 files changed

+166
-45
lines changed

cns/restserver/api.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,11 @@ func (service *HTTPRestService) reserveIPAddress(w http.ResponseWriter, r *http.
408408
Message: returnMessage,
409409
}
410410

411+
if resp.ReturnCode == 0 {
412+
// If Response is success i.e. code 0, then publish metrics.
413+
publishIPStateMetrics(service.buildIPState())
414+
}
415+
411416
reserveResp := &cns.ReserveIPAddressResponse{Response: resp, IPAddress: address}
412417
err = service.Listener.Encode(w, &reserveResp)
413418
logger.Response(service.Name, reserveResp, resp.ReturnCode, err)
@@ -475,6 +480,11 @@ func (service *HTTPRestService) releaseIPAddress(w http.ResponseWriter, r *http.
475480
Message: returnMessage,
476481
}
477482

483+
if resp.ReturnCode == 0 {
484+
// If Response is success i.e. code 0, then publish metrics.
485+
publishIPStateMetrics(service.buildIPState())
486+
}
487+
478488
err = service.Listener.Encode(w, &resp)
479489
logger.Response(service.Name, resp, resp.ReturnCode, err)
480490
}

cns/restserver/internalapi.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,8 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.
395395
// If the NC was created successfully, log NC snapshot.
396396
if returnCode == 0 {
397397
logNCSnapshot(*req)
398+
399+
publishIPStateMetrics(service.buildIPState())
398400
} else {
399401
logger.Errorf(returnMessage)
400402
}

cns/restserver/ipusage.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
package restserver
2+
3+
import (
4+
"github.com/Azure/azure-container-networking/cns/logger"
5+
"github.com/Azure/azure-container-networking/cns/types"
6+
)
7+
8+
// ipState is a snapshot of pod IP counts grouped by state. It is produced by
// buildIPState and consumed by publishIPStateMetrics.
type ipState struct {
	// allocatedIPs are all the IPs given to CNS by DNC.
	allocatedIPs int64
	// assignedIPs are the IPs CNS gives to Pods.
	assignedIPs int64
	// availableIPs are the IPs in state "Available".
	availableIPs int64
	// programmingIPs are the IPs in state "PendingProgramming".
	programmingIPs int64
	// releasingIPs are the IPs in state "PendingRelease".
	releasingIPs int64
}
20+
21+
func (service *HTTPRestService) buildIPState() *ipState {
22+
service.Lock()
23+
defer service.Unlock()
24+
25+
state := ipState{
26+
allocatedIPs: 0,
27+
assignedIPs: 0,
28+
availableIPs: 0,
29+
}
30+
31+
//nolint:gocritic // This has to iterate over the IP Config state to get the counts.
32+
for _, ipConfig := range service.PodIPConfigState {
33+
state.allocatedIPs++
34+
if ipConfig.GetState() == types.Assigned {
35+
state.assignedIPs++
36+
}
37+
if ipConfig.GetState() == types.Available {
38+
state.availableIPs++
39+
}
40+
if ipConfig.GetState() == types.PendingProgramming {
41+
state.programmingIPs++
42+
}
43+
if ipConfig.GetState() == types.PendingRelease {
44+
state.releasingIPs++
45+
}
46+
}
47+
48+
logger.Printf("[IP Usage] allocated IPs: %d, assigned IPs: %d, available IPs: %d", state.allocatedIPs, state.assignedIPs, state.availableIPs)
49+
return &state
50+
}

cns/restserver/metrics.go

Lines changed: 104 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,51 +10,98 @@ import (
1010
"sigs.k8s.io/controller-runtime/pkg/metrics"
1111
)
1212

13-
var httpRequestLatency = prometheus.NewHistogramVec(
14-
prometheus.HistogramOpts{
15-
Name: "http_request_latency_seconds",
16-
Help: "Request latency in seconds by endpoint, verb, and response code.",
17-
//nolint:gomnd // default bucket consts
18-
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
19-
},
20-
[]string{"url", "verb", "cns_return_code"},
13+
const (
14+
subnetLabel = "subnet"
15+
subnetCIDRLabel = "subnet_cidr"
16+
podnetARMIDLabel = "podnet_arm_id"
17+
cnsReturnCode = "cns_return_code"
18+
customerMetricLabel = "customer_metric"
19+
customerMetricLabelValue = "customer metric"
2120
)
2221

23-
var ipAssignmentLatency = prometheus.NewHistogram(
24-
prometheus.HistogramOpts{
25-
Name: "ip_assignment_latency_seconds",
26-
Help: "Pod IP assignment latency in seconds",
27-
//nolint:gomnd // default bucket consts
28-
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
29-
},
30-
)
31-
32-
var ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
33-
prometheus.HistogramOpts{
34-
Name: "ipconfigstatus_state_transition_seconds",
35-
Help: "Time spent by the IP Configuration Status in each state transition",
36-
//nolint:gomnd // default bucket consts
37-
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
38-
},
39-
[]string{"previous_state", "next_state"},
40-
)
41-
42-
var syncHostNCVersionCount = prometheus.NewCounterVec(
43-
prometheus.CounterOpts{
44-
Name: "sync_host_nc_version_total",
45-
Help: "Count of Sync Host NC by success or failure",
46-
},
47-
[]string{"ok"},
48-
)
49-
50-
var syncHostNCVersionLatency = prometheus.NewHistogramVec(
51-
prometheus.HistogramOpts{
52-
Name: "sync_host_nc_version_latency_seconds",
53-
Help: "Sync Host NC Latency",
54-
//nolint:gomnd // default bucket consts
55-
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
56-
},
57-
[]string{"ok"},
22+
var (
23+
httpRequestLatency = prometheus.NewHistogramVec(
24+
prometheus.HistogramOpts{
25+
Name: "http_request_latency_seconds",
26+
Help: "Request latency in seconds by endpoint, verb, and response code.",
27+
//nolint:gomnd // default bucket consts
28+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
29+
},
30+
[]string{"url", "verb", "cns_return_code"},
31+
)
32+
ipAssignmentLatency = prometheus.NewHistogram(
33+
prometheus.HistogramOpts{
34+
Name: "ip_assignment_latency_seconds",
35+
Help: "Pod IP assignment latency in seconds",
36+
//nolint:gomnd // default bucket consts
37+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
38+
},
39+
)
40+
ipConfigStatusStateTransitionTime = prometheus.NewHistogramVec(
41+
prometheus.HistogramOpts{
42+
Name: "ipconfigstatus_state_transition_seconds",
43+
Help: "Time spent by the IP Configuration Status in each state transition",
44+
//nolint:gomnd // default bucket consts
45+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
46+
},
47+
[]string{"previous_state", "next_state"},
48+
)
49+
syncHostNCVersionCount = prometheus.NewCounterVec(
50+
prometheus.CounterOpts{
51+
Name: "sync_host_nc_version_total",
52+
Help: "Count of Sync Host NC by success or failure",
53+
},
54+
[]string{"ok"},
55+
)
56+
syncHostNCVersionLatency = prometheus.NewHistogramVec(
57+
prometheus.HistogramOpts{
58+
Name: "sync_host_nc_version_latency_seconds",
59+
Help: "Sync Host NC Latency",
60+
//nolint:gomnd // default bucket consts
61+
Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), // 1 ms to ~16 seconds
62+
},
63+
[]string{"ok"},
64+
)
65+
allocatedIPCount = prometheus.NewGaugeVec(
66+
prometheus.GaugeOpts{
67+
Name: "cx_allocated_ips_v2",
68+
Help: "Count of IPs CNS has Allocated",
69+
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
70+
},
71+
[]string{},
72+
)
73+
assignedIPCount = prometheus.NewGaugeVec(
74+
prometheus.GaugeOpts{
75+
Name: "cx_assigned_ips_v2",
76+
Help: "Count of IPs CNS has Assigned to Pods",
77+
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
78+
},
79+
[]string{},
80+
)
81+
availableIPCount = prometheus.NewGaugeVec(
82+
prometheus.GaugeOpts{
83+
Name: "cx_available_ips_v2",
84+
Help: "Count of IPs Available",
85+
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
86+
},
87+
[]string{},
88+
)
89+
pendingProgrammingIPCount = prometheus.NewGaugeVec(
90+
prometheus.GaugeOpts{
91+
Name: "cx_pending_programming_ips_v2",
92+
Help: "Count of IPs in Pending Programming State",
93+
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
94+
},
95+
[]string{},
96+
)
97+
pendingReleaseIPCount = prometheus.NewGaugeVec(
98+
prometheus.GaugeOpts{
99+
Name: "cx_pending_release_ips_v2",
100+
Help: "Count of IPs in Pending Release State",
101+
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
102+
},
103+
[]string{},
104+
)
58105
)
59106

60107
func init() {
@@ -64,11 +111,14 @@ func init() {
64111
ipConfigStatusStateTransitionTime,
65112
syncHostNCVersionCount,
66113
syncHostNCVersionLatency,
114+
allocatedIPCount,
115+
assignedIPCount,
116+
availableIPCount,
117+
pendingProgrammingIPCount,
118+
pendingReleaseIPCount,
67119
)
68120
}
69121

70-
const cnsReturnCode = "Cns-Return-Code"
71-
72122
// Every http response is 200 so we really want cns response code.
73123
// Hard to do with middleware unless we deserialize the responses, but making it an explicit header works around it.
74124
// if that doesn't work we could have a separate countervec just for response codes.
@@ -91,3 +141,12 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) {
91141
}
92142
ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds())
93143
}
144+
145+
func publishIPStateMetrics(state *ipState) {
146+
labels := []string{} // TODO. ragasthya Add dimensions to the IP Usage metrics.
147+
allocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedIPs))
148+
assignedIPCount.WithLabelValues(labels...).Set(float64(state.assignedIPs))
149+
availableIPCount.WithLabelValues(labels...).Set(float64(state.availableIPs))
150+
pendingProgrammingIPCount.WithLabelValues(labels...).Set(float64(state.programmingIPs))
151+
pendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.releasingIPs))
152+
}

0 commit comments

Comments
 (0)