Skip to content

Commit 0228a2b

Browse files
rbtrsivakami
authored and committed
feat: add IPConfig state logs/metrics to Request/Release handlers (#3050)
* feat: add IPConfig state logs/metrics to Request/Release handlers Signed-off-by: Evan BaKer <[email protected]> * make metrics recording async so that it will not block ip requests Signed-off-by: Evan Baker <[email protected]> --------- Signed-off-by: Evan BaKer <[email protected]> Signed-off-by: Evan Baker <[email protected]>
1 parent 7b67717 commit 0228a2b

File tree

4 files changed

+87
-61
lines changed

4 files changed

+87
-61
lines changed

cns/restserver/internalapi.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,7 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns.
615615
// If the NC was created successfully, log NC snapshot.
616616
if returnCode == 0 {
617617
logNCSnapshot(*req)
618-
publishIPStateMetrics(service.buildIPState())
618+
service.publishIPStateMetrics()
619619
} else {
620620
logger.Errorf(returnMessage)
621621
}

cns/restserver/ipam.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ func (service *HTTPRestService) updatePodInfoWithInterfaces(ctx context.Context,
197197
// RequestIPConfigHandler requests an IPConfig from the CNS state
198198
func (service *HTTPRestService) RequestIPConfigHandler(w http.ResponseWriter, r *http.Request) {
199199
opName := "requestIPConfigHandler"
200+
defer service.publishIPStateMetrics()
200201
var ipconfigRequest cns.IPConfigRequest
201202
err := common.Decode(w, r, &ipconfigRequest)
202203
logger.Request(opName, ipconfigRequest, err)
@@ -272,6 +273,7 @@ func (service *HTTPRestService) RequestIPConfigHandler(w http.ResponseWriter, r
272273
// RequestIPConfigsHandler requests multiple IPConfigs from the CNS state
273274
func (service *HTTPRestService) RequestIPConfigsHandler(w http.ResponseWriter, r *http.Request) {
274275
opName := "requestIPConfigsHandler"
276+
defer service.publishIPStateMetrics()
275277
var ipconfigsRequest cns.IPConfigsRequest
276278
err := common.Decode(w, r, &ipconfigsRequest)
277279
logger.Request(opName, ipconfigsRequest, err)
@@ -415,6 +417,7 @@ func (service *HTTPRestService) ReleaseIPConfigHandlerHelper(ctx context.Context
415417
// ReleaseIPConfigHandler frees the IP assigned to a pod from CNS
416418
func (service *HTTPRestService) ReleaseIPConfigHandler(w http.ResponseWriter, r *http.Request) {
417419
opName := "releaseIPConfigHandler"
420+
defer service.publishIPStateMetrics()
418421
var ipconfigRequest cns.IPConfigRequest
419422
err := common.Decode(w, r, &ipconfigRequest)
420423
logger.Request(opName, ipconfigRequest, err)
@@ -469,6 +472,7 @@ func (service *HTTPRestService) ReleaseIPConfigHandler(w http.ResponseWriter, r
469472
// ReleaseIPConfigsHandler frees multiple IPConfigs from the CNS state
470473
func (service *HTTPRestService) ReleaseIPConfigsHandler(w http.ResponseWriter, r *http.Request) {
471474
opName := "releaseIPConfigsHandler"
475+
defer service.publishIPStateMetrics()
472476
var ipconfigsRequest cns.IPConfigsRequest
473477
err := common.Decode(w, r, &ipconfigsRequest)
474478
logger.Request("releaseIPConfigsHandler", ipconfigsRequest, err)
@@ -518,6 +522,7 @@ func (service *HTTPRestService) removeEndpointState(podInfo cns.PodInfo) error {
518522
// MarkIPAsPendingRelease will set the IPs which are in PendingProgramming or Available to PendingRelease state
519523
// It will try to update [totalIpsToRelease] number of ips.
520524
func (service *HTTPRestService) MarkIPAsPendingRelease(totalIpsToRelease int) (map[string]cns.IPConfigurationStatus, error) {
525+
defer service.publishIPStateMetrics()
521526
pendingReleasedIps := make(map[string]cns.IPConfigurationStatus)
522527
service.Lock()
523528
defer service.Unlock()
@@ -563,6 +568,7 @@ func (service *HTTPRestService) MarkIPAsPendingRelease(totalIpsToRelease int) (m
563568
// and return an error.
564569
// MarkNIPsPendingRelease is no-op if [n] is not a positive integer.
565570
func (service *HTTPRestService) MarkNIPsPendingRelease(n int) (map[string]cns.IPConfigurationStatus, error) {
571+
defer service.publishIPStateMetrics()
566572
service.Lock()
567573
defer service.Unlock()
568574
// try to release from PendingProgramming

cns/restserver/ipusage.go

Lines changed: 0 additions & 58 deletions
This file was deleted.

cns/restserver/metrics.go

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package restserver
22

33
import (
4+
"maps"
45
"net/http"
6+
"sync"
57
"time"
68

79
"github.com/Azure/azure-container-networking/cns"
10+
"github.com/Azure/azure-container-networking/cns/logger"
811
"github.com/Azure/azure-container-networking/cns/types"
912
"github.com/prometheus/client_golang/prometheus"
1013
"sigs.k8s.io/controller-runtime/pkg/metrics"
@@ -122,7 +125,6 @@ func init() {
122125
// Every http response is 200 so we really want cns response code.
123126
// Hard to do with middleware unless we deserialize the responses, but making it an explicit header works around it.
124127
// if that doesn't work we could have a separate countervec just for response codes.
125-
126128
func NewHandlerFuncWithHistogram(handler http.HandlerFunc, histogram *prometheus.HistogramVec) http.HandlerFunc {
127129
return func(w http.ResponseWriter, req *http.Request) {
128130
start := time.Now()
@@ -142,11 +144,87 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) {
142144
ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds())
143145
}
144146

145-
func publishIPStateMetrics(state *ipState) {
147+
// ipState holds the per-state IP config counts that are logged and published
// as metrics.
type ipState struct {
148+
// allocatedIPs are all the IPs given to CNS by DNC.
149+
allocatedIPs int64
150+
// assignedIPs are the IPs CNS gives to Pods.
151+
assignedIPs int64
152+
// availableIPs are the IPs in state "Available".
153+
availableIPs int64
154+
// programmingIPs are the IPs in state "PendingProgramming".
155+
programmingIPs int64
156+
// releasingIPs are the IPs in state "PendingRelease".
157+
releasingIPs int64
158+
}
159+
160+
// asyncMetricsRecorder records the IP config state metrics each time it
// receives a signal on sig, so that callers do not do the recording inline.
type asyncMetricsRecorder struct {
161+
// podIPConfigSrc returns the IP config map that record counts by state.
podIPConfigSrc func() map[string]cns.IPConfigurationStatus
162+
// sig carries the signals that make run call record.
sig chan struct{}
163+
// once guards the one-time setup of podIPConfigSrc, sig and the run goroutine.
once sync.Once
164+
}
165+
166+
// recorder is the package-level singleton asyncMetricsRecorder. It starts as
// the zero value and is set up on first use by publishIPStateMetrics.
167+
var recorder asyncMetricsRecorder
168+
169+
// run starts the asyncMetricsRecorder and listens for signals to record the metrics.
170+
func (a *asyncMetricsRecorder) run() {
171+
for range a.sig {
172+
a.record()
173+
}
174+
}
175+
176+
// record records the IP Config state metrics to Prometheus.
177+
func (a *asyncMetricsRecorder) record() {
178+
var state ipState
179+
for ipConfig := range maps.Values(a.podIPConfigSrc()) {
180+
state.allocatedIPs++
181+
if ipConfig.GetState() == types.Assigned {
182+
state.assignedIPs++
183+
}
184+
if ipConfig.GetState() == types.Available {
185+
state.availableIPs++
186+
}
187+
if ipConfig.GetState() == types.PendingProgramming {
188+
state.programmingIPs++
189+
}
190+
if ipConfig.GetState() == types.PendingRelease {
191+
state.releasingIPs++
192+
}
193+
}
194+
195+
logger.Printf("Allocated IPs: %d, Assigned IPs: %d, Available IPs: %d, PendingProgramming IPs: %d, PendingRelease IPs: %d",
196+
state.allocatedIPs,
197+
state.assignedIPs,
198+
state.availableIPs,
199+
state.programmingIPs,
200+
state.releasingIPs,
201+
)
202+
146203
labels := []string{}
147204
allocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedIPs))
148205
assignedIPCount.WithLabelValues(labels...).Set(float64(state.assignedIPs))
149206
availableIPCount.WithLabelValues(labels...).Set(float64(state.availableIPs))
150207
pendingProgrammingIPCount.WithLabelValues(labels...).Set(float64(state.programmingIPs))
151208
pendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.releasingIPs))
152209
}
210+
211+
// publishIPStateMetrics logs and publishes the IP Config state metrics to Prometheus.
212+
func (service *HTTPRestService) publishIPStateMetrics() {
213+
recorder.once.Do(func() {
214+
recorder.podIPConfigSrc = service.PodIPConfigStates
215+
recorder.sig = make(chan struct{})
216+
go recorder.run()
217+
})
218+
select {
219+
case recorder.sig <- struct{}{}: // signal the recorder to record the metrics
220+
default: // drop the signal if the recorder already has an event queued
221+
}
222+
}
223+
224+
// PodIPConfigStates returns a clone of the IP Config State map.
225+
func (service *HTTPRestService) PodIPConfigStates() map[string]cns.IPConfigurationStatus {
226+
// copy state
227+
service.RLock()
228+
defer service.RUnlock()
229+
return maps.Clone(service.PodIPConfigState)
230+
}

0 commit comments

Comments
 (0)