diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index a73d9dcb2a..fa41525030 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -615,7 +615,7 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns. // If the NC was created successfully, log NC snapshot. if returnCode == 0 { logNCSnapshot(*req) - publishIPStateMetrics(service.buildIPState()) + service.publishIPStateMetrics() } else { logger.Errorf(returnMessage) } diff --git a/cns/restserver/ipam.go b/cns/restserver/ipam.go index 2802ed295a..db5784c5fa 100644 --- a/cns/restserver/ipam.go +++ b/cns/restserver/ipam.go @@ -197,6 +197,7 @@ func (service *HTTPRestService) updatePodInfoWithInterfaces(ctx context.Context, // RequestIPConfigHandler requests an IPConfig from the CNS state func (service *HTTPRestService) RequestIPConfigHandler(w http.ResponseWriter, r *http.Request) { opName := "requestIPConfigHandler" + defer service.publishIPStateMetrics() var ipconfigRequest cns.IPConfigRequest err := common.Decode(w, r, &ipconfigRequest) logger.Request(opName, ipconfigRequest, err) @@ -272,6 +273,7 @@ func (service *HTTPRestService) RequestIPConfigHandler(w http.ResponseWriter, r // RequestIPConfigsHandler requests multiple IPConfigs from the CNS state func (service *HTTPRestService) RequestIPConfigsHandler(w http.ResponseWriter, r *http.Request) { opName := "requestIPConfigsHandler" + defer service.publishIPStateMetrics() var ipconfigsRequest cns.IPConfigsRequest err := common.Decode(w, r, &ipconfigsRequest) logger.Request(opName, ipconfigsRequest, err) @@ -415,6 +417,7 @@ func (service *HTTPRestService) ReleaseIPConfigHandlerHelper(ctx context.Context // ReleaseIPConfigHandler frees the IP assigned to a pod from CNS func (service *HTTPRestService) ReleaseIPConfigHandler(w http.ResponseWriter, r *http.Request) { opName := "releaseIPConfigHandler" + defer service.publishIPStateMetrics() var ipconfigRequest cns.IPConfigRequest err := 
common.Decode(w, r, &ipconfigRequest) logger.Request(opName, ipconfigRequest, err) @@ -469,6 +472,7 @@ func (service *HTTPRestService) ReleaseIPConfigHandler(w http.ResponseWriter, r // ReleaseIPConfigsHandler frees multiple IPConfigs from the CNS state func (service *HTTPRestService) ReleaseIPConfigsHandler(w http.ResponseWriter, r *http.Request) { opName := "releaseIPConfigsHandler" + defer service.publishIPStateMetrics() var ipconfigsRequest cns.IPConfigsRequest err := common.Decode(w, r, &ipconfigsRequest) logger.Request("releaseIPConfigsHandler", ipconfigsRequest, err) @@ -518,6 +522,7 @@ func (service *HTTPRestService) removeEndpointState(podInfo cns.PodInfo) error { // MarkIPAsPendingRelease will set the IPs which are in PendingProgramming or Available to PendingRelease state // It will try to update [totalIpsToRelease] number of ips. func (service *HTTPRestService) MarkIPAsPendingRelease(totalIpsToRelease int) (map[string]cns.IPConfigurationStatus, error) { + defer service.publishIPStateMetrics() pendingReleasedIps := make(map[string]cns.IPConfigurationStatus) service.Lock() defer service.Unlock() @@ -563,6 +568,7 @@ func (service *HTTPRestService) MarkIPAsPendingRelease(totalIpsToRelease int) (m // and return an error. // MarkNIPsPendingRelease is no-op if [n] is not a positive integer. func (service *HTTPRestService) MarkNIPsPendingRelease(n int) (map[string]cns.IPConfigurationStatus, error) { + defer service.publishIPStateMetrics() service.Lock() defer service.Unlock() // try to release from PendingProgramming diff --git a/cns/restserver/ipusage.go b/cns/restserver/ipusage.go deleted file mode 100644 index e2ae30382b..0000000000 --- a/cns/restserver/ipusage.go +++ /dev/null @@ -1,58 +0,0 @@ -package restserver - -import ( - "github.com/Azure/azure-container-networking/cns/logger" - "github.com/Azure/azure-container-networking/cns/types" -) - -type ipState struct { - // allocatedIPs are all the IPs given to CNS by DNC. 
- allocatedIPs int64 - // assignedIPs are the IPs CNS gives to Pods. - assignedIPs int64 - // availableIPs are the IPs in state "Available". - availableIPs int64 - // programmingIPs are the IPs in state "PendingProgramming". - programmingIPs int64 - // releasingIPs are the IPs in state "PendingReleasr". - releasingIPs int64 -} - -func (service *HTTPRestService) buildIPState() *ipState { - service.Lock() - defer service.Unlock() - - state := ipState{ - allocatedIPs: 0, - assignedIPs: 0, - availableIPs: 0, - programmingIPs: 0, - releasingIPs: 0, - } - - //nolint:gocritic // This has to iterate over the IP Config state to get the counts. - for _, ipConfig := range service.PodIPConfigState { - state.allocatedIPs++ - if ipConfig.GetState() == types.Assigned { - state.assignedIPs++ - } - if ipConfig.GetState() == types.Available { - state.availableIPs++ - } - if ipConfig.GetState() == types.PendingProgramming { - state.programmingIPs++ - } - if ipConfig.GetState() == types.PendingRelease { - state.releasingIPs++ - } - } - - logger.Printf("[IP Usage] Allocated IPs: %d, Assigned IPs: %d, Available IPs: %d, PendingProgramming IPs: %d, PendingRelease IPs: %d", - state.allocatedIPs, - state.assignedIPs, - state.availableIPs, - state.programmingIPs, - state.releasingIPs, - ) - return &state -} diff --git a/cns/restserver/metrics.go b/cns/restserver/metrics.go index 15adb86d76..010779ef20 100644 --- a/cns/restserver/metrics.go +++ b/cns/restserver/metrics.go @@ -1,10 +1,13 @@ package restserver import ( + "maps" "net/http" + "sync" "time" "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/cns/logger" "github.com/Azure/azure-container-networking/cns/types" "github.com/prometheus/client_golang/prometheus" "sigs.k8s.io/controller-runtime/pkg/metrics" @@ -122,7 +125,6 @@ func init() { // Every http response is 200 so we really want cns response code. 
// Hard tto do with middleware unless we derserialize the responses but making it an explit header works around it. // if that doesn't work we could have a separate countervec just for response codes. - func NewHandlerFuncWithHistogram(handler http.HandlerFunc, histogram *prometheus.HistogramVec) http.HandlerFunc { return func(w http.ResponseWriter, req *http.Request) { start := time.Now() @@ -142,7 +144,62 @@ func stateTransitionMiddleware(i *cns.IPConfigurationStatus, s types.IPState) { ipConfigStatusStateTransitionTime.WithLabelValues(string(i.GetState()), string(s)).Observe(time.Since(i.LastStateTransition).Seconds()) } -func publishIPStateMetrics(state *ipState) { +type ipState struct { + // allocatedIPs are all the IPs given to CNS by DNC. + allocatedIPs int64 + // assignedIPs are the IPs CNS gives to Pods. + assignedIPs int64 + // availableIPs are the IPs in state "Available". + availableIPs int64 + // programmingIPs are the IPs in state "PendingProgramming". + programmingIPs int64 + // releasingIPs are the IPs in state "PendingRelease". + releasingIPs int64 +} + +type asyncMetricsRecorder struct { + podIPConfigSrc func() map[string]cns.IPConfigurationStatus + sig chan struct{} + once sync.Once +} + +// recorder is the package-level singleton asyncMetricsRecorder. +var recorder asyncMetricsRecorder + +// run calls record once for every signal received on sig; it returns only when sig is closed. +func (a *asyncMetricsRecorder) run() { + for range a.sig { + a.record() + } +} + +// record counts the IP configs per state, logs the totals and publishes them to Prometheus. 
+func (a *asyncMetricsRecorder) record() {
+	var state ipState
+	for ipConfig := range maps.Values(a.podIPConfigSrc()) { // iterates over the map returned by the configured source
+		state.allocatedIPs++
+		if ipConfig.GetState() == types.Assigned {
+			state.assignedIPs++
+		}
+		if ipConfig.GetState() == types.Available {
+			state.availableIPs++
+		}
+		if ipConfig.GetState() == types.PendingProgramming {
+			state.programmingIPs++
+		}
+		if ipConfig.GetState() == types.PendingRelease {
+			state.releasingIPs++
+		}
+	}
+
+	logger.Printf("Allocated IPs: %d, Assigned IPs: %d, Available IPs: %d, PendingProgramming IPs: %d, PendingRelease IPs: %d",
+		state.allocatedIPs,
+		state.assignedIPs,
+		state.availableIPs,
+		state.programmingIPs,
+		state.releasingIPs,
+	)
+
 	labels := []string{}
 	allocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedIPs))
 	assignedIPCount.WithLabelValues(labels...).Set(float64(state.assignedIPs))
@@ -150,3 +207,33 @@ func publishIPStateMetrics(state *ipState) {
 	pendingProgrammingIPCount.WithLabelValues(labels...).Set(float64(state.programmingIPs))
 	pendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.releasingIPs))
 }
+
+// publishIPStateMetrics asynchronously logs and publishes the IP Config state
+// metrics to Prometheus. It never blocks the caller: it only signals the
+// package-level recorder goroutine, which is started lazily on first use.
+//
+// NOTE: the recorder is a process-wide singleton; its state source is bound to
+// whichever service calls this method first.
+func (service *HTTPRestService) publishIPStateMetrics() {
+	recorder.once.Do(func() {
+		recorder.podIPConfigSrc = service.PodIPConfigStates
+		// Buffer of one: a signal sent while the recorder is starting up or is
+		// busy inside record() stays pending instead of being dropped, so the
+		// most recent state change is always published eventually.
+		recorder.sig = make(chan struct{}, 1)
+		go recorder.run()
+	})
+	select {
+	case recorder.sig <- struct{}{}: // signal the recorder to record the metrics
+	default: // a signal is already pending; the queued record will observe the latest state
+	}
+}
+
+// PodIPConfigStates returns a clone of the IP Config State map.
+// The (shallow) copy is taken while holding the service read lock, so iterating
+// the returned map does not race with writers of the original map.
+func (service *HTTPRestService) PodIPConfigStates() map[string]cns.IPConfigurationStatus {
+	service.RLock()
+	defer service.RUnlock()
+	return maps.Clone(service.PodIPConfigState)
+}