@@ -10,6 +10,7 @@ import (
1010 "github.com/Azure/azure-container-networking/crd/clustersubnetstate/api/v1alpha1"
1111 "github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
1212 "github.com/pkg/errors"
13+ "golang.org/x/sync/errgroup"
1314)
1415
1516// Subnet ARM ID /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/$(SUBNET)
@@ -46,14 +47,22 @@ type metaState struct {
4647 subnetCIDR string
4748}
4849
// observer holds the data sources that observeMetrics reads. observeMetrics checks
// each source for nil before calling it, so any of them may be left unset.
50+ type observer struct {
// ipSrc returns the current IP configuration statuses as a string-keyed map.
51+ ipSrc func () map [string ]cns.IPConfigurationStatus
// nncSrc fetches the NodeNetworkConfig.
52+ nncSrc func (context.Context ) (* v1alpha.NodeNetworkConfig , error )
// cssSrc fetches the list of ClusterSubnetStates.
53+ cssSrc func (context.Context ) ([]v1alpha1.ClusterSubnetState , error )
54+ }
55+
4956// NewLegacyMetricsObserver creates a closed functional scope which can be invoked to
5057// observe the legacy IPAM pool metrics.
// The removed (-) version captured ctx at construction and returned a func() error.
// The added (+) version stores the three sources on an observer and returns its
// observeMetrics method value, so the caller supplies the context on every call.
// observeMetrics nil-checks each source, so a nil source is skipped rather than called.
5158//
5259//nolint:lll // ignore line length
53- func NewLegacyMetricsObserver (ctx context.Context , ipcli func () map [string ]cns.IPConfigurationStatus , nnccli func (context.Context ) (* v1alpha.NodeNetworkConfig , error ), csscli func (context.Context ) ([]v1alpha1.ClusterSubnetState , error )) func () error {
54- return func () error {
55- return observeMetrics (ctx , ipcli , nnccli , csscli )
56- }
60+ func NewLegacyMetricsObserver (ipSrc func () map [string ]cns.IPConfigurationStatus , nncSrc func (context.Context ) (* v1alpha.NodeNetworkConfig , error ), cssSrc func (context.Context ) ([]v1alpha1.ClusterSubnetState , error )) func (context.Context ) error {
61+ return (& observer {
62+ ipSrc : ipSrc ,
63+ nncSrc : nncSrc ,
64+ cssSrc : cssSrc ,
65+ }).observeMetrics
5766}
5867
5968// generateARMID uses the Subnet ARM ID format to populate the ARM ID with the metadata.
@@ -73,68 +82,98 @@ func generateARMID(nc *v1alpha.NetworkContainer) string {
7382// observeMetrics observes the IP pool and updates the metrics. Blocking.
// In the added (+) version this is a method on observer: each non-nil source is read
// in its own errgroup goroutine, the metrics are then written from whatever was
// collected, and only afterwards is a source error (if any) returned, wrapped.
// The removed (-) version was a free function that returned on the first source error
// before touching any metric.
7483//
7584//nolint:lll // ignore line length
76- func observeMetrics (ctx context.Context , ipcli func () map [string ]cns.IPConfigurationStatus , nnccli func (context.Context ) (* v1alpha.NodeNetworkConfig , error ), csscli func (context.Context ) ([]v1alpha1.ClusterSubnetState , error )) error {
77- csslist , err := csscli (ctx )
78- if err != nil {
79- return err
80- }
81- nnc , err := nnccli (ctx )
82- if err != nil {
83- return err
84- }
85- ips := ipcli ()
85+ func (o * observer ) observeMetrics (ctx context.Context ) error {
86+ // The error group is used to allow individual metrics sources to fail without
87+ // failing out the entire attempt to observe the Pool. This may happen if there is a
88+ // transient issue with the source of the data, or if the source is not available
89+ // (like if the CRD is not installed).
// NOTE(review): errgroup.Group.Wait reports only the first non-nil error, so if more
// than one source fails the later errors are dropped — confirm that is acceptable.
90+ var g errgroup.Group
8691
92+ // Get the current state of the world.
// NOTE(review): meta and state are shared by the goroutines below without a lock. In
// the lines visible here each goroutine writes a disjoint set of fields; keep it that
// way or add synchronization.
8793 var meta metaState
88- for i := range csslist {
89- if csslist [i ].Status .Exhausted {
90- meta .exhausted = true
91- break
92- }
93- }
94- if len (nnc .Status .NetworkContainers ) > 0 {
95- // Set SubnetName, SubnetAddressSpace and Pod Network ARM ID values to the global subnet, subnetCIDR and subnetARM variables.
96- meta .subnet = nnc .Status .NetworkContainers [0 ].SubnetName
97- meta .subnetCIDR = nnc .Status .NetworkContainers [0 ].SubnetAddressSpace
98- meta .subnetARMID = generateARMID (& nnc .Status .NetworkContainers [0 ])
99- }
100- meta .primaryIPAddresses = make (map [string ]struct {})
101- // Add Primary IP to Map, if not present.
102- // This is only for Swift i.e. if NC Type is vnet.
103- for i := 0 ; i < len (nnc .Status .NetworkContainers ); i ++ {
104- nc := nnc .Status .NetworkContainers [i ]
105- if nc .Type == "" || nc .Type == v1alpha .VNET {
106- meta .primaryIPAddresses [nc .PrimaryIP ] = struct {}{}
// Source 1: ClusterSubnetState. Sets meta.exhausted as soon as any entry reports
// Status.Exhausted; writes nothing else.
94+ g .Go (func () error {
95+ // Try to fetch the ClusterSubnetState, if available.
96+ if o .cssSrc != nil {
97+ csslist , err := o .cssSrc (ctx )
98+ if err != nil {
99+ return err
100+ }
101+ for i := range csslist {
102+ if csslist [i ].Status .Exhausted {
103+ meta .exhausted = true
104+ break
105+ }
106+ }
107107 }
108+ return nil
109+ })
108110
109- if nc .Type == v1alpha .VNETBlock {
110- primaryPrefix , err := netip .ParsePrefix (nc .PrimaryIP )
// Source 2: NodeNetworkConfig. Fills the subnet labels, the primary IP set and the
// scaler batch/max on meta, plus state.requestedIPs.
111+ var state ipPoolState
112+ g .Go (func () error {
113+ // Try to fetch the NodeNetworkConfig, if available.
114+ if o .nncSrc != nil {
115+ nnc , err := o .nncSrc (ctx )
111116 if err != nil {
112- return errors .Wrapf (err , "unable to parse ip prefix: %s" , nc .PrimaryIP )
117+ return err
118+ }
// NOTE(review): nnc is dereferenced below without a nil check — assumes nncSrc never
// returns (nil, nil); confirm against the callers.
119+ if len (nnc .Status .NetworkContainers ) > 0 {
120+ // Set the subnet name, subnet address space and Pod Network ARM ID on meta from the first NetworkContainer.
121+ meta .subnet = nnc .Status .NetworkContainers [0 ].SubnetName
122+ meta .subnetCIDR = nnc .Status .NetworkContainers [0 ].SubnetAddressSpace
123+ meta .subnetARMID = generateARMID (& nnc .Status .NetworkContainers [0 ])
124+ }
125+ meta .primaryIPAddresses = make (map [string ]struct {})
126+ // Add Primary IP to Map, if not present.
127+ // This is only for Swift i.e. if NC Type is vnet.
128+ for i := 0 ; i < len (nnc .Status .NetworkContainers ); i ++ {
129+ nc := nnc .Status .NetworkContainers [i ]
130+ if nc .Type == "" || nc .Type == v1alpha .VNET {
131+ meta .primaryIPAddresses [nc .PrimaryIP ] = struct {}{}
132+ }
133+
// For VNETBlock the PrimaryIP is parsed as a prefix and only its address part is
// recorded. A parse failure returns early, before requestedIPs, batch and max are set.
134+ if nc .Type == v1alpha .VNETBlock {
135+ primaryPrefix , err := netip .ParsePrefix (nc .PrimaryIP )
136+ if err != nil {
137+ return errors .Wrapf (err , "unable to parse ip prefix: %s" , nc .PrimaryIP )
138+ }
139+ meta .primaryIPAddresses [primaryPrefix .Addr ().String ()] = struct {}{}
140+ }
113141 }
114- meta .primaryIPAddresses [primaryPrefix .Addr ().String ()] = struct {}{}
142+ state .requestedIPs = nnc .Spec .RequestedIPCount
143+ meta .batch = nnc .Status .Scaler .BatchSize
144+ meta .max = nnc .Status .Scaler .MaxIPCount
115145 }
116- }
146+ return nil
147+ })
117148
118- state := ipPoolState {
119- secondaryIPs : int64 (len (ips )),
120- requestedIPs : nnc .Spec .RequestedIPCount ,
121- }
122- for i := range ips {
123- ip := ips [i ]
124- switch ip .GetState () {
125- case types .Assigned :
126- state .allocatedToPods ++
127- case types .Available :
128- state .available ++
129- case types .PendingProgramming :
130- state .pendingProgramming ++
131- case types .PendingRelease :
132- state .pendingRelease ++
// Source 3: IP configurations. Records the total as state.secondaryIPs and tallies
// each IP into the counter matching its state; this goroutine never returns an error.
149+ g .Go (func () error {
150+ // Try to fetch the IPConfigurations, if available.
151+ if o .ipSrc != nil {
152+ ips := o .ipSrc ()
153+ state .secondaryIPs = int64 (len (ips ))
154+ for i := range ips {
155+ ip := ips [i ]
156+ switch ip .GetState () {
157+ case types .Assigned :
158+ state .allocatedToPods ++
159+ case types .Available :
160+ state .available ++
161+ case types .PendingProgramming :
162+ state .pendingProgramming ++
163+ case types .PendingRelease :
164+ state .pendingRelease ++
165+ }
166+ }
133167 }
134- }
168+ return nil
169+ })
170+
// Hold the error until the metrics below have been written, so that data from the
// sources that did succeed is still published.
171+ err := g .Wait ()
172+
// Derived counts. Fields that a nil or failed source never assigned keep their zero
// value and feed into these results as zero.
135173 state .currentAvailableIPs = state .secondaryIPs - state .allocatedToPods - state .pendingRelease
136174 state .expectedAvailableIPs = state .requestedIPs - state .allocatedToPods
137175
176+ // Update the metrics.
138177 labels := []string {meta .subnet , meta .subnetCIDR , meta .subnetARMID }
139178 IpamAllocatedIPCount .WithLabelValues (labels ... ).Set (float64 (state .allocatedToPods ))
140179 IpamAvailableIPCount .WithLabelValues (labels ... ).Set (float64 (state .available ))
@@ -153,5 +192,8 @@ func observeMetrics(ctx context.Context, ipcli func() map[string]cns.IPConfigura
153192 } else {
154193 IpamSubnetExhaustionState .WithLabelValues (labels ... ).Set (float64 (SubnetIPNotExhausted ))
155194 }
// Report the collection failure only now, after the metrics have been updated.
195+ if err != nil {
196+ return errors .Wrap (err , "failed to collect all metrics" )
197+ }
156198 return nil
157199}
0 commit comments