Skip to content

Commit ff46b57

Browse files
authored
fix: add legacy IPAM metrics back to IPAMv2 (#2970)
Signed-off-by: Evan Baker <[email protected]>
1 parent b7ce09f commit ff46b57

File tree

6 files changed

+240
-62
lines changed

6 files changed

+240
-62
lines changed
Lines changed: 19 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
1-
package ipampool
1+
package metrics
22

33
import (
44
"github.com/prometheus/client_golang/prometheus"
55
"sigs.k8s.io/controller-runtime/pkg/metrics"
66
)
77

88
const (
9-
subnetLabel = "subnet"
10-
subnetCIDRLabel = "subnet_cidr"
11-
podnetARMIDLabel = "podnet_arm_id"
9+
SubnetLabel = "subnet" // variable label name used by the metric vectors declared in this file
10+
SubnetCIDRLabel = "subnet_cidr" // variable label name used by the metric vectors declared in this file
11+
PodnetARMIDLabel = "podnet_arm_id" // variable label name used by the metric vectors declared in this file
1212
customerMetricLabel = "customer_metric" // key of the ConstLabels entry attached to the gauges
1313
customerMetricLabelValue = "customer metric" // value of the ConstLabels entry attached to the gauges
14-
subnetExhaustionStateLabel = "subnet_exhaustion_state"
14+
SubnetExhaustionStateLabel = "subnet_exhaustion_state" // additional variable label used by IpamSubnetExhaustionCount
1515
SubnetIPExhausted = 1 // value set on IpamSubnetExhaustionState when the subnet is exhausted
1616
SubnetIPNotExhausted = 0 // value set on IpamSubnetExhaustionState when the subnet is not exhausted
1717
)
@@ -23,110 +23,110 @@ var (
2323
Help: "IPs currently in use by Pods on this CNS Node.",
2424
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
2525
},
26-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
26+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
2727
)
2828
IpamAvailableIPCount = prometheus.NewGaugeVec(
2929
prometheus.GaugeOpts{
3030
Name: "cx_ipam_available_ips",
3131
Help: "IPs available on this CNS Node for use by a Pod.",
3232
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
3333
},
34-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
34+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
3535
)
3636
IpamBatchSize = prometheus.NewGaugeVec(
3737
prometheus.GaugeOpts{
3838
Name: "cx_ipam_batch_size",
3939
Help: "IPAM IP pool scaling batch size.",
4040
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
4141
},
42-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
42+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
4343
)
4444
IpamCurrentAvailableIPcount = prometheus.NewGaugeVec(
4545
prometheus.GaugeOpts{
4646
Name: "cx_ipam_current_available_ips",
4747
Help: "Current available IP count.",
4848
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
4949
},
50-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
50+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
5151
)
5252
IpamExpectedAvailableIPCount = prometheus.NewGaugeVec(
5353
prometheus.GaugeOpts{
5454
Name: "cx_ipam_expect_available_ips",
5555
Help: "Expected future available IP count assuming the Requested IP count is honored.",
5656
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
5757
},
58-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
58+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
5959
)
6060
IpamMaxIPCount = prometheus.NewGaugeVec(
6161
prometheus.GaugeOpts{
6262
Name: "cx_ipam_max_ips",
6363
Help: "Maximum Secondary IPs allowed on this Node.",
6464
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
6565
},
66-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
66+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
6767
)
6868
IpamPendingProgramIPCount = prometheus.NewGaugeVec(
6969
prometheus.GaugeOpts{
7070
Name: "cx_ipam_pending_programming_ips",
7171
Help: "IPs reserved but not yet available (Pending Programming).",
7272
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
7373
},
74-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
74+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
7575
)
7676
IpamPendingReleaseIPCount = prometheus.NewGaugeVec(
7777
prometheus.GaugeOpts{
7878
Name: "cx_ipam_pending_release_ips",
7979
Help: "IPs reserved but not available anymore (Pending Release).",
8080
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
8181
},
82-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
82+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
8383
)
8484
IpamPrimaryIPCount = prometheus.NewGaugeVec(
8585
prometheus.GaugeOpts{
8686
Name: "cx_ipam_primary_ips",
8787
Help: "NC Primary IP count (reserved from Pod Subnet for DNS and IMDS SNAT).",
8888
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
8989
},
90-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
90+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
9191
)
9292
IpamRequestedIPConfigCount = prometheus.NewGaugeVec(
9393
prometheus.GaugeOpts{
9494
Name: "cx_ipam_requested_ips",
9595
Help: "Secondary Pod Subnet IPs requested by this CNS Node (for Pods).",
9696
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
9797
},
98-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
98+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
9999
)
100100
IpamSecondaryIPCount = prometheus.NewGaugeVec(
101101
prometheus.GaugeOpts{
102102
Name: "cx_ipam_secondary_ips",
103103
Help: "Node NC Secondary IP count (reserved usable by Pods).",
104104
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
105105
},
106-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
106+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
107107
)
108108
IpamTotalIPCount = prometheus.NewGaugeVec(
109109
prometheus.GaugeOpts{
110110
Name: "cx_ipam_total_ips",
111111
Help: "Count of total IP pool size allocated to CNS by DNC.",
112112
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
113113
},
114-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
114+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
115115
)
116116
IpamSubnetExhaustionState = prometheus.NewGaugeVec(
117117
prometheus.GaugeOpts{
118118
Name: "cx_ipam_subnet_exhaustion_state",
119119
Help: "IPAM view of subnet exhaustion state",
120120
ConstLabels: prometheus.Labels{customerMetricLabel: customerMetricLabelValue},
121121
},
122-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel},
122+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel},
123123
)
124124
IpamSubnetExhaustionCount = prometheus.NewCounterVec(
125125
prometheus.CounterOpts{
126126
Name: "cx_ipam_subnet_exhaustion_state_count_total",
127127
Help: "Count of the number of times the ipam pool monitor sees subnet exhaustion",
128128
},
129-
[]string{subnetLabel, subnetCIDRLabel, podnetARMIDLabel, subnetExhaustionStateLabel},
129+
[]string{SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel, SubnetExhaustionStateLabel},
130130
)
131131
)
132132

@@ -148,24 +148,3 @@ func init() {
148148
IpamSubnetExhaustionCount,
149149
)
150150
}
151-
152-
func observeIPPoolState(state ipPoolState, meta metaState) {
153-
labels := []string{meta.subnet, meta.subnetCIDR, meta.subnetARMID}
154-
IpamAllocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedToPods))
155-
IpamAvailableIPCount.WithLabelValues(labels...).Set(float64(state.available))
156-
IpamBatchSize.WithLabelValues(labels...).Set(float64(meta.batch))
157-
IpamCurrentAvailableIPcount.WithLabelValues(labels...).Set(float64(state.currentAvailableIPs))
158-
IpamExpectedAvailableIPCount.WithLabelValues(labels...).Set(float64(state.expectedAvailableIPs))
159-
IpamMaxIPCount.WithLabelValues(labels...).Set(float64(meta.max))
160-
IpamPendingProgramIPCount.WithLabelValues(labels...).Set(float64(state.pendingProgramming))
161-
IpamPendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.pendingRelease))
162-
IpamPrimaryIPCount.WithLabelValues(labels...).Set(float64(len(meta.primaryIPAddresses)))
163-
IpamRequestedIPConfigCount.WithLabelValues(labels...).Set(float64(state.requestedIPs))
164-
IpamSecondaryIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs))
165-
IpamTotalIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs + int64(len(meta.primaryIPAddresses))))
166-
if meta.exhausted {
167-
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPExhausted))
168-
} else {
169-
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPNotExhausted))
170-
}
171-
}

cns/ipampool/metrics/observer.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package metrics
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/netip"
7+
8+
"github.com/Azure/azure-container-networking/cns"
9+
"github.com/Azure/azure-container-networking/cns/types"
10+
"github.com/Azure/azure-container-networking/crd/clustersubnetstate/api/v1alpha1"
11+
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
12+
"github.com/pkg/errors"
13+
)
14+
15+
// Subnet ARM ID /subscriptions/$(SUB)/resourceGroups/$(GROUP)/providers/Microsoft.Network/virtualNetworks/$(VNET)/subnets/$(SUBNET)
16+
const subnetARMIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Network/virtualNetworks/%s/subnets/%s"
17+
18+
// ipPoolState is the current actual state of the CNS IP pool, expressed as the counts that are published as gauges.
19+
type ipPoolState struct {
20+
// allocatedToPods are the IPs CNS gives to Pods (counted as IPs in state types.Assigned).
21+
allocatedToPods int64
22+
// available are the IPs in state "Available".
23+
available int64
24+
// currentAvailableIPs are the current available IPs: secondaryIPs - allocatedToPods - pendingRelease.
25+
currentAvailableIPs int64
26+
// expectedAvailableIPs are the "future" available IPs, if the requested IP count is honored: requestedIPs - allocatedToPods.
27+
expectedAvailableIPs int64
28+
// pendingProgramming are the IPs in state "PendingProgramming".
29+
pendingProgramming int64
30+
// pendingRelease are the IPs in state "PendingRelease".
31+
pendingRelease int64
32+
// requestedIPs are the IPs CNS has requested that it be allocated by DNC (read from the NodeNetworkConfig Spec.RequestedIPCount).
33+
requestedIPs int64
34+
// secondaryIPs are all the IPs given to CNS by DNC, not including the primary IP of the NC (the number of entries in the IP configuration status map).
35+
secondaryIPs int64
36+
}
37+
38+
// metaState is the configuration/metadata state for the IP pool: it supplies the metric label values and the non-count gauge inputs.
39+
type metaState struct {
40+
batch int64 // value published as IpamBatchSize
41+
exhausted bool // selects SubnetIPExhausted or SubnetIPNotExhausted for IpamSubnetExhaustionState
42+
max int64 // value published as IpamMaxIPCount
43+
primaryIPAddresses map[string]struct{} // set of NC primary IP addresses; its size is published as IpamPrimaryIPCount
44+
subnet string // first metric label value (subnet name)
45+
subnetARMID string // third metric label value (subnet ARM ID)
46+
subnetCIDR string // second metric label value (subnet address space)
47+
}
48+
49+
// NewLegacyMetricsObserver returns a closure over ctx and the three data sources which can be invoked to
50+
// observe the legacy IPAM pool metrics. Each invocation calls observeMetrics with the same captured ctx and returns its error. ipcli supplies the IP configuration statuses, nnccli the NodeNetworkConfig and csscli the ClusterSubnetStates.
51+
//
52+
//nolint:lll // ignore line length
53+
func NewLegacyMetricsObserver(ctx context.Context, ipcli func() map[string]cns.IPConfigurationStatus, nnccli func(context.Context) (*v1alpha.NodeNetworkConfig, error), csscli func(context.Context) ([]v1alpha1.ClusterSubnetState, error)) func() error {
54+
return func() error {
55+
return observeMetrics(ctx, ipcli, nnccli, csscli)
56+
}
57+
}
58+
59+
// generateARMID fills the Subnet ARM ID template with the subscription, resource group, VNET and subnet IDs of nc.
60+
// If any of these four metadata attributes is empty, then the ARM ID will be an empty string.
61+
func generateARMID(nc *v1alpha.NetworkContainer) string {
62+
subscription := nc.SubscriptionID
63+
resourceGroup := nc.ResourceGroupID
64+
vnetID := nc.VNETID
65+
subnetID := nc.SubnetID
66+
67+
if subscription == "" || resourceGroup == "" || vnetID == "" || subnetID == "" {
68+
return ""
69+
}
70+
return fmt.Sprintf(subnetARMIDTemplate, subscription, resourceGroup, vnetID, subnetID)
71+
}
72+
73+
// observeMetrics fetches the ClusterSubnetStates (csscli), the NodeNetworkConfig (nnccli) and the IP configuration statuses (ipcli), derives the pool state from them and sets the legacy IPAM gauges. It returns the first client error, or a primary IP prefix parse error, before any gauge is updated. Blocking.
74+
//
75+
//nolint:lll // ignore line length
76+
func observeMetrics(ctx context.Context, ipcli func() map[string]cns.IPConfigurationStatus, nnccli func(context.Context) (*v1alpha.NodeNetworkConfig, error), csscli func(context.Context) ([]v1alpha1.ClusterSubnetState, error)) error {
77+
csslist, err := csscli(ctx)
78+
if err != nil {
79+
return err
80+
}
81+
nnc, err := nnccli(ctx)
82+
if err != nil {
83+
return err
84+
}
85+
ips := ipcli()
86+
87+
var meta metaState // NOTE(review): meta.batch and meta.max are never assigned in this function, so IpamBatchSize and IpamMaxIPCount are always set to 0 below — confirm whether they should be populated.
88+
for i := range csslist { // the pool is reported as exhausted if any ClusterSubnetState is exhausted
89+
if csslist[i].Status.Exhausted {
90+
meta.exhausted = true
91+
break
92+
}
93+
}
94+
if len(nnc.Status.NetworkContainers) > 0 {
95+
// Use the first NetworkContainer's SubnetName, SubnetAddressSpace and generated Pod Network ARM ID as the metric label values; they stay empty when there are no NetworkContainers.
96+
meta.subnet = nnc.Status.NetworkContainers[0].SubnetName
97+
meta.subnetCIDR = nnc.Status.NetworkContainers[0].SubnetAddressSpace
98+
meta.subnetARMID = generateARMID(&nnc.Status.NetworkContainers[0])
99+
}
100+
meta.primaryIPAddresses = make(map[string]struct{})
101+
// Add the Primary IP of each NetworkContainer to the set (duplicates collapse into one entry).
102+
// For Swift, i.e. NC Type vnet (or empty), PrimaryIP is stored as-is; for VNETBlock it is parsed as a prefix and only its address is stored.
103+
for i := 0; i < len(nnc.Status.NetworkContainers); i++ {
104+
nc := nnc.Status.NetworkContainers[i]
105+
if nc.Type == "" || nc.Type == v1alpha.VNET {
106+
meta.primaryIPAddresses[nc.PrimaryIP] = struct{}{}
107+
}
108+
109+
if nc.Type == v1alpha.VNETBlock {
110+
primaryPrefix, err := netip.ParsePrefix(nc.PrimaryIP)
111+
if err != nil {
112+
return errors.Wrapf(err, "unable to parse ip prefix: %s", nc.PrimaryIP)
113+
}
114+
meta.primaryIPAddresses[primaryPrefix.Addr().String()] = struct{}{}
115+
}
116+
}
117+
118+
state := ipPoolState{
119+
secondaryIPs: int64(len(ips)),
120+
requestedIPs: nnc.Spec.RequestedIPCount,
121+
}
122+
for i := range ips { // ips is a map, so i is the map key
123+
ip := ips[i]
124+
switch ip.GetState() { // IPs in any other state are not counted individually but are still included in secondaryIPs
125+
case types.Assigned:
126+
state.allocatedToPods++
127+
case types.Available:
128+
state.available++
129+
case types.PendingProgramming:
130+
state.pendingProgramming++
131+
case types.PendingRelease:
132+
state.pendingRelease++
133+
}
134+
}
135+
state.currentAvailableIPs = state.secondaryIPs - state.allocatedToPods - state.pendingRelease
136+
state.expectedAvailableIPs = state.requestedIPs - state.allocatedToPods
137+
138+
labels := []string{meta.subnet, meta.subnetCIDR, meta.subnetARMID} // order matches the SubnetLabel, SubnetCIDRLabel, PodnetARMIDLabel order the vectors are declared with
139+
IpamAllocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedToPods))
140+
IpamAvailableIPCount.WithLabelValues(labels...).Set(float64(state.available))
141+
IpamBatchSize.WithLabelValues(labels...).Set(float64(meta.batch))
142+
IpamCurrentAvailableIPcount.WithLabelValues(labels...).Set(float64(state.currentAvailableIPs))
143+
IpamExpectedAvailableIPCount.WithLabelValues(labels...).Set(float64(state.expectedAvailableIPs))
144+
IpamMaxIPCount.WithLabelValues(labels...).Set(float64(meta.max))
145+
IpamPendingProgramIPCount.WithLabelValues(labels...).Set(float64(state.pendingProgramming))
146+
IpamPendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.pendingRelease))
147+
IpamPrimaryIPCount.WithLabelValues(labels...).Set(float64(len(meta.primaryIPAddresses)))
148+
IpamRequestedIPConfigCount.WithLabelValues(labels...).Set(float64(state.requestedIPs))
149+
IpamSecondaryIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs))
150+
IpamTotalIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs + int64(len(meta.primaryIPAddresses)))) // total = secondary IPs + distinct primary IPs
151+
if meta.exhausted {
152+
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPExhausted))
153+
} else {
154+
IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(SubnetIPNotExhausted))
155+
}
156+
return nil
157+
}

cns/ipampool/monitor.go

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010

1111
"github.com/Azure/azure-container-networking/cns"
12+
"github.com/Azure/azure-container-networking/cns/ipampool/metrics"
1213
"github.com/Azure/azure-container-networking/cns/logger"
1314
"github.com/Azure/azure-container-networking/cns/metric"
1415
"github.com/Azure/azure-container-networking/cns/types"
@@ -105,9 +106,9 @@ func (pm *Monitor) Start(ctx context.Context) error {
105106
case css := <-pm.cssSource: // received an updated ClusterSubnetState
106107
pm.metastate.exhausted = css.Status.Exhausted
107108
logger.Printf("subnet exhausted status = %t", pm.metastate.exhausted)
108-
IpamSubnetExhaustionCount.With(prometheus.Labels{
109-
subnetLabel: pm.metastate.subnet, subnetCIDRLabel: pm.metastate.subnetCIDR,
110-
podnetARMIDLabel: pm.metastate.subnetARMID, subnetExhaustionStateLabel: strconv.FormatBool(pm.metastate.exhausted),
109+
metrics.IpamSubnetExhaustionCount.With(prometheus.Labels{
110+
metrics.SubnetLabel: pm.metastate.subnet, metrics.SubnetCIDRLabel: pm.metastate.subnetCIDR,
111+
metrics.PodnetARMIDLabel: pm.metastate.subnetARMID, metrics.SubnetExhaustionStateLabel: strconv.FormatBool(pm.metastate.exhausted),
111112
}).Inc()
112113
select {
113114
default:
@@ -482,6 +483,27 @@ func (pm *Monitor) clampScaler(scaler *v1alpha.Scaler) {
482483
}
483484
}
484485

486+
func observeIPPoolState(state ipPoolState, meta metaState) { // observeIPPoolState publishes the given pool state and metadata to the IPAM gauges of the metrics package.
487+
labels := []string{meta.subnet, meta.subnetCIDR, meta.subnetARMID} // label values shared by every gauge set below
488+
metrics.IpamAllocatedIPCount.WithLabelValues(labels...).Set(float64(state.allocatedToPods))
489+
metrics.IpamAvailableIPCount.WithLabelValues(labels...).Set(float64(state.available))
490+
metrics.IpamBatchSize.WithLabelValues(labels...).Set(float64(meta.batch))
491+
metrics.IpamCurrentAvailableIPcount.WithLabelValues(labels...).Set(float64(state.currentAvailableIPs))
492+
metrics.IpamExpectedAvailableIPCount.WithLabelValues(labels...).Set(float64(state.expectedAvailableIPs))
493+
metrics.IpamMaxIPCount.WithLabelValues(labels...).Set(float64(meta.max))
494+
metrics.IpamPendingProgramIPCount.WithLabelValues(labels...).Set(float64(state.pendingProgramming))
495+
metrics.IpamPendingReleaseIPCount.WithLabelValues(labels...).Set(float64(state.pendingRelease))
496+
metrics.IpamPrimaryIPCount.WithLabelValues(labels...).Set(float64(len(meta.primaryIPAddresses)))
497+
metrics.IpamRequestedIPConfigCount.WithLabelValues(labels...).Set(float64(state.requestedIPs))
498+
metrics.IpamSecondaryIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs))
499+
metrics.IpamTotalIPCount.WithLabelValues(labels...).Set(float64(state.secondaryIPs + int64(len(meta.primaryIPAddresses)))) // total = secondary IPs + number of primary IPs
500+
if meta.exhausted {
501+
metrics.IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(metrics.SubnetIPExhausted))
502+
} else {
503+
metrics.IpamSubnetExhaustionState.WithLabelValues(labels...).Set(float64(metrics.SubnetIPNotExhausted))
504+
}
505+
}
506+
485507
// CalculateMinFreeIPs calculates the minimum free IP quantity based on the Scaler
486508
// in the passed NodeNetworkConfig.
487509
// Half of odd batches are rounded up!

0 commit comments

Comments
 (0)