Skip to content

Commit cadca2f

Browse files
authored
Include PendingRelease when calculating IP pool for scale-down (#1237)
* fix scale down Signed-off-by: Evan Baker <[email protected]> * update pool state variable naming Signed-off-by: Evan Baker <[email protected]> * add test for pending release Signed-off-by: Evan Baker <[email protected]>
1 parent ad516c0 commit cadca2f

File tree

4 files changed

+91
-48
lines changed

4 files changed

+91
-48
lines changed

cns/ipampool/metrics.go

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,8 @@ import (
88
var (
99
ipamAllocatedIPCount = prometheus.NewGauge(
1010
prometheus.GaugeOpts{
11-
Name: "ipam_allocated_ips",
12-
Help: "CNS's allocated IP pool size.",
13-
},
14-
)
15-
ipamAssignedIPCount = prometheus.NewGauge(
16-
prometheus.GaugeOpts{
17-
Name: "ipam_assigned_ips",
18-
Help: "Assigned IP count.",
11+
Name: "ipam_pod_allocated_ips",
12+
Help: "Count of IPs CNS has allocated to Pods.",
1913
},
2014
)
2115
ipamAvailableIPCount = prometheus.NewGauge(
@@ -30,6 +24,18 @@ var (
3024
Help: "IPAM IP pool batch size.",
3125
},
3226
)
27+
ipamCurrentAvailableIPcount = prometheus.NewGauge(
28+
prometheus.GaugeOpts{
29+
Name: "ipam_current_available_ips",
30+
Help: "Current available IP count.",
31+
},
32+
)
33+
ipamExpectedAvailableIPCount = prometheus.NewGauge(
34+
prometheus.GaugeOpts{
35+
Name: "ipam_expect_available_ips",
36+
Help: "Expected future available IP count assuming the Requested IP count is honored.",
37+
},
38+
)
3339
ipamMaxIPCount = prometheus.NewGauge(
3440
prometheus.GaugeOpts{
3541
Name: "ipam_max_ips",
@@ -54,44 +60,38 @@ var (
5460
Help: "Requested IP count.",
5561
},
5662
)
57-
ipamRequestedUnassignedIPConfigCount = prometheus.NewGauge(
58-
prometheus.GaugeOpts{
59-
Name: "ipam_requested_unassigned_ips",
60-
Help: "Future unassigned IP count assuming the Requested IP count is honored.",
61-
},
62-
)
63-
ipamUnassignedIPCount = prometheus.NewGauge(
63+
ipamTotalIPCount = prometheus.NewGauge(
6464
prometheus.GaugeOpts{
65-
Name: "ipam_unassigned_ips",
66-
Help: "Unassigned IP count.",
65+
Name: "ipam_total_ips",
66+
Help: "Count of total IP pool size allocated to CNS by DNC.",
6767
},
6868
)
6969
)
7070

7171
func init() {
7272
metrics.Registry.MustRegister(
7373
ipamAllocatedIPCount,
74-
ipamAssignedIPCount,
7574
ipamAvailableIPCount,
7675
ipamBatchSize,
76+
ipamCurrentAvailableIPcount,
77+
ipamExpectedAvailableIPCount,
7778
ipamMaxIPCount,
7879
ipamPendingProgramIPCount,
7980
ipamPendingReleaseIPCount,
8081
ipamRequestedIPConfigCount,
81-
ipamRequestedUnassignedIPConfigCount,
82-
ipamUnassignedIPCount,
82+
ipamTotalIPCount,
8383
)
8484
}
8585

8686
func observeIPPoolState(state ipPoolState, meta metaState) {
87-
ipamAllocatedIPCount.Set(float64(state.allocated))
88-
ipamAssignedIPCount.Set(float64(state.assigned))
87+
ipamAllocatedIPCount.Set(float64(state.allocatedToPods))
8988
ipamAvailableIPCount.Set(float64(state.available))
9089
ipamBatchSize.Set(float64(meta.batch))
90+
ipamCurrentAvailableIPcount.Set(float64(state.currentAvailableIPs))
91+
ipamExpectedAvailableIPCount.Set(float64(state.expectedAvailableIPs))
9192
ipamMaxIPCount.Set(float64(meta.max))
9293
ipamPendingProgramIPCount.Set(float64(state.pendingProgramming))
9394
ipamPendingReleaseIPCount.Set(float64(state.pendingRelease))
94-
ipamRequestedIPConfigCount.Set(float64(state.requested))
95-
ipamRequestedUnassignedIPConfigCount.Set(float64(state.requestedUnassigned))
96-
ipamUnassignedIPCount.Set(float64(state.unassigned))
95+
ipamRequestedIPConfigCount.Set(float64(state.requestedIPs))
96+
ipamTotalIPCount.Set(float64(state.totalIPs))
9797
}

cns/ipampool/monitor.go

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -102,33 +102,33 @@ func (pm *Monitor) Start(ctx context.Context) error {
102102

103103
// ipPoolState is the current actual state of the CNS IP pool.
104104
type ipPoolState struct {
105-
// allocated are the IPs given to CNS.
106-
allocated int64
107-
// assigned are the IPs CNS gives to Pods.
108-
assigned int64
109-
// available are the allocated IPs in state "Available".
105+
// allocatedToPods are the IPs CNS gives to Pods.
106+
allocatedToPods int64
107+
// available are the IPs in state "Available".
110108
available int64
111-
// pendingProgramming are the allocated IPs in state "PendingProgramming".
109+
// currentAvailableIPs are the current available IPs: allocated - assigned - pendingRelease.
110+
currentAvailableIPs int64
111+
// expectedAvailableIPs are the "future" available IPs, if the requested IP count is honored: requested - assigned.
112+
expectedAvailableIPs int64
113+
// pendingProgramming are the IPs in state "PendingProgramming".
112114
pendingProgramming int64
113-
// pendingRelease are the allocated IPs in state "PendingRelease".
115+
// pendingRelease are the IPs in state "PendingRelease".
114116
pendingRelease int64
115-
// requested are the IPs CNS has requested that it be allocated.
116-
requested int64
117-
// requestedUnassigned are the "future" unassigned IPs, if the requested IP count is honored: requested - assigned.
118-
requestedUnassigned int64
119-
// unassigned are the currently unassigned IPs: allocated - assigned.
120-
unassigned int64
117+
// requestedIPs are the IPs CNS has requested that it be allocated by DNC.
118+
requestedIPs int64
119+
// totalIPs are all the IPs given to CNS by DNC.
120+
totalIPs int64
121121
}
122122

123123
func buildIPPoolState(ips map[string]cns.IPConfigurationStatus, spec v1alpha.NodeNetworkConfigSpec) ipPoolState {
124124
state := ipPoolState{
125-
allocated: int64(len(ips)),
126-
requested: spec.RequestedIPCount,
125+
totalIPs: int64(len(ips)),
126+
requestedIPs: spec.RequestedIPCount,
127127
}
128128
for _, v := range ips {
129129
switch v.GetState() {
130130
case types.Assigned:
131-
state.assigned++
131+
state.allocatedToPods++
132132
case types.Available:
133133
state.available++
134134
case types.PendingProgramming:
@@ -137,8 +137,8 @@ func buildIPPoolState(ips map[string]cns.IPConfigurationStatus, spec v1alpha.Nod
137137
state.pendingRelease++
138138
}
139139
}
140-
state.unassigned = state.allocated - state.assigned
141-
state.requestedUnassigned = state.requested - state.assigned
140+
state.currentAvailableIPs = state.totalIPs - state.allocatedToPods - state.pendingRelease
141+
state.expectedAvailableIPs = state.requestedIPs - state.allocatedToPods
142142
return state
143143
}
144144

@@ -150,8 +150,8 @@ func (pm *Monitor) reconcile(ctx context.Context) error {
150150

151151
switch {
152152
// pod count is increasing
153-
case state.requestedUnassigned < pm.metastate.minFreeCount:
154-
if state.requested == pm.metastate.max {
153+
case state.expectedAvailableIPs < pm.metastate.minFreeCount:
154+
if state.requestedIPs == pm.metastate.max {
155155
// If we're already at the maxIPCount, don't try to increase
156156
return nil
157157
}
@@ -160,7 +160,7 @@ func (pm *Monitor) reconcile(ctx context.Context) error {
160160
return pm.increasePoolSize(ctx, state)
161161

162162
// pod count is decreasing
163-
case state.unassigned >= pm.metastate.maxFreeCount:
163+
case state.currentAvailableIPs >= pm.metastate.maxFreeCount:
164164
logger.Printf("[ipam-pool-monitor] Decreasing pool size...")
165165
return pm.decreasePoolSize(ctx, state)
166166

@@ -171,7 +171,7 @@ func (pm *Monitor) reconcile(ctx context.Context) error {
171171
return pm.cleanPendingRelease(ctx)
172172

173173
// no pods scheduled
174-
case state.assigned == 0:
174+
case state.allocatedToPods == 0:
175175
logger.Printf("[ipam-pool-monitor] No pods scheduled")
176176
return nil
177177
}

cns/ipampool/monitor_test.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,10 @@ type testState struct {
3737
assigned int
3838
batch int64
3939
max int64
40+
pendingRelease int64
4041
releaseThresholdPercent int64
4142
requestThresholdPercent int64
43+
totalIPs int64
4244
}
4345

4446
func initFakes(state testState) (*fakes.HTTPServiceFake, *fakes.RequestControllerFake, *Monitor) {
@@ -52,8 +54,11 @@ func initFakes(state testState) (*fakes.HTTPServiceFake, *fakes.RequestControlle
5254
}
5355
subnetaddresspace := "10.0.0.0/8"
5456

57+
if state.totalIPs == 0 {
58+
state.totalIPs = state.allocated
59+
}
5560
fakecns := fakes.NewHTTPServiceFake()
56-
fakerc := fakes.NewRequestControllerFake(fakecns, scalarUnits, subnetaddresspace, state.allocated)
61+
fakerc := fakes.NewRequestControllerFake(fakecns, scalarUnits, subnetaddresspace, state.totalIPs)
5762

5863
poolmonitor := NewMonitor(fakecns, &fakeNodeNetworkConfigUpdater{fakerc.NNC}, &Options{RefreshDelay: 100 * time.Second})
5964
poolmonitor.metastate = metaState{
@@ -64,6 +69,9 @@ func initFakes(state testState) (*fakes.HTTPServiceFake, *fakes.RequestControlle
6469
if err := fakecns.SetNumberOfAssignedIPs(state.assigned); err != nil {
6570
logger.Printf("%s", err)
6671
}
72+
if _, err := fakecns.MarkIPAsPendingRelease(int(state.pendingRelease)); err != nil {
73+
logger.Printf("%s", err)
74+
}
6775

6876
return fakecns, fakerc, poolmonitor
6977
}
@@ -389,6 +397,40 @@ func TestDecreaseAfterNodeLimitReached(t *testing.T) {
389397
assert.Len(t, poolmonitor.spec.IPsNotInUse, int(initState.max%initState.batch))
390398
}
391399

400+
func TestDecreaseWithPendingRelease(t *testing.T) {
401+
initState := testState{
402+
batch: 16,
403+
assigned: 46,
404+
allocated: 64,
405+
pendingRelease: 8,
406+
requestThresholdPercent: 50,
407+
releaseThresholdPercent: 150,
408+
totalIPs: 64,
409+
max: 250,
410+
}
411+
fakecns, fakerc, poolmonitor := initFakes(initState)
412+
fakerc.NNC.Spec.RequestedIPCount = 48
413+
assert.NoError(t, fakerc.Reconcile(true))
414+
415+
assert.NoError(t, poolmonitor.reconcile(context.Background()))
416+
417+
// reallocate some IPs
418+
assert.NoError(t, fakecns.SetNumberOfAssignedIPs(40))
419+
assert.NoError(t, poolmonitor.reconcile(context.Background()))
420+
421+
// Ensure poolmonitor asked for a multiple of batch size
422+
assert.EqualValues(t, 64, poolmonitor.spec.RequestedIPCount)
423+
assert.Len(t, poolmonitor.spec.IPsNotInUse, int(initState.pendingRelease))
424+
425+
// trigger a batch release
426+
assert.NoError(t, fakecns.SetNumberOfAssignedIPs(30))
427+
assert.NoError(t, poolmonitor.reconcile(context.Background()))
428+
429+
// Ensure poolmonitor asked for a multiple of batch size
430+
assert.EqualValues(t, 48, poolmonitor.spec.RequestedIPCount)
431+
assert.Len(t, poolmonitor.spec.IPsNotInUse, int(initState.batch)+int(initState.pendingRelease))
432+
}
433+
392434
func TestPoolDecreaseBatchSizeGreaterThanMaxPodIPCount(t *testing.T) {
393435
initState := testState{
394436
batch: 31,

test/integration/setup_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ func TestMain(m *testing.M) {
9494
// create dirty cns ds
9595
if installCNS, err := strconv.ParseBool(installopt); err == nil && installCNS == true {
9696
if cnscleanup, err = installCNSDaemonset(ctx, clientset, os.Getenv(envTag), logDir); err != nil {
97+
log.Print(err)
9798
exitCode = 2
9899
return
99100
}

0 commit comments

Comments
 (0)