
Commit 28207fc

CNS: Fix IPAM scaler down to handle incremental decrease (#731)
1 parent f0907b4
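In short: as the diff below suggests, `decreasePoolSize` previously built the CRD's `IPsNotInUse` list only from the map returned by the most recent `MarkIPsAsPending` call, so a second batch release issued before DNC had processed the first could lose track of earlier batches. The new `createNNCSpecForCRD` method instead rebuilds `IPsNotInUse` from CNS's complete pending-release state on every spec update, letting consecutive decreases accumulate, and the new `TestPoolSizeDecreaseToReallyLow` test exercises a release spread over two batches.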

3 files changed: +112 −20 lines

cns/fakes/cnsfake.go

Lines changed: 13 additions & 8 deletions
```diff
@@ -135,17 +135,19 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 	defer ipm.Unlock()
 
 	var (
-		err            error
-		pendingRelease []cns.IPConfigurationStatus
+		err error
 	)
 
+	pendingRelease := make(map[string]cns.IPConfigurationStatus)
+
 	defer func() {
 		// if there was an error, and not all ip's have been freed, restore state
 		if err != nil && len(pendingRelease) != numberOfIPsToMark {
-			for i := range pendingRelease {
-				ipm.AvailableIPIDStack.Push(pendingRelease[i].ID)
-				ipm.AvailableIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
-				delete(ipm.PendingReleaseIPConfigState, pendingRelease[i].ID)
+			for uuid, ipState := range pendingRelease {
+				ipState.State = cns.Available
+				ipm.AvailableIPIDStack.Push(pendingRelease[uuid].ID)
+				ipm.AvailableIPConfigState[pendingRelease[uuid].ID] = ipState
+				delete(ipm.PendingReleaseIPConfigState, pendingRelease[uuid].ID)
 			}
 		}
 	}()
@@ -157,7 +159,10 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 		}
 
 		// add all pending release to a slice
-		pendingRelease = append(pendingRelease, ipm.AvailableIPConfigState[id])
+		ipConfig := ipm.AvailableIPConfigState[id]
+		ipConfig.State = cns.PendingRelease
+		pendingRelease[id] = ipConfig
+
 		delete(ipm.AvailableIPConfigState, id)
 	}
 
@@ -166,7 +171,7 @@ func (ipm *IPStateManager) MarkIPsAsPending(numberOfIPsToMark int) (map[string]c
 		ipm.PendingReleaseIPConfigState[pendingRelease[i].ID] = pendingRelease[i]
 	}
 
-	return ipm.PendingReleaseIPConfigState, nil
+	return pendingRelease, nil
 }
 
 type HTTPServiceFake struct {
```
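The fake's `pendingRelease` collection changes from a slice to a map keyed by IP UUID, and each moved entry now has its `State` flipped explicitly, which makes the deferred rollback a straight inverse of the forward move, and makes the function return only the batch it just marked rather than the whole pending store. A minimal, self-contained sketch of that pattern, with a hypothetical `ipConfig` type and state strings standing in for `cns.IPConfigurationStatus` and the `cns` state constants:

```go
package main

import (
	"errors"
	"fmt"
)

// ipConfig is a hypothetical stand-in for cns.IPConfigurationStatus.
type ipConfig struct {
	ID    string
	State string
}

// markIPsAsPending moves up to n entries from available into a freshly
// built pendingRelease map, undoing the partial move if it cannot satisfy
// the full request. Keying by UUID makes the rollback a direct inverse.
func markIPsAsPending(available map[string]ipConfig, n int) (map[string]ipConfig, error) {
	pendingRelease := make(map[string]ipConfig)

	for id, cfg := range available {
		if len(pendingRelease) == n {
			break
		}
		cfg.State = "PendingRelease" // mutate the copy, then store it
		pendingRelease[id] = cfg
		delete(available, id)
	}

	// Not enough free IPs: restore everything we moved, mirroring the
	// deferred restore in the fake (which also re-pushes each ID onto
	// its available-ID stack).
	if len(pendingRelease) != n {
		for id, cfg := range pendingRelease {
			cfg.State = "Available"
			available[id] = cfg
		}
		return nil, errors.New("insufficient free IPs to mark as pending")
	}
	return pendingRelease, nil
}

func main() {
	available := map[string]ipConfig{
		"uuid-1": {ID: "uuid-1", State: "Available"},
		"uuid-2": {ID: "uuid-2", State: "Available"},
	}
	pending, err := markIPsAsPending(available, 3) // ask for more than exist
	fmt.Println(pending, err)                      // rollback: nil map, error
	fmt.Println(available)                         // both IPs restored
}
```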

cns/ipampoolmonitor/ipampoolmonitor.go

Lines changed: 17 additions & 12 deletions
```diff
@@ -107,12 +107,12 @@ func (pm *CNSIPAMPoolMonitor) increasePoolSize() error {
 	pm.cachedNNC.Spec.RequestedIPCount += pm.scalarUnits.BatchSize
 
 	// pass nil map to CNStoCRDSpec because we don't want to modify the to be deleted ipconfigs
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
 	if err != nil {
 		return err
 	}
 
-	logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's:%v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+	logger.Printf("[ipam-pool-monitor] Increasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's:%v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
 	return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }
 
@@ -124,18 +124,20 @@ func (pm *CNSIPAMPoolMonitor) decreasePoolSize() error {
 	pm.cachedNNC.Spec.RequestedIPCount -= pm.scalarUnits.BatchSize
 
 	// mark n number of IP's as pending
-	pendingIPAddresses, err := pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
+	pendingIpAddresses, err := pm.cns.MarkIPsAsPending(int(pm.scalarUnits.BatchSize))
 	if err != nil {
 		return err
 	}
 
+	logger.Printf("[ipam-pool-monitor] Updated Requested count %v, Releasing ips: %+v", pm.cachedNNC.Spec.RequestedIPCount, pendingIpAddresses)
+
 	// convert the pending IP addresses to a spec
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(pendingIPAddresses, pm.cachedNNC.Spec.RequestedIPCount)
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(false)
 	if err != nil {
 		return err
 	}
 	pm.pendingRelease = true
-	logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()))
+	logger.Printf("[ipam-pool-monitor] Decreasing pool size, Current Pool Size: %v, Requested IP Count: %v, Pods with IP's: %v, ToBeDeleted Count: %v", len(pm.cns.GetPodIPConfigState()), pm.cachedNNC.Spec.RequestedIPCount, len(pm.cns.GetAllocatedIPConfigs()), len(pm.cachedNNC.Spec.IPsNotInUse))
 	return pm.rc.UpdateCRDSpec(context.Background(), pm.cachedNNC.Spec)
 }
 
@@ -146,7 +148,7 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease() error {
 	defer pm.mu.Unlock()
 
 	var err error
-	pm.cachedNNC.Spec, err = MarkIPsAsPendingInCRD(nil, pm.cachedNNC.Spec.RequestedIPCount)
+	pm.cachedNNC.Spec, err = pm.createNNCSpecForCRD(true)
 	if err != nil {
 		logger.Printf("[ipam-pool-monitor] Failed to translate ")
 	}
@@ -156,19 +158,22 @@ func (pm *CNSIPAMPoolMonitor) cleanPendingRelease() error {
 }
 
 // CNSToCRDSpec translates CNS's map of Ips to be released and requested ip count into a CRD Spec
-func MarkIPsAsPendingInCRD(toBeDeletedSecondaryIPConfigs map[string]cns.IPConfigurationStatus, ipCount int64) (nnc.NodeNetworkConfigSpec, error) {
+func (pm *CNSIPAMPoolMonitor) createNNCSpecForCRD(resetNotInUseList bool) (nnc.NodeNetworkConfigSpec, error) {
 	var (
 		spec nnc.NodeNetworkConfigSpec
-		uuid string
 	)
 
-	spec.RequestedIPCount = ipCount
+	// Update the count from cached spec
+	spec.RequestedIPCount = pm.cachedNNC.Spec.RequestedIPCount
 
-	if toBeDeletedSecondaryIPConfigs == nil {
+	// Discard the ToBeDeleted list if requested. This happens if DNC has cleaned up the pending ips and CNS has also updated its state.
+	if resetNotInUseList == true {
 		spec.IPsNotInUse = make([]string, 0)
 	} else {
-		for uuid = range toBeDeletedSecondaryIPConfigs {
-			spec.IPsNotInUse = append(spec.IPsNotInUse, uuid)
+		// Get All Pending IPs from CNS and populate it again.
+		pendingIps := pm.cns.GetPendingReleaseIPConfigs()
+		for _, pendingIp := range pendingIps {
+			spec.IPsNotInUse = append(spec.IPsNotInUse, pendingIp.ID)
 		}
 	}
 
```
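The crux of the fix is that the spec's `IPsNotInUse` list is no longer derived from an argument supplied by the caller; it is re-read from CNS's pending-release store on every update, so a batch marked in an earlier pass stays in the list until DNC confirms cleanup. A condensed, runnable sketch of that logic, with hypothetical stand-in types (the real method returns `(nnc.NodeNetworkConfigSpec, error)` and reads from `pm.cns.GetPendingReleaseIPConfigs()`):

```go
package main

import "fmt"

// Hypothetical stand-ins for cns.IPConfigurationStatus and
// nnc.NodeNetworkConfigSpec; the names are illustrative only.
type ipConfig struct{ ID string }

type nncSpec struct {
	RequestedIPCount int64
	IPsNotInUse      []string
}

type cnsState struct{ pendingRelease map[string]ipConfig }

func (c *cnsState) GetPendingReleaseIPConfigs() []ipConfig {
	out := make([]ipConfig, 0, len(c.pendingRelease))
	for _, ip := range c.pendingRelease {
		out = append(out, ip)
	}
	return out
}

type poolMonitor struct {
	cns        *cnsState
	cachedSpec nncSpec
}

// createNNCSpecForCRD (sketch): the spec is rebuilt from CNS's full
// pending-release state rather than from the batch just marked, so a
// decrease issued before DNC processes the previous one keeps both
// batches listed in IPsNotInUse.
func (pm *poolMonitor) createNNCSpecForCRD(resetNotInUseList bool) nncSpec {
	spec := nncSpec{RequestedIPCount: pm.cachedSpec.RequestedIPCount}
	if resetNotInUseList {
		// DNC has cleaned up the pending IPs and CNS has updated its state.
		spec.IPsNotInUse = make([]string, 0)
		return spec
	}
	for _, ip := range pm.cns.GetPendingReleaseIPConfigs() {
		spec.IPsNotInUse = append(spec.IPsNotInUse, ip.ID)
	}
	return spec
}

func main() {
	pm := &poolMonitor{
		cns: &cnsState{pendingRelease: map[string]ipConfig{
			"uuid-1": {ID: "uuid-1"}, // marked in the first decrease
			"uuid-2": {ID: "uuid-2"}, // marked in the second decrease
		}},
		cachedSpec: nncSpec{RequestedIPCount: 10},
	}
	fmt.Printf("%+v\n", pm.createNNCSpecForCRD(false)) // both UUIDs listed
}
```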

cns/ipampoolmonitor/ipampoolmonitor_test.go

Lines changed: 82 additions & 0 deletions
```diff
@@ -313,3 +313,85 @@ func TestPoolSizeDecreaseWhenDecreaseHasAlreadyBeenRequested(t *testing.T) {
 		t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
 	}
 }
+
+func TestPoolSizeDecreaseToReallyLow(t *testing.T) {
+	var (
+		batchSize               = 10
+		initialIPConfigCount    = 30
+		requestThresholdPercent = 30
+		releaseThresholdPercent = 100
+	)
+
+	fakecns, fakerc, poolmonitor := initFakes(batchSize, initialIPConfigCount, requestThresholdPercent, releaseThresholdPercent)
+
+	log.Printf("Min free IP's %v", poolmonitor.MinimumFreeIps)
+	log.Printf("Max free IP %v", poolmonitor.MaximumFreeIps)
+
+	// initial pool count is 30, set 23 of them to be allocated
+	err := fakecns.SetNumberOfAllocatedIPs(23)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// Pool monitor does nothing, as the current number of free IP's falls within the thresholds
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+	}
+
+	// Now drop the allocated count to really low, say 3. This should trigger release in 2 batches
+	err = fakecns.SetNumberOfAllocatedIPs(3)
+	if err != nil {
+		t.Error(err)
+	}
+
+	// The free count now exceeds the max free IP threshold, so this reconcile should release the first batch
+	t.Logf("Reconcile after allocated count drops from 23 -> 3, expected to-be-released count = 10")
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+	}
+
+	// Ensure the spec's IPsNotInUse list is one batch in size
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize {
+		t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", batchSize, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+
+	// Ensure the requested IP count is now one batch size smaller than the initial IP count
+	if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-batchSize) {
+		t.Fatalf("Expected pool size to be one batch size smaller after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize), poolmonitor.cachedNNC.Spec.RequestedIPCount)
+	}
+
+	// Reconcile again, it should release the second batch
+	t.Logf("Reconcile again - 2, expected to-be-released count = 20")
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected pool monitor to not fail after CNS set number of allocated IP's %v", err)
+	}
+
+	// Ensure the spec's IPsNotInUse list now holds both batches
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != batchSize*2 {
+		t.Fatalf("Expected IP's not in use is not correct, expected %v, actual %v", batchSize*2, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+
+	// Ensure the requested IP count is now two batch sizes smaller than the initial IP count
+	if poolmonitor.cachedNNC.Spec.RequestedIPCount != int64(initialIPConfigCount-(batchSize*2)) {
+		t.Fatalf("Expected pool size to be two batch sizes smaller after reconcile, expected %v, actual %v", (initialIPConfigCount - batchSize*2), poolmonitor.cachedNNC.Spec.RequestedIPCount)
+	}
+
+	t.Logf("Update Request Controller")
+	err = fakerc.Reconcile()
+	if err != nil {
+		t.Error(err)
+	}
+
+	err = poolmonitor.Reconcile()
+	if err != nil {
+		t.Errorf("Expected no pool monitor failure after request controller reconcile: %v", err)
+	}
+
+	// Ensure the spec doesn't have any IPsNotInUse after request controller has reconciled
+	if len(poolmonitor.cachedNNC.Spec.IPsNotInUse) != 0 {
+		t.Fatalf("Expected IP's not in use to be 0 after reconcile, expected %v, actual %v", 0, len(poolmonitor.cachedNNC.Spec.IPsNotInUse))
+	}
+}
```
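The test's numbers rely on the monitor's threshold arithmetic, which (an assumption from the surrounding code, not shown in this diff) derives both free-IP bounds from the batch size. Under that assumption, a 100% release threshold puts the free-IP ceiling at 10, so dropping allocations from 23 to 3 leaves 27 free and forces one batch release per `Reconcile` until the pool is back inside the bounds. A sketch of that arithmetic:

```go
package main

import "fmt"

func main() {
	// Assumed threshold formula (sketch): the free-IP ceiling scales off
	// BatchSize, i.e. maxFreeIps = BatchSize * ReleaseThresholdPercent / 100.
	batchSize := int64(10)
	releaseThresholdPercent := int64(100)
	maxFreeIps := batchSize * releaseThresholdPercent / 100 // 10

	requested, allocated := int64(30), int64(3)

	// Each Reconcile releases at most one batch, so scaling 30 -> 10
	// takes two passes: requested 20 (17 free), then 10 (7 free).
	for requested-allocated > maxFreeIps {
		requested -= batchSize
		fmt.Printf("released a batch: requested=%d free=%d\n", requested, requested-allocated)
	}
}
```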
