Skip to content

Commit 21b5bbe

Browse files
authored
fix: reconcile initial state from CRD regardless of existing podInfo (#2022)
* fix: reconcile initial state from CRD regardless of existing podInfo Signed-off-by: Evan Baker <[email protected]> * fix: add test for no state and pending release in NNC Signed-off-by: Evan Baker <[email protected]> * fix: add test for restoring state and pending release in NNC Signed-off-by: Evan Baker <[email protected]> --------- Signed-off-by: Evan Baker <[email protected]>
1 parent 5caf8d2 commit 21b5bbe

File tree

4 files changed

+138
-43
lines changed

4 files changed

+138
-43
lines changed

cns/restserver/internalapi_test.go

Lines changed: 120 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package restserver
66
import (
77
"context"
88
"fmt"
9+
"math/rand"
910
"os"
1011
"reflect"
1112
"strconv"
@@ -21,6 +22,7 @@ import (
2122
"github.com/google/uuid"
2223
"github.com/pkg/errors"
2324
"github.com/stretchr/testify/assert"
25+
"golang.org/x/exp/maps"
2426
)
2527

2628
const (
@@ -336,7 +338,99 @@ func TestReconcileNCWithEmptyState(t *testing.T) {
336338
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
337339
}
338340

339-
validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods)
341+
validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods, nil)
342+
}
343+
344+
// TestReconcileNCWithEmptyStateAndPendingRelease tests the case where there is
345+
// no state (node reboot) and there are pending release IPs in the NNC that
346+
// may have been deallocated and should not be made available for assignment
347+
// to pods.
348+
func TestReconcileNCWithEmptyStateAndPendingRelease(t *testing.T) {
349+
restartService()
350+
setEnv(t)
351+
setOrchestratorTypeInternal(cns.KubernetesCRD)
352+
353+
expectedAssignedPods := make(map[string]cns.PodInfo)
354+
355+
secondaryIPConfigs := make(map[string]cns.SecondaryIPConfig)
356+
for i := 6; i < 22; i++ {
357+
ipaddress := "10.0.0." + strconv.Itoa(i)
358+
secIPConfig := newSecondaryIPConfig(ipaddress, -1)
359+
ipID := uuid.New()
360+
secondaryIPConfigs[ipID.String()] = secIPConfig
361+
}
362+
pending := func() []string {
363+
numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
364+
pendingIPs := []string{}
365+
for k := range secondaryIPConfigs {
366+
if numPending == 0 {
367+
break
368+
}
369+
pendingIPs = append(pendingIPs, k)
370+
numPending--
371+
}
372+
return pendingIPs
373+
}()
374+
req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
375+
returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
376+
Spec: v1alpha.NodeNetworkConfigSpec{
377+
IPsNotInUse: pending,
378+
},
379+
})
380+
if returnCode != types.Success {
381+
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
382+
}
383+
validateNetworkRequest(t, *req)
384+
// confirm that the correct number of IPs are now PendingRelease
385+
assert.EqualValues(t, len(pending), len(svc.GetPendingReleaseIPConfigs()))
386+
}
387+
388+
func TestReconcileNCWithExistingStateAndPendingRelease(t *testing.T) {
389+
restartService()
390+
setEnv(t)
391+
setOrchestratorTypeInternal(cns.KubernetesCRD)
392+
393+
secondaryIPConfigs := make(map[string]cns.SecondaryIPConfig)
394+
for i := 6; i < 22; i++ {
395+
ipaddress := "10.0.0." + strconv.Itoa(i)
396+
secIPConfig := newSecondaryIPConfig(ipaddress, -1)
397+
ipID := uuid.New()
398+
secondaryIPConfigs[ipID.String()] = secIPConfig
399+
}
400+
expectedAssignedPods := map[string]cns.PodInfo{
401+
"10.0.0.6": cns.NewPodInfo("", "", "reconcilePod1", "PodNS1"),
402+
"10.0.0.7": cns.NewPodInfo("", "", "reconcilePod2", "PodNS1"),
403+
}
404+
pendingIPIDs := func() map[string]cns.PodInfo {
405+
numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
406+
pendingIPs := map[string]cns.PodInfo{}
407+
for k := range secondaryIPConfigs {
408+
if numPending == 0 {
409+
break
410+
}
411+
if _, ok := expectedAssignedPods[secondaryIPConfigs[k].IPAddress]; ok {
412+
continue
413+
}
414+
pendingIPs[k] = nil
415+
numPending--
416+
}
417+
return pendingIPs
418+
}()
419+
req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
420+
421+
expectedNcCount := len(svc.state.ContainerStatus)
422+
returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
423+
Spec: v1alpha.NodeNetworkConfigSpec{
424+
IPsNotInUse: maps.Keys(pendingIPIDs),
425+
},
426+
})
427+
if returnCode != types.Success {
428+
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
429+
}
430+
validateNetworkRequest(t, *req)
431+
// confirm that the correct number of IPs are now PendingRelease
432+
assert.EqualValues(t, len(pendingIPIDs), len(svc.GetPendingReleaseIPConfigs()))
433+
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, pendingIPIDs)
340434
}
341435

342436
func TestReconcileNCWithExistingState(t *testing.T) {
@@ -378,7 +472,7 @@ func TestReconcileNCWithExistingState(t *testing.T) {
378472
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
379473
}
380474

381-
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
475+
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
382476
}
383477

384478
func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
@@ -422,7 +516,7 @@ func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
422516
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
423517
}
424518

425-
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
519+
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
426520
}
427521

428522
func TestReconcileNCWithSystemPods(t *testing.T) {
@@ -466,7 +560,7 @@ func TestReconcileNCWithSystemPods(t *testing.T) {
466560
}
467561

468562
delete(expectedAssignedPods, "192.168.0.1")
469-
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods)
563+
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods, nil)
470564
}
471565

472566
func setOrchestratorTypeInternal(orchestratorType string) {
@@ -653,21 +747,21 @@ func generateNetworkContainerRequest(secondaryIps map[string]cns.SecondaryIPConf
653747
return &req
654748
}
655749

656-
func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNcCount int, expectedAssignedPods map[string]cns.PodInfo) {
750+
func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNCCount int, expectedAssignedIPs, expectedPendingIPs map[string]cns.PodInfo) {
657751
if ncRequest == nil {
658752
// check svc ContainerStatus will be empty
659-
if len(svc.state.ContainerStatus) != expectedNcCount {
753+
if len(svc.state.ContainerStatus) != expectedNCCount {
660754
t.Fatalf("CNS has some stale ContainerStatus, count: %d, state: %+v", len(svc.state.ContainerStatus), svc.state.ContainerStatus)
661755
}
662756
} else {
663757
validateNetworkRequest(t, *ncRequest)
664758
}
665759

666-
if len(expectedAssignedPods) != len(svc.PodIPIDByPodInterfaceKey) {
667-
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedPods))
760+
if len(expectedAssignedIPs) != len(svc.PodIPIDByPodInterfaceKey) {
761+
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedIPs))
668762
}
669763

670-
for ipaddress, podInfo := range expectedAssignedPods {
764+
for ipaddress, podInfo := range expectedAssignedIPs {
671765
for _, ipID := range svc.PodIPIDByPodInterfaceKey[podInfo.Key()] {
672766
ipConfigstate := svc.PodIPConfigState[ipID]
673767
if ipConfigstate.GetState() != types.Assigned {
@@ -694,18 +788,26 @@ func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkCon
694788

695789
// validate rest of Secondary IPs in Available state
696790
if ncRequest != nil {
697-
for secIpId, secIpConfig := range ncRequest.SecondaryIPConfigs {
698-
if _, exists := expectedAssignedPods[secIpConfig.IPAddress]; exists {
699-
continue
700-
}
701-
791+
for secIPID, secIPConfig := range ncRequest.SecondaryIPConfigs {
702792
// Validate IP state
703-
if secIpConfigState, found := svc.PodIPConfigState[secIpId]; found {
704-
if secIpConfigState.GetState() != types.Available {
705-
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIpId, secIpConfigState)
793+
if secIPConfigState, found := svc.PodIPConfigState[secIPID]; found {
794+
if _, exists := expectedAssignedIPs[secIPConfig.IPAddress]; exists {
795+
if secIPConfigState.GetState() != types.Assigned {
796+
t.Fatalf("IPId: %s State is not Assigned, ipStatus: %+v", secIPID, secIPConfigState)
797+
}
798+
continue
799+
}
800+
if _, exists := expectedPendingIPs[secIPID]; exists {
801+
if secIPConfigState.GetState() != types.PendingRelease {
802+
t.Fatalf("IPId: %s State is not PendingRelease, ipStatus: %+v", secIPID, secIPConfigState)
803+
}
804+
continue
805+
}
806+
if secIPConfigState.GetState() != types.Available {
807+
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIPID, secIPConfigState)
706808
}
707809
} else {
708-
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIpId, secIpConfig)
810+
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIPID, secIPConfig)
709811
}
710812
}
711813
}

cns/service/main.go

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,34 +1163,24 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
11631163
poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, clusterSubnetStateChan, &poolOpts)
11641164
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
11651165

1166-
// reconcile initial CNS state from CNI or apiserver.
1167-
// Only reconcile if there are any existing Pods using NC ips,
1168-
// else let the goal state be updated using a regular NNC Reconciler loop
1169-
podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
1170-
if err != nil {
1171-
return errors.Wrap(err, "failed to provide PodInfoByIP")
1172-
}
1173-
if len(podInfoByIP) > 0 {
1174-
logger.Printf("Reconciling initial CNS state as PodInfoByIP is not empty: %d", len(podInfoByIP))
1175-
1176-
// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
1177-
// aks addons to come up so retry a bit more aggresively here.
1178-
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
1179-
attempt := 0
1180-
err = retry.Do(func() error {
1181-
attempt++
1182-
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1183-
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
1184-
if err != nil {
1185-
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
1186-
}
1187-
return errors.Wrap(err, "failed to initialize CNS state")
1188-
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
1166+
logger.Printf("Reconciling initial CNS state")
1167+
// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
1168+
// aks addons to come up so retry a bit more aggresively here.
1169+
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
1170+
attempt := 0
1171+
err = retry.Do(func() error {
1172+
attempt++
1173+
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1174+
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
11891175
if err != nil {
1190-
return err
1176+
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
11911177
}
1192-
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
1178+
return errors.Wrap(err, "failed to initialize CNS state")
1179+
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
1180+
if err != nil {
1181+
return err
11931182
}
1183+
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
11941184

11951185
// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
11961186
// NodeNetworkConfig update by the time the Reconciler tries to send it.

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ require (
3333
github.com/spf13/viper v1.14.0
3434
github.com/stretchr/testify v1.8.2
3535
go.uber.org/zap v1.24.0
36+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
3637
golang.org/x/sys v0.6.0
3738
google.golang.org/grpc v1.52.0
3839
google.golang.org/protobuf v1.28.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
844844
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
845845
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
846846
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
847+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc=
848+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
847849
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
848850
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
849851
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=

0 commit comments

Comments
 (0)