Skip to content

Commit fa2cbef

Browse files
authored
fix: reconcile initial state from CRD regardless of existing podInfo (#2022)
* fix: reconcile initial state from CRD regardless of existing podInfo Signed-off-by: Evan Baker <[email protected]> * fix: add test for no state and pending release in NNC Signed-off-by: Evan Baker <[email protected]> * fix: add test for restoring state and pending release in NNC Signed-off-by: Evan Baker <[email protected]>
1 parent 0822d0c commit fa2cbef

File tree

136 files changed

+4441
-1442
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

136 files changed

+4441
-1442
lines changed

cns/restserver/internalapi_test.go

Lines changed: 151 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package restserver
66
import (
77
"context"
88
"fmt"
9+
"math/rand"
910
"os"
1011
"reflect"
1112
"strconv"
@@ -21,6 +22,8 @@ import (
2122
"github.com/google/uuid"
2223
"github.com/pkg/errors"
2324
"github.com/stretchr/testify/assert"
25+
"github.com/stretchr/testify/require"
26+
"golang.org/x/exp/maps"
2427
)
2528

2629
const (
@@ -336,7 +339,126 @@ func TestReconcileNCWithEmptyState(t *testing.T) {
336339
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
337340
}
338341

339-
validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods)
342+
validateNCStateAfterReconcile(t, nil, expectedNcCount, expectedAssignedPods, nil)
343+
}
344+
345+
// TestReconcileNCWithEmptyStateAndPendingRelease tests the case where there is
346+
// no state (node reboot) and there are pending release IPs in the NNC that
347+
// may have been deallocated and should not be made available for assignment
348+
// to pods.
349+
func TestReconcileNCWithEmptyStateAndPendingRelease(t *testing.T) {
350+
restartService()
351+
setEnv(t)
352+
setOrchestratorTypeInternal(cns.KubernetesCRD)
353+
354+
expectedAssignedPods := map[string]cns.PodInfo{}
355+
secondaryIPConfigs := map[string]cns.SecondaryIPConfig{}
356+
for i := 6; i < 22; i++ {
357+
ipaddress := "10.0.0." + strconv.Itoa(i)
358+
secIPConfig := newSecondaryIPConfig(ipaddress, -1)
359+
ipID := uuid.New()
360+
secondaryIPConfigs[ipID.String()] = secIPConfig
361+
}
362+
pendingReleaseIPIDs := func() map[string]cns.PodInfo {
363+
numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
364+
pendingIPs := map[string]cns.PodInfo{}
365+
for k := range secondaryIPConfigs {
366+
if numPending == 0 {
367+
break
368+
}
369+
pendingIPs[k] = nil
370+
numPending--
371+
}
372+
return pendingIPs
373+
}()
374+
req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
375+
returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
376+
Spec: v1alpha.NodeNetworkConfigSpec{
377+
IPsNotInUse: maps.Keys(pendingReleaseIPIDs),
378+
},
379+
})
380+
assert.Equal(t, types.Success, returnCode, "Unexpected failure on reconcile with no state")
381+
// confirm that the correct number of IPs are now PendingRelease
382+
assert.EqualValues(t, len(pendingReleaseIPIDs), len(svc.GetPendingReleaseIPConfigs()))
383+
384+
// validate state
385+
containerStatus := svc.state.ContainerStatus[req.NetworkContainerid]
386+
assert.Equal(t, req.NetworkContainerid, containerStatus.ID, "NCID is not persisted")
387+
assert.Equal(t, req.NetworkContainerType, containerStatus.CreateNetworkContainerRequest.NetworkContainerType, "ContainerType is not persisted")
388+
assert.Equal(t, req.IPConfiguration.IPSubnet.IPAddress, containerStatus.CreateNetworkContainerRequest.IPConfiguration.IPSubnet.IPAddress, "Primary IPAddress doesnt match")
389+
assert.Equal(t, len(req.SecondaryIPConfigs), len(svc.PodIPConfigState), "Secondary IP count doesnt match in PodIpConfig state")
390+
for ipID, ipStatus := range svc.PodIPConfigState {
391+
require.Contains(t, req.SecondaryIPConfigs, ipID, "PodIpConfigState has stale ipID")
392+
assert.Equal(t, req.SecondaryIPConfigs[ipID].IPAddress, ipStatus.IPAddress, "IPSubnet doesnt match")
393+
if expectedAssignedPods[ipID] != nil {
394+
require.Equal(t, types.Assigned, ipStatus.GetState(), "IPState is not Assigned")
395+
require.NotNil(t, ipStatus.PodInfo, "PodInfo is nil")
396+
}
397+
if pendingReleaseIPIDs[ipID] != nil {
398+
require.Equal(t, types.PendingRelease, ipStatus.GetState(), "IPState is not PendingRelease")
399+
}
400+
}
401+
}
402+
403+
func TestReconcileNCWithExistingStateAndPendingRelease(t *testing.T) {
404+
restartService()
405+
setEnv(t)
406+
setOrchestratorTypeInternal(cns.KubernetesCRD)
407+
408+
secondaryIPConfigs := make(map[string]cns.SecondaryIPConfig)
409+
for i := 6; i < 22; i++ {
410+
ipaddress := "10.0.0." + strconv.Itoa(i)
411+
secIPConfig := newSecondaryIPConfig(ipaddress, -1)
412+
ipID := uuid.New()
413+
secondaryIPConfigs[ipID.String()] = secIPConfig
414+
}
415+
expectedAssignedPods := map[string]cns.PodInfo{
416+
"10.0.0.6": cns.NewPodInfo("", "", "reconcilePod1", "PodNS1"),
417+
"10.0.0.7": cns.NewPodInfo("", "", "reconcilePod2", "PodNS1"),
418+
}
419+
pendingReleaseIPIDs := func() map[string]cns.PodInfo {
420+
numPending := rand.Intn(len(secondaryIPConfigs)) + 1 //nolint:gosec // weak rand is sufficient in test
421+
pendingIPs := map[string]cns.PodInfo{}
422+
for k := range secondaryIPConfigs {
423+
if numPending == 0 {
424+
break
425+
}
426+
if _, ok := expectedAssignedPods[secondaryIPConfigs[k].IPAddress]; ok {
427+
continue
428+
}
429+
pendingIPs[k] = nil
430+
numPending--
431+
}
432+
return pendingIPs
433+
}()
434+
req := generateNetworkContainerRequest(secondaryIPConfigs, "reconcileNc1", "-1")
435+
returnCode := svc.ReconcileNCState(req, expectedAssignedPods, &v1alpha.NodeNetworkConfig{
436+
Spec: v1alpha.NodeNetworkConfigSpec{
437+
IPsNotInUse: maps.Keys(pendingReleaseIPIDs),
438+
},
439+
})
440+
if returnCode != types.Success {
441+
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
442+
}
443+
// confirm that the correct number of IPs are now PendingRelease
444+
assert.EqualValues(t, len(pendingReleaseIPIDs), len(svc.GetPendingReleaseIPConfigs()))
445+
// validate state
446+
containerStatus := svc.state.ContainerStatus[req.NetworkContainerid]
447+
assert.Equal(t, req.NetworkContainerid, containerStatus.ID, "NCID is not persisted")
448+
assert.Equal(t, req.NetworkContainerType, containerStatus.CreateNetworkContainerRequest.NetworkContainerType, "ContainerType is not persisted")
449+
assert.Equal(t, req.IPConfiguration.IPSubnet.IPAddress, containerStatus.CreateNetworkContainerRequest.IPConfiguration.IPSubnet.IPAddress, "Primary IPAddress doesnt match")
450+
assert.Equal(t, len(req.SecondaryIPConfigs), len(svc.PodIPConfigState), "Secondary IP count doesnt match in PodIpConfig state")
451+
for ipID, ipStatus := range svc.PodIPConfigState {
452+
require.Contains(t, req.SecondaryIPConfigs, ipID, "PodIpConfigState has stale ipID")
453+
assert.Equal(t, req.SecondaryIPConfigs[ipID].IPAddress, ipStatus.IPAddress, "IPSubnet doesnt match")
454+
if expectedAssignedPods[ipID] != nil {
455+
require.Equal(t, types.Assigned, ipStatus.GetState(), "IPState is not Assigned")
456+
require.NotNil(t, ipStatus.PodInfo, "PodInfo is nil")
457+
}
458+
if pendingReleaseIPIDs[ipID] != nil {
459+
require.Equal(t, types.PendingRelease, ipStatus.GetState(), "IPState is not PendingRelease")
460+
}
461+
}
340462
}
341463

342464
func TestReconcileNCWithExistingState(t *testing.T) {
@@ -378,7 +500,7 @@ func TestReconcileNCWithExistingState(t *testing.T) {
378500
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
379501
}
380502

381-
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
503+
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
382504
}
383505

384506
func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
@@ -422,7 +544,7 @@ func TestReconcileNCWithExistingStateFromInterfaceID(t *testing.T) {
422544
t.Errorf("Unexpected failure on reconcile with no state %d", returnCode)
423545
}
424546

425-
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods)
547+
validateNCStateAfterReconcile(t, req, expectedNcCount+1, expectedAssignedPods, nil)
426548
}
427549

428550
func TestReconcileNCWithSystemPods(t *testing.T) {
@@ -466,7 +588,7 @@ func TestReconcileNCWithSystemPods(t *testing.T) {
466588
}
467589

468590
delete(expectedAssignedPods, "192.168.0.1")
469-
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods)
591+
validateNCStateAfterReconcile(t, req, expectedNcCount, expectedAssignedPods, nil)
470592
}
471593

472594
func setOrchestratorTypeInternal(orchestratorType string) {
@@ -655,24 +777,23 @@ func generateNetworkContainerRequest(secondaryIps map[string]cns.SecondaryIPConf
655777
return &req
656778
}
657779

658-
func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNcCount int, expectedAssignedPods map[string]cns.PodInfo) {
780+
func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkContainerRequest, expectedNCCount int, expectedAssignedIPs, expectedPendingIPs map[string]cns.PodInfo) {
659781
if ncRequest == nil {
660782
// check svc ContainerStatus will be empty
661-
if len(svc.state.ContainerStatus) != expectedNcCount {
783+
if len(svc.state.ContainerStatus) != expectedNCCount {
662784
t.Fatalf("CNS has some stale ContainerStatus, count: %d, state: %+v", len(svc.state.ContainerStatus), svc.state.ContainerStatus)
663785
}
664786
} else {
665787
validateNetworkRequest(t, *ncRequest)
666788
}
667789

668-
if len(expectedAssignedPods) != len(svc.PodIPIDByPodInterfaceKey) {
669-
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedPods))
790+
if len(expectedAssignedIPs) != len(svc.PodIPIDByPodInterfaceKey) {
791+
t.Fatalf("Unexpected assigned pods, actual: %d, expected: %d", len(svc.PodIPIDByPodInterfaceKey), len(expectedAssignedIPs))
670792
}
671793

672-
for ipaddress, podInfo := range expectedAssignedPods {
673-
ipId := svc.PodIPIDByPodInterfaceKey[podInfo.Key()]
674-
ipConfigstate := svc.PodIPConfigState[ipId]
675-
794+
for ipaddress, podInfo := range expectedAssignedIPs {
795+
ipID := svc.PodIPIDByPodInterfaceKey[podInfo.Key()]
796+
ipConfigstate := svc.PodIPConfigState[ipID]
676797
if ipConfigstate.GetState() != types.Assigned {
677798
t.Fatalf("IpAddress %s is not marked as assigned to Pod: %+v, ipState: %+v", ipaddress, podInfo, ipConfigstate)
678799
}
@@ -682,7 +803,7 @@ func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkCon
682803
t.Fatalf("IpAddress %s is not same, for Pod: %+v, actual ipState: %+v", ipaddress, podInfo, ipConfigstate)
683804
}
684805

685-
// Valdate pod context
806+
// Validate pod context
686807
if reflect.DeepEqual(ipConfigstate.PodInfo, podInfo) != true {
687808
t.Fatalf("OrchestrationContext: is not same, expected: %+v, actual %+v", ipConfigstate.PodInfo, podInfo)
688809
}
@@ -696,18 +817,26 @@ func validateNCStateAfterReconcile(t *testing.T, ncRequest *cns.CreateNetworkCon
696817

697818
// validate rest of Secondary IPs in Available state
698819
if ncRequest != nil {
699-
for secIpId, secIpConfig := range ncRequest.SecondaryIPConfigs {
700-
if _, exists := expectedAssignedPods[secIpConfig.IPAddress]; exists {
701-
continue
702-
}
703-
820+
for secIPID, secIPConfig := range ncRequest.SecondaryIPConfigs {
704821
// Validate IP state
705-
if secIpConfigState, found := svc.PodIPConfigState[secIpId]; found {
706-
if secIpConfigState.GetState() != types.Available {
707-
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIpId, secIpConfigState)
822+
if secIPConfigState, found := svc.PodIPConfigState[secIPID]; found {
823+
if _, exists := expectedAssignedIPs[secIPConfig.IPAddress]; exists {
824+
if secIPConfigState.GetState() != types.Assigned {
825+
t.Fatalf("IPId: %s State is not Assigned, ipStatus: %+v", secIPID, secIPConfigState)
826+
}
827+
continue
828+
}
829+
if _, exists := expectedPendingIPs[secIPID]; exists {
830+
if secIPConfigState.GetState() != types.PendingRelease {
831+
t.Fatalf("IPId: %s State is not PendingRelease, ipStatus: %+v", secIPID, secIPConfigState)
832+
}
833+
continue
834+
}
835+
if secIPConfigState.GetState() != types.Available {
836+
t.Fatalf("IPId: %s State is not Available, ipStatus: %+v", secIPID, secIPConfigState)
708837
}
709838
} else {
710-
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIpId, secIpConfig)
839+
t.Fatalf("IPId: %s, IpAddress: %+v State doesnt exists in PodIp Map", secIPID, secIPConfig)
711840
}
712841
}
713842
}

cns/service/main.go

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1139,34 +1139,24 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn
11391139
poolMonitor := ipampool.NewMonitor(httpRestServiceImplementation, scopedcli, clusterSubnetStateChan, &poolOpts)
11401140
httpRestServiceImplementation.IPAMPoolMonitor = poolMonitor
11411141

1142-
// reconcile initial CNS state from CNI or apiserver.
1143-
// Only reconcile if there are any existing Pods using NC ips,
1144-
// else let the goal state be updated using a regular NNC Reconciler loop
1145-
podInfoByIP, err := podInfoByIPProvider.PodInfoByIP()
1146-
if err != nil {
1147-
return errors.Wrap(err, "failed to provide PodInfoByIP")
1148-
}
1149-
if len(podInfoByIP) > 0 {
1150-
logger.Printf("Reconciling initial CNS state as PodInfoByIP is not empty: %d", len(podInfoByIP))
1151-
1152-
// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
1153-
// aks addons to come up so retry a bit more aggresively here.
1154-
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
1155-
attempt := 0
1156-
err = retry.Do(func() error {
1157-
attempt++
1158-
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1159-
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
1160-
if err != nil {
1161-
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
1162-
}
1163-
return errors.Wrap(err, "failed to initialize CNS state")
1164-
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
1142+
logger.Printf("Reconciling initial CNS state")
1143+
// apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for
1144+
// aks addons to come up so retry a bit more aggresively here.
1145+
// will retry 10 times maxing out at a minute taking about 8 minutes before it gives up.
1146+
attempt := 0
1147+
err = retry.Do(func() error {
1148+
attempt++
1149+
logger.Printf("reconciling initial CNS state attempt: %d", attempt)
1150+
err = reconcileInitialCNSState(ctx, scopedcli, httpRestServiceImplementation, podInfoByIPProvider)
11651151
if err != nil {
1166-
return err
1152+
logger.Errorf("failed to reconcile initial CNS state, attempt: %d err: %v", attempt, err)
11671153
}
1168-
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
1154+
return errors.Wrap(err, "failed to initialize CNS state")
1155+
}, retry.Context(ctx), retry.Delay(initCNSInitalDelay), retry.MaxDelay(time.Minute))
1156+
if err != nil {
1157+
return err
11691158
}
1159+
logger.Printf("reconciled initial CNS state after %d attempts", attempt)
11701160

11711161
// start the pool Monitor before the Reconciler, since it needs to be ready to receive an
11721162
// NodeNetworkConfig update by the time the Reconciler tries to send it.

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ require (
3333
github.com/spf13/viper v1.14.0
3434
github.com/stretchr/testify v1.8.2
3535
go.uber.org/zap v1.24.0
36-
golang.org/x/sys v0.3.0
36+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1
37+
golang.org/x/sys v0.6.0
3738
google.golang.org/grpc v1.52.0
3839
google.golang.org/protobuf v1.28.1
3940
k8s.io/api v0.26.0

go.sum

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
844844
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
845845
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
846846
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
847+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERsqu1GIbi967PQMq3Ivc=
848+
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
847849
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
848850
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
849851
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@@ -1023,8 +1025,8 @@ golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBc
10231025
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10241026
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10251027
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1026-
golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ=
1027-
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1028+
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
1029+
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10281030
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
10291031
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
10301032
golang.org/x/term v0.3.0 h1:qoo4akIqOcDME5bhc/NgxUdovd6BSS2uMsVjB56q1xI=

vendor/golang.org/x/exp/LICENSE

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/golang.org/x/exp/PATENTS

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)