Skip to content

Commit 7f223f2

Browse files
authored
fix: generate cni conflist if NCs already exist in state (#1940)
* fix: generate cni conflist if NCs already exist in state * fix: lint * fix: data race in test * fix: lint in tests
1 parent b9861e2 commit 7f223f2

File tree

2 files changed

+224
-11
lines changed

2 files changed

+224
-11
lines changed

cns/restserver/internalapi.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
164164
service.Lock()
165165
defer service.Unlock()
166166
start := time.Now()
167-
err := service.syncHostNCVersion(ctx, channelMode)
167+
programmedNCCount, err := service.syncHostNCVersion(ctx, channelMode)
168+
// even if we get an error, we want to write the CNI conflist if we have any NC programmed to any version
169+
if programmedNCCount > 0 {
170+
// This will only be done once per lifetime of the CNS process. This function is threadsafe and will panic
171+
// if it fails, so it is safe to call in a non-preemptable goroutine.
172+
go service.MustGenerateCNIConflistOnce()
173+
}
168174
if err != nil {
169175
logger.Errorf("sync host error %v", err)
170176
}
@@ -174,8 +180,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
174180

175181
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
176182

177-
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) error {
183+
// syncHostVersion updates the CNS state with the latest programmed versions of NCs attached to the VM. If any NC in local CNS state
184+
// does not match the version that DNC claims to have published, this function will call NMAgent and list the latest programmed versions of
185+
// all NCs and update the CNS state accordingly. This function returns the the total number of NCs on this VM that have been programmed to
186+
// some version, NOT the number of NCs that are up-to-date.
187+
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) (int, error) {
178188
outdatedNCs := map[string]struct{}{}
189+
programmedNCs := map[string]struct{}{}
179190
for idx := range service.state.ContainerStatus {
180191
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
181192
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
@@ -194,13 +205,17 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
194205
} else if localNCVersion > dncNCVersion {
195206
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
196207
}
208+
209+
if localNCVersion > -1 {
210+
programmedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
211+
}
197212
}
198213
if len(outdatedNCs) == 0 {
199-
return nil
214+
return len(programmedNCs), nil
200215
}
201216
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
202217
if err != nil {
203-
return errors.Wrap(err, "failed to get nc version list from nmagent")
218+
return len(programmedNCs), errors.Wrap(err, "failed to get nc version list from nmagent")
204219
}
205220

206221
nmaNCs := map[string]string{}
@@ -223,8 +238,13 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
223238
if !exist {
224239
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
225240
// this point, our internal state has changed unexpectedly and we should bail out and try again.
226-
return errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
241+
return len(programmedNCs), errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
227242
}
243+
// if the NC still exists in state and is programmed to some version (doesn't have to be latest), add it to our set of NCs that have been programmed
244+
if nmaNCVersion > -1 {
245+
programmedNCs[ncID] = struct{}{}
246+
}
247+
228248
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
229249
if err != nil {
230250
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
@@ -247,14 +267,10 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
247267
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
248268
// need to return an error indicating that
249269
if len(outdatedNCs) > 0 {
250-
return errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
270+
return len(programmedNCs), errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
251271
}
252272

253-
// if NMA has programmed all the NCs that we expect, we should write the CNI conflist. This will only be done
254-
// once per lifetime of the CNS process. This function is threadsafe and will panic if it fails, so it is safe
255-
// to call in a non-preemptable goroutine.
256-
go service.MustGenerateCNIConflistOnce()
257-
return nil
273+
return len(programmedNCs), nil
258274
}
259275

260276
// This API will be called by CNS RequestController on CRD update.

cns/restserver/internalapi_test.go

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ import (
99
"os"
1010
"reflect"
1111
"strconv"
12+
"sync"
1213
"testing"
14+
"time"
1315

1416
"github.com/Azure/azure-container-networking/cns"
1517
"github.com/Azure/azure-container-networking/cns/fakes"
1618
"github.com/Azure/azure-container-networking/cns/types"
1719
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
1820
nma "github.com/Azure/azure-container-networking/nmagent"
1921
"github.com/google/uuid"
22+
"github.com/pkg/errors"
23+
"github.com/stretchr/testify/assert"
2024
)
2125

2226
const (
@@ -656,3 +660,196 @@ func restartService() {
656660
os.Exit(1)
657661
}
658662
}
663+
664+
type mockCNIConflistGenerator struct {
665+
generatedCount int
666+
mutex sync.Mutex
667+
}
668+
669+
func (*mockCNIConflistGenerator) Close() error { return nil }
670+
func (m *mockCNIConflistGenerator) Generate() error {
671+
m.mutex.Lock()
672+
defer m.mutex.Unlock()
673+
m.generatedCount++
674+
return nil
675+
}
676+
677+
func (m *mockCNIConflistGenerator) getGeneratedCount() int {
678+
m.mutex.Lock()
679+
defer m.mutex.Unlock()
680+
return m.generatedCount
681+
}
682+
683+
// TestCNIConflistGenerationNewNC tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation
684+
func TestCNIConflistGenerationNewNC(t *testing.T) {
685+
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
686+
mockgen := &mockCNIConflistGenerator{}
687+
service := &HTTPRestService{
688+
cniConflistGenerator: mockgen,
689+
state: &httpRestServiceState{
690+
ContainerStatus: map[string]containerstatus{
691+
ncID: {
692+
ID: ncID,
693+
HostVersion: "-1",
694+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
695+
Version: "0",
696+
},
697+
},
698+
},
699+
},
700+
nma: &fakes.NMAgentClientFake{
701+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
702+
return nma.NCVersionList{
703+
Containers: []nma.NCVersion{
704+
{
705+
NetworkContainerID: ncID,
706+
Version: "0",
707+
},
708+
},
709+
}, nil
710+
},
711+
},
712+
}
713+
714+
service.SyncHostNCVersion(context.Background(), cns.CRD)
715+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
716+
time.Sleep(time.Second)
717+
assert.Equal(t, 1, mockgen.getGeneratedCount())
718+
}
719+
720+
// TestCNIConflistGenerationExistingNC tests that if the CNS starts up with a NC already in its state, it will still generate the conflist
721+
func TestCNIConflistGenerationExistingNC(t *testing.T) {
722+
ncID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
723+
mockgen := &mockCNIConflistGenerator{}
724+
service := &HTTPRestService{
725+
cniConflistGenerator: mockgen,
726+
state: &httpRestServiceState{
727+
ContainerStatus: map[string]containerstatus{
728+
ncID: {
729+
ID: ncID,
730+
HostVersion: "0",
731+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
732+
Version: "0",
733+
},
734+
},
735+
},
736+
},
737+
}
738+
739+
service.SyncHostNCVersion(context.Background(), cns.CRD)
740+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
741+
time.Sleep(time.Second)
742+
assert.Equal(t, 1, mockgen.getGeneratedCount())
743+
}
744+
745+
// TestCNIConflistGenerationNewNCTwice tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation, but syncing
746+
// the host NC version a second time does not regenerate the conflist (conflist should only get generated once per binary lifetime)
747+
func TestCNIConflistGenerationNewNCTwice(t *testing.T) {
748+
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
749+
mockgen := &mockCNIConflistGenerator{}
750+
service := &HTTPRestService{
751+
cniConflistGenerator: mockgen,
752+
state: &httpRestServiceState{
753+
ContainerStatus: map[string]containerstatus{
754+
ncID: {
755+
ID: ncID,
756+
HostVersion: "-1",
757+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
758+
Version: "0",
759+
},
760+
},
761+
},
762+
},
763+
nma: &fakes.NMAgentClientFake{
764+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
765+
return nma.NCVersionList{
766+
Containers: []nma.NCVersion{
767+
{
768+
NetworkContainerID: ncID,
769+
Version: "0",
770+
},
771+
},
772+
}, nil
773+
},
774+
},
775+
}
776+
777+
service.SyncHostNCVersion(context.Background(), cns.CRD)
778+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
779+
time.Sleep(time.Second)
780+
assert.Equal(t, 1, mockgen.getGeneratedCount())
781+
782+
service.SyncHostNCVersion(context.Background(), cns.CRD)
783+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
784+
time.Sleep(time.Second)
785+
assert.Equal(t, 1, mockgen.getGeneratedCount()) // should still be one
786+
}
787+
788+
// TestCNIConflistNotGenerated tests that the cni conflist is not generated if no NCs are programmed
789+
func TestCNIConflistNotGenerated(t *testing.T) {
790+
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
791+
mockgen := &mockCNIConflistGenerator{}
792+
service := &HTTPRestService{
793+
cniConflistGenerator: mockgen,
794+
state: &httpRestServiceState{
795+
ContainerStatus: map[string]containerstatus{
796+
newNCID: {
797+
ID: newNCID,
798+
HostVersion: "-1",
799+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
800+
Version: "0",
801+
},
802+
},
803+
},
804+
},
805+
nma: &fakes.NMAgentClientFake{
806+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
807+
return nma.NCVersionList{}, nil
808+
},
809+
},
810+
}
811+
812+
service.SyncHostNCVersion(context.Background(), cns.CRD)
813+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
814+
time.Sleep(time.Second)
815+
assert.Equal(t, 0, mockgen.getGeneratedCount())
816+
}
817+
818+
// TestCNIConflistGenerationOnNMAError tests that the cni conflist is generated as long as we have at least one programmed NC even if
819+
// the call to NMA to list NCs fails
820+
func TestCNIConflistGenerationOnNMAError(t *testing.T) {
821+
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
822+
existingNCID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
823+
mockgen := &mockCNIConflistGenerator{}
824+
service := &HTTPRestService{
825+
cniConflistGenerator: mockgen,
826+
state: &httpRestServiceState{
827+
ContainerStatus: map[string]containerstatus{
828+
newNCID: {
829+
ID: newNCID,
830+
HostVersion: "-1",
831+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
832+
Version: "0",
833+
},
834+
},
835+
existingNCID: {
836+
ID: existingNCID,
837+
HostVersion: "0",
838+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
839+
Version: "0",
840+
},
841+
},
842+
},
843+
},
844+
nma: &fakes.NMAgentClientFake{
845+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
846+
return nma.NCVersionList{}, errors.New("some nma error")
847+
},
848+
},
849+
}
850+
851+
service.SyncHostNCVersion(context.Background(), cns.CRD)
852+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
853+
time.Sleep(time.Second)
854+
assert.Equal(t, 1, mockgen.getGeneratedCount())
855+
}

0 commit comments

Comments
 (0)