Skip to content

Commit ea7be85

Browse files
committed
fix: generate cni conflist if NCs already exist in state (#1940)
* fix: generate cni conflist if NCs already exist in state * fix: lint * fix: data race in test * fix: lint in tests
1 parent 0fc09b4 commit ea7be85

File tree

2 files changed

+224
-11
lines changed

2 files changed

+224
-11
lines changed

cns/restserver/internalapi.go

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
164164
service.Lock()
165165
defer service.Unlock()
166166
start := time.Now()
167-
err := service.syncHostNCVersion(ctx, channelMode)
167+
programmedNCCount, err := service.syncHostNCVersion(ctx, channelMode)
168+
// even if we get an error, we want to write the CNI conflist if we have any NC programmed to any version
169+
if programmedNCCount > 0 {
170+
// This will only be done once per lifetime of the CNS process. This function is threadsafe and will panic
171+
// if it fails, so it is safe to call in a non-preemptable goroutine.
172+
go service.MustGenerateCNIConflistOnce()
173+
}
168174
if err != nil {
169175
logger.Errorf("sync host error %v", err)
170176
}
@@ -174,8 +180,13 @@ func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMo
174180

175181
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
176182

177-
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) error {
183+
// syncHostVersion updates the CNS state with the latest programmed versions of NCs attached to the VM. If any NC in local CNS state
184+
// does not match the version that DNC claims to have published, this function will call NMAgent and list the latest programmed versions of
185+
// all NCs and update the CNS state accordingly. This function returns the the total number of NCs on this VM that have been programmed to
186+
// some version, NOT the number of NCs that are up-to-date.
187+
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) (int, error) {
178188
outdatedNCs := map[string]struct{}{}
189+
programmedNCs := map[string]struct{}{}
179190
for idx := range service.state.ContainerStatus {
180191
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
181192
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
@@ -194,13 +205,17 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
194205
} else if localNCVersion > dncNCVersion {
195206
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
196207
}
208+
209+
if localNCVersion > -1 {
210+
programmedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
211+
}
197212
}
198213
if len(outdatedNCs) == 0 {
199-
return nil
214+
return len(programmedNCs), nil
200215
}
201216
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
202217
if err != nil {
203-
return errors.Wrap(err, "failed to get nc version list from nmagent")
218+
return len(programmedNCs), errors.Wrap(err, "failed to get nc version list from nmagent")
204219
}
205220

206221
nmaNCs := map[string]string{}
@@ -223,8 +238,13 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
223238
if !exist {
224239
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
225240
// this point, our internal state has changed unexpectedly and we should bail out and try again.
226-
return errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
241+
return len(programmedNCs), errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
227242
}
243+
// if the NC still exists in state and is programmed to some version (doesn't have to be latest), add it to our set of NCs that have been programmed
244+
if nmaNCVersion > -1 {
245+
programmedNCs[ncID] = struct{}{}
246+
}
247+
228248
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
229249
if err != nil {
230250
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
@@ -247,14 +267,10 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
247267
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
248268
// need to return an error indicating that
249269
if len(outdatedNCs) > 0 {
250-
return errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
270+
return len(programmedNCs), errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
251271
}
252272

253-
// if NMA has programmed all the NCs that we expect, we should write the CNI conflist. This will only be done
254-
// once per lifetime of the CNS process. This function is threadsafe and will panic if it fails, so it is safe
255-
// to call in a non-preemptable goroutine.
256-
go service.MustGenerateCNIConflistOnce()
257-
return nil
273+
return len(programmedNCs), nil
258274
}
259275

260276
// This API will be called by CNS RequestController on CRD update.

cns/restserver/internalapi_test.go

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ import (
99
"os"
1010
"reflect"
1111
"strconv"
12+
"sync"
1213
"testing"
14+
"time"
1315

1416
"github.com/Azure/azure-container-networking/cns"
1517
"github.com/Azure/azure-container-networking/cns/fakes"
1618
"github.com/Azure/azure-container-networking/cns/types"
1719
"github.com/Azure/azure-container-networking/crd/nodenetworkconfig/api/v1alpha"
1820
nma "github.com/Azure/azure-container-networking/nmagent"
1921
"github.com/google/uuid"
22+
"github.com/pkg/errors"
23+
"github.com/stretchr/testify/assert"
2024
)
2125

2226
const (
@@ -655,3 +659,196 @@ func restartService() {
655659
os.Exit(1)
656660
}
657661
}
662+
663+
type mockCNIConflistGenerator struct {
664+
generatedCount int
665+
mutex sync.Mutex
666+
}
667+
668+
func (*mockCNIConflistGenerator) Close() error { return nil }
669+
func (m *mockCNIConflistGenerator) Generate() error {
670+
m.mutex.Lock()
671+
defer m.mutex.Unlock()
672+
m.generatedCount++
673+
return nil
674+
}
675+
676+
func (m *mockCNIConflistGenerator) getGeneratedCount() int {
677+
m.mutex.Lock()
678+
defer m.mutex.Unlock()
679+
return m.generatedCount
680+
}
681+
682+
// TestCNIConflistGenerationNewNC tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation
683+
func TestCNIConflistGenerationNewNC(t *testing.T) {
684+
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
685+
mockgen := &mockCNIConflistGenerator{}
686+
service := &HTTPRestService{
687+
cniConflistGenerator: mockgen,
688+
state: &httpRestServiceState{
689+
ContainerStatus: map[string]containerstatus{
690+
ncID: {
691+
ID: ncID,
692+
HostVersion: "-1",
693+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
694+
Version: "0",
695+
},
696+
},
697+
},
698+
},
699+
nma: &fakes.NMAgentClientFake{
700+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
701+
return nma.NCVersionList{
702+
Containers: []nma.NCVersion{
703+
{
704+
NetworkContainerID: ncID,
705+
Version: "0",
706+
},
707+
},
708+
}, nil
709+
},
710+
},
711+
}
712+
713+
service.SyncHostNCVersion(context.Background(), cns.CRD)
714+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
715+
time.Sleep(time.Second)
716+
assert.Equal(t, 1, mockgen.getGeneratedCount())
717+
}
718+
719+
// TestCNIConflistGenerationExistingNC tests that if the CNS starts up with a NC already in its state, it will still generate the conflist
720+
func TestCNIConflistGenerationExistingNC(t *testing.T) {
721+
ncID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
722+
mockgen := &mockCNIConflistGenerator{}
723+
service := &HTTPRestService{
724+
cniConflistGenerator: mockgen,
725+
state: &httpRestServiceState{
726+
ContainerStatus: map[string]containerstatus{
727+
ncID: {
728+
ID: ncID,
729+
HostVersion: "0",
730+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
731+
Version: "0",
732+
},
733+
},
734+
},
735+
},
736+
}
737+
738+
service.SyncHostNCVersion(context.Background(), cns.CRD)
739+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
740+
time.Sleep(time.Second)
741+
assert.Equal(t, 1, mockgen.getGeneratedCount())
742+
}
743+
744+
// TestCNIConflistGenerationNewNCTwice tests that discovering a new programmed NC in CNS state will trigger CNI conflist generation, but syncing
745+
// the host NC version a second time does not regenerate the conflist (conflist should only get generated once per binary lifetime)
746+
func TestCNIConflistGenerationNewNCTwice(t *testing.T) {
747+
ncID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
748+
mockgen := &mockCNIConflistGenerator{}
749+
service := &HTTPRestService{
750+
cniConflistGenerator: mockgen,
751+
state: &httpRestServiceState{
752+
ContainerStatus: map[string]containerstatus{
753+
ncID: {
754+
ID: ncID,
755+
HostVersion: "-1",
756+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
757+
Version: "0",
758+
},
759+
},
760+
},
761+
},
762+
nma: &fakes.NMAgentClientFake{
763+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
764+
return nma.NCVersionList{
765+
Containers: []nma.NCVersion{
766+
{
767+
NetworkContainerID: ncID,
768+
Version: "0",
769+
},
770+
},
771+
}, nil
772+
},
773+
},
774+
}
775+
776+
service.SyncHostNCVersion(context.Background(), cns.CRD)
777+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
778+
time.Sleep(time.Second)
779+
assert.Equal(t, 1, mockgen.getGeneratedCount())
780+
781+
service.SyncHostNCVersion(context.Background(), cns.CRD)
782+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
783+
time.Sleep(time.Second)
784+
assert.Equal(t, 1, mockgen.getGeneratedCount()) // should still be one
785+
}
786+
787+
// TestCNIConflistNotGenerated tests that the cni conflist is not generated if no NCs are programmed
788+
func TestCNIConflistNotGenerated(t *testing.T) {
789+
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
790+
mockgen := &mockCNIConflistGenerator{}
791+
service := &HTTPRestService{
792+
cniConflistGenerator: mockgen,
793+
state: &httpRestServiceState{
794+
ContainerStatus: map[string]containerstatus{
795+
newNCID: {
796+
ID: newNCID,
797+
HostVersion: "-1",
798+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
799+
Version: "0",
800+
},
801+
},
802+
},
803+
},
804+
nma: &fakes.NMAgentClientFake{
805+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
806+
return nma.NCVersionList{}, nil
807+
},
808+
},
809+
}
810+
811+
service.SyncHostNCVersion(context.Background(), cns.CRD)
812+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
813+
time.Sleep(time.Second)
814+
assert.Equal(t, 0, mockgen.getGeneratedCount())
815+
}
816+
817+
// TestCNIConflistGenerationOnNMAError tests that the cni conflist is generated as long as we have at least one programmed NC even if
818+
// the call to NMA to list NCs fails
819+
func TestCNIConflistGenerationOnNMAError(t *testing.T) {
820+
newNCID := "some-new-nc" //nolint:goconst // value not shared across tests, can change without issue
821+
existingNCID := "some-existing-nc" //nolint:goconst // value not shared across tests, can change without issue
822+
mockgen := &mockCNIConflistGenerator{}
823+
service := &HTTPRestService{
824+
cniConflistGenerator: mockgen,
825+
state: &httpRestServiceState{
826+
ContainerStatus: map[string]containerstatus{
827+
newNCID: {
828+
ID: newNCID,
829+
HostVersion: "-1",
830+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
831+
Version: "0",
832+
},
833+
},
834+
existingNCID: {
835+
ID: existingNCID,
836+
HostVersion: "0",
837+
CreateNetworkContainerRequest: cns.CreateNetworkContainerRequest{
838+
Version: "0",
839+
},
840+
},
841+
},
842+
},
843+
nma: &fakes.NMAgentClientFake{
844+
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
845+
return nma.NCVersionList{}, errors.New("some nma error")
846+
},
847+
},
848+
}
849+
850+
service.SyncHostNCVersion(context.Background(), cns.CRD)
851+
// CNI conflist gen happens in goroutine so sleep for a second to let it run
852+
time.Sleep(time.Second)
853+
assert.Equal(t, 1, mockgen.getGeneratedCount())
854+
}

0 commit comments

Comments
 (0)