Skip to content

Commit b8e906b

Browse files
committed
don't go ready till we actually write out a conflist/syncnc
1 parent f5b9a90 commit b8e906b

File tree

5 files changed

+220
-195
lines changed

5 files changed

+220
-195
lines changed

cns/healthserver/healthz.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ func NewHealthzHandlerWithChecks(cfg *Config) (http.Handler, error) {
7070
return nil
7171
}
7272
}
73+
7374
return &healthz.Handler{
7475
Checks: checks,
7576
}, nil

cns/restserver/internalapi.go

Lines changed: 0 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@ import (
1313
"net/http/httptest"
1414
"net/netip"
1515
"reflect"
16-
"strconv"
1716
"strings"
18-
"time"
1917

2018
"github.com/Azure/azure-container-networking/cns"
2119
"github.com/Azure/azure-container-networking/cns/logger"
@@ -168,151 +166,6 @@ func (service *HTTPRestService) SyncNodeStatus(dncEP, infraVnet, nodeID string,
168166
return
169167
}
170168

171-
// SyncHostNCVersion will check NC version from NMAgent and save it as host NC version in container status.
172-
// If NMAgent NC version got updated, CNS will refresh the pending programming IP status.
173-
func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMode string) {
174-
service.Lock()
175-
defer service.Unlock()
176-
start := time.Now()
177-
programmedNCCount, err := service.syncHostNCVersion(ctx, channelMode)
178-
179-
// even if we get an error, we want to write the CNI conflist if we have any NC programmed to any version
180-
if programmedNCCount > 0 {
181-
// This will only be done once per lifetime of the CNS process. This function is threadsafe and will panic
182-
// if it fails, so it is safe to call in a non-preemptable goroutine.
183-
go service.MustGenerateCNIConflistOnce()
184-
} else {
185-
logger.Printf("No NCs programmed on this host yet, skipping CNI conflist generation")
186-
}
187-
188-
if err != nil {
189-
logger.Errorf("sync host error %v", err)
190-
}
191-
192-
syncHostNCVersionCount.WithLabelValues(strconv.FormatBool(err == nil)).Inc()
193-
syncHostNCVersionLatency.WithLabelValues(strconv.FormatBool(err == nil)).Observe(time.Since(start).Seconds())
194-
}
195-
196-
var errNonExistentContainerStatus = errors.New("nonExistantContainerstatus")
197-
198-
// syncHostVersion updates the CNS state with the latest programmed versions of NCs attached to the VM. If any NC in local CNS state
199-
// does not match the version that DNC claims to have published, this function will call NMAgent and list the latest programmed versions of
200-
// all NCs and update the CNS state accordingly. This function returns the the total number of NCs on this VM that have been programmed to
201-
// some version, NOT the number of NCs that are up-to-date.
202-
func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMode string) (int, error) {
203-
outdatedNCs := map[string]struct{}{}
204-
programmedNCs := map[string]struct{}{}
205-
for idx := range service.state.ContainerStatus {
206-
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
207-
localNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].HostVersion)
208-
if err != nil {
209-
logger.Errorf("Received err when change containerstatus.HostVersion %s to int, err msg %v", service.state.ContainerStatus[idx].HostVersion, err)
210-
continue
211-
}
212-
dncNCVersion, err := strconv.Atoi(service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version)
213-
if err != nil {
214-
logger.Errorf("Received err when change nc version %s in containerstatus to int, err msg %v", service.state.ContainerStatus[idx].CreateNetworkContainerRequest.Version, err)
215-
continue
216-
}
217-
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
218-
if localNCVersion < dncNCVersion {
219-
outdatedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
220-
} else if localNCVersion > dncNCVersion {
221-
logger.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", localNCVersion, dncNCVersion)
222-
}
223-
224-
if localNCVersion > -1 {
225-
programmedNCs[service.state.ContainerStatus[idx].ID] = struct{}{}
226-
}
227-
}
228-
if len(outdatedNCs) == 0 {
229-
return len(programmedNCs), nil
230-
}
231-
232-
ncVersionListResp, err := service.nma.GetNCVersionList(ctx)
233-
if err != nil {
234-
return len(programmedNCs), errors.Wrap(err, "failed to get nc version list from nmagent")
235-
}
236-
237-
// Get IMDS NC versions for delegated NIC scenarios
238-
imdsNCVersions, err := service.GetIMDSNCs(ctx)
239-
if err != nil {
240-
// If any of the NMA API check calls, imds calls fails assume that nma build doesn't have the latest changes and create empty map
241-
imdsNCVersions = make(map[string]string)
242-
}
243-
244-
nmaNCs := map[string]string{}
245-
for _, nc := range ncVersionListResp.Containers {
246-
nmaNCs[strings.ToLower(nc.NetworkContainerID)] = nc.Version
247-
}
248-
249-
// Consolidate both nc's from NMA and IMDS calls
250-
nmaProgrammedNCs := make(map[string]string)
251-
for ncID, version := range nmaNCs {
252-
nmaProgrammedNCs[ncID] = version
253-
}
254-
for ncID, version := range imdsNCVersions {
255-
if _, exists := nmaProgrammedNCs[ncID]; !exists {
256-
nmaProgrammedNCs[strings.ToLower(ncID)] = version
257-
} else {
258-
//nolint:staticcheck // SA1019: suppress deprecated logger.Warnf usage. Todo: legacy logger usage is consistent in cns repo. Migrates when all logger usage is migrated
259-
logger.Warnf("NC %s exists in both NMA and IMDS responses, which is not expected", ncID)
260-
}
261-
}
262-
hasNC.Set(float64(len(nmaProgrammedNCs)))
263-
for ncID := range outdatedNCs {
264-
nmaProgrammedNCVersionStr, ok := nmaProgrammedNCs[ncID]
265-
if !ok {
266-
// Neither NMA nor IMDS has this NC that we need programmed yet, bail out
267-
continue
268-
}
269-
nmaProgrammedNCVersion, err := strconv.Atoi(nmaProgrammedNCVersionStr)
270-
if err != nil {
271-
logger.Errorf("failed to parse container version of %s: %s", ncID, err)
272-
continue
273-
}
274-
// Check whether it exist in service state and get the related nc info
275-
ncInfo, exist := service.state.ContainerStatus[ncID]
276-
if !exist {
277-
// if we marked this NC as needs update, but it no longer exists in internal state when we reach
278-
// this point, our internal state has changed unexpectedly and we should bail out and try again.
279-
return len(programmedNCs), errors.Wrapf(errNonExistentContainerStatus, "can't find NC with ID %s in service state, stop updating this host NC version", ncID)
280-
}
281-
// if the NC still exists in state and is programmed to some version (doesn't have to be latest), add it to our set of NCs that have been programmed
282-
if nmaProgrammedNCVersion > -1 {
283-
programmedNCs[ncID] = struct{}{}
284-
}
285-
286-
localNCVersion, err := strconv.Atoi(ncInfo.HostVersion)
287-
if err != nil {
288-
logger.Errorf("failed to parse host nc version string %s: %s", ncInfo.HostVersion, err)
289-
continue
290-
}
291-
if localNCVersion > nmaProgrammedNCVersion {
292-
//nolint:staticcheck // SA1019: suppress deprecated logger.Printf usage. Todo: legacy logger usage is consistent in cns repo. Migrates when all logger usage is migrated
293-
logger.Errorf("NC version from consolidated sources is decreasing: have %d, got %d", localNCVersion, nmaProgrammedNCVersion)
294-
continue
295-
}
296-
if channelMode == cns.CRD {
297-
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, nmaProgrammedNCVersion)
298-
}
299-
//nolint:staticcheck // SA1019: suppress deprecated logger.Printf usage. Todo: legacy logger usage is consistent in cns repo. Migrates when all logger usage is migrated
300-
logger.Printf("Updating NC %s host version from %s to %s", ncID, ncInfo.HostVersion, nmaProgrammedNCVersionStr)
301-
ncInfo.HostVersion = nmaProgrammedNCVersionStr
302-
logger.Printf("Updated NC %s host version to %s", ncID, ncInfo.HostVersion)
303-
service.state.ContainerStatus[ncID] = ncInfo
304-
// if we successfully updated the NC, pop it from the needs update set.
305-
delete(outdatedNCs, ncID)
306-
}
307-
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
308-
// need to return an error indicating that
309-
if len(outdatedNCs) > 0 {
310-
return len(programmedNCs), errors.Errorf("Have outdated NCs: %v, Current Programmed nics from NMA/IMDS %v", outdatedNCs, programmedNCs)
311-
}
312-
313-
return len(programmedNCs), nil
314-
}
315-
316169
func (service *HTTPRestService) ReconcileIPAssignment(podInfoByIP map[string]cns.PodInfo, ncReqs []*cns.CreateNetworkContainerRequest) types.ResponseCode {
317170
// index all the secondary IP configs for all the nc reqs, for easier lookup later on.
318171
allSecIPsIdx := make(map[string]*cns.CreateNetworkContainerRequest)

cns/restserver/restserver.go

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ type HTTPRestService struct {
102102
PnpIDByMacAddress map[string]string
103103
imdsClient imdsClient
104104
nodesubnetIPFetcher *nodesubnet.IPFetcher
105+
ncSynced chan struct{}
105106
}
106107

107108
type CNIConflistGenerator interface {
@@ -382,20 +383,6 @@ func (service *HTTPRestService) Stop() {
382383
logger.Printf("[Azure CNS] Service stopped.")
383384
}
384385

385-
// MustGenerateCNIConflistOnce will generate the CNI conflist once if the service was initialized with
386-
// a conflist generator. If not, this is a no-op.
387-
func (service *HTTPRestService) MustGenerateCNIConflistOnce() {
388-
service.generateCNIConflistOnce.Do(func() {
389-
if err := service.cniConflistGenerator.Generate(); err != nil {
390-
panic("unable to generate cni conflist with error: " + err.Error())
391-
}
392-
393-
if err := service.cniConflistGenerator.Close(); err != nil {
394-
panic("unable to close the cni conflist output stream: " + err.Error())
395-
}
396-
})
397-
}
398-
399386
func (service *HTTPRestService) AttachIPConfigsHandlerMiddleware(middleware cns.IPConfigsHandlerMiddleware) {
400387
service.IPConfigsHandlerMiddleware = middleware
401388
}

0 commit comments

Comments
 (0)