Skip to content

Commit 3c48a34

Browse files
authored
Add a go routine to update NC host version from NMAgent periodically. (#714)
* Add a go routine to update NC host version from NMAgent periodically. If orchestrator type is CRD, update pending programming IPs as well. * Update NC version in test from 0 to -1, which will allow default IP state as Avaialable instead of pending programming. * Add secondary IP status updation when reconcile. Resovle conflicts manually. Update unit test nc version value. Update unit test nc version. Add get nmagent default value back for integ testing purpose. Unit test can be break by this change. Update default new IP CNS status to available. Assign value to host version if none exist in util.go Addressed feedback and perform cluster integ test with 1 sec frequent nc version update. Need to clean logNCSnapshots when send out PR. Update nc version associate with secondary ip. Add new nmagent api test. Add versionResponseWithoutToken.Containers log Add containerId from our runner sub. Add containerId from NMAgent team. Addressed feedback and add real nmagent logic. Add timeout when query nmagent for nc version. * Update comments. * Add context background with timeout function for syncing node nc version. * Add 5 second force update CNS pending programming IP to available logic. * Resovle merge conflict from master. * Debug and it pass all the test. This is the final version. Change the way of http get request to add context. Change channel to no buffer with same goroutine. Found always fall in ctx.Done() condition. Add channel close for get nc version list. Add milisecond unit for timeout. Testing with different context version. * Resolve merge conflict. * Remove force update pending programming IP to available logic. Remain retry if no response from NMAgent. Release pending programming IP when scale down. * Remain VMVersion, HostVersion variable name as it is and use the Version inside CreateNetworkContainerRequest. * Addressed team member feedback.
1 parent 2bb1fd9 commit 3c48a34

File tree

15 files changed

+368
-43
lines changed

15 files changed

+368
-43
lines changed

cns/cnsclient/cnsclient_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ func addTestStateToRestServer(t *testing.T, secondaryIps []string) {
5353
for _, secIpAddress := range secondaryIps {
5454
secIpConfig := cns.SecondaryIPConfig{
5555
IPAddress: secIpAddress,
56+
NCVersion: -1,
5657
}
5758
ipId := uuid.New()
5859
secondaryIPConfigs[ipId.String()] = secIpConfig
@@ -63,6 +64,9 @@ func addTestStateToRestServer(t *testing.T, secondaryIps []string) {
6364
NetworkContainerid: "testNcId1",
6465
IPConfiguration: ipConfig,
6566
SecondaryIPConfigs: secondaryIPConfigs,
67+
// Set it as -1 to be same as default host version.
68+
// It will allow secondary IPs status to be set as available.
69+
Version: "-1",
6670
}
6771

6872
returnCode := svc.CreateOrUpdateNetworkContainerInternal(req, fakes.NewFakeScalar(releasePercent, requestPercent, batchSize), fakes.NewFakeNodeNetworkConfigSpec(initPoolSize))
@@ -122,7 +126,7 @@ func TestMain(m *testing.M) {
122126
logger.InitLogger(logName, 0, 0, tmpLogDir+"/")
123127
config := common.ServiceConfig{}
124128

125-
httpRestService, err := restserver.NewHTTPRestService(&config, fakes.NewFakeImdsClient())
129+
httpRestService, err := restserver.NewHTTPRestService(&config, fakes.NewFakeImdsClient(), fakes.NewFakeNMAgentClient())
126130
svc = httpRestService.(*restserver.HTTPRestService)
127131
svc.Name = "cns-test-server"
128132
svc.IPAMPoolMonitor = fakes.NewIPAMPoolMonitorFake()

cns/configuration/configuration.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"io/ioutil"
77
"os"
88
"path/filepath"
9+
"time"
910

1011
"github.com/Azure/azure-container-networking/cns"
1112
"github.com/Azure/azure-container-networking/cns/logger"
@@ -17,14 +18,17 @@ const (
1718
)
1819

1920
type CNSConfig struct {
20-
TelemetrySettings TelemetrySettings
21-
ManagedSettings ManagedSettings
22-
ChannelMode string
23-
UseHTTPS bool
24-
TLSSubjectName string
25-
TLSCertificatePath string
26-
TLSPort string
27-
WireserverIP string
21+
TelemetrySettings TelemetrySettings
22+
ManagedSettings ManagedSettings
23+
ChannelMode string
24+
UseHTTPS bool
25+
TLSSubjectName string
26+
TLSCertificatePath string
27+
TLSPort string
28+
TLSEndpoint string
29+
WireserverIP string
30+
SyncHostNCVersionIntervalMs time.Duration
31+
SyncHostNCTimeoutMs time.Duration
2832
}
2933

3034
type TelemetrySettings struct {
@@ -121,11 +125,13 @@ func setManagedSettingDefaults(managedSettings *ManagedSettings) {
121125
}
122126
}
123127

124-
// Set Default values of CNS config if not specified
128+
// SetCNSConfigDefaults set default values of CNS config if not specified
125129
func SetCNSConfigDefaults(config *CNSConfig) {
126130
setTelemetrySettingDefaults(&config.TelemetrySettings)
127131
setManagedSettingDefaults(&config.ManagedSettings)
128132
if config.ChannelMode == "" {
129133
config.ChannelMode = cns.Direct
130134
}
135+
config.SyncHostNCVersionIntervalMs = 1000
136+
config.SyncHostNCTimeoutMs = 500
131137
}

cns/fakes/cnsfake.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package fakes
22

33
import (
4+
"context"
45
"encoding/json"
56
"errors"
67
"sync"
8+
"time"
79

810
"github.com/Azure/azure-container-networking/cns"
911
"github.com/Azure/azure-container-networking/cns/common"
@@ -230,6 +232,11 @@ func (fake *HTTPServiceFake) SyncNodeStatus(string, string, string, json.RawMess
230232
return 0, ""
231233
}
232234

235+
// SyncHostNCVersion will update HostVersion in containerstatus.
236+
func (fake *HTTPServiceFake) SyncHostNCVersion(context.Context, string, time.Duration) {
237+
return
238+
}
239+
233240
func (fake *HTTPServiceFake) GetPendingProgramIPConfigs() []cns.IPConfigurationStatus {
234241
ipconfigs := []cns.IPConfigurationStatus{}
235242
for _, ipconfig := range fake.IPStateManager.PendingProgramIPConfigState {

cns/fakes/nmagentclientfake.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2020 Microsoft. All rights reserved.
2+
// MIT License
3+
4+
package fakes
5+
6+
// NMAgentClientTest can be used to query to VM Host info.
7+
type NMAgentClientTest struct {
8+
}
9+
10+
// NewFakeNMAgentClient return a mock implemetation of NMAgentClient
11+
func NewFakeNMAgentClient() *NMAgentClientTest {
12+
return &NMAgentClientTest{}
13+
}
14+
15+
// GetNcVersionListWithOutToken is mock implementation to return nc version list.
16+
func (nmagentclient *NMAgentClientTest) GetNcVersionListWithOutToken(ncNeedUpdateList []string) map[string]int {
17+
ncVersionList := make(map[string]int)
18+
for _, ncID := range ncNeedUpdateList {
19+
ncVersionList[ncID] = 0
20+
}
21+
return ncVersionList
22+
}

cns/nmagentclient/nmagentclient.go

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,25 @@ import (
55
"encoding/json"
66
"encoding/xml"
77
"fmt"
8+
"io/ioutil"
89
"net/http"
10+
"strconv"
11+
"time"
912

1013
"github.com/Azure/azure-container-networking/cns/logger"
1114
"github.com/Azure/azure-container-networking/common"
1215
)
1316

1417
const (
1518
//GetNmAgentSupportedApiURLFmt Api endpoint to get supported Apis of NMAgent
16-
GetNmAgentSupportedApiURLFmt = "http://%s/machine/plugins/?comp=nmagent&type=GetSupportedApis"
17-
GetNetworkContainerVersionURLFmt = "http://%s/machine/plugins/?comp=nmagent&type=NetworkManagement/interfaces/%s/networkContainers/%s/version/authenticationToken/%s/api-version/1"
19+
GetNmAgentSupportedApiURLFmt = "http://%s/machine/plugins/?comp=nmagent&type=GetSupportedApis"
20+
GetNetworkContainerVersionURLFmt = "http://%s/machine/plugins/?comp=nmagent&type=NetworkManagement/interfaces/%s/networkContainers/%s/version/authenticationToken/%s/api-version/1"
21+
GetNcVersionListWithOutTokenURLFmt = "http://%s/machine/plugins/?comp=nmagent&type=NetworkManagement/interfaces/api-version/%s"
1822
)
1923

2024
//WireServerIP - wire server ip
2125
var WireserverIP = "168.63.129.16"
26+
var getNcVersionListWithOutTokenURLVersion = "2"
2227

2328
// NMANetworkContainerResponse - NMAgent response.
2429
type NMANetworkContainerResponse struct {
@@ -31,6 +36,36 @@ type NMAgentSupportedApisResponseXML struct {
3136
SupportedApis []string `xml:"type"`
3237
}
3338

39+
type ContainerInfo struct {
40+
NetworkContainerID string `json:"networkContainerId"`
41+
Version string `json:"version"`
42+
}
43+
44+
type NMANetworkContainerListResponse struct {
45+
ResponseCode string `json:"httpStatusCode"`
46+
Containers []ContainerInfo `json:"networkContainers"`
47+
}
48+
49+
// NMAgentClient is client to handle queries to nmagent
50+
type NMAgentClient struct {
51+
connectionURL string
52+
}
53+
54+
// NMAgentClientInterface has interface that nmagent client will handle
55+
type NMAgentClientInterface interface {
56+
GetNcVersionListWithOutToken(ncNeedUpdateList []string) map[string]int
57+
}
58+
59+
// NewNMAgentClient create a new nmagent client.
60+
func NewNMAgentClient(url string) (*NMAgentClient, error) {
61+
if url == "" {
62+
url = fmt.Sprintf(GetNcVersionListWithOutTokenURLFmt, WireserverIP, getNcVersionListWithOutTokenURLVersion)
63+
}
64+
return &NMAgentClient{
65+
connectionURL: url,
66+
}, nil
67+
}
68+
3469
// JoinNetwork joins the given network
3570
func JoinNetwork(
3671
networkID string,
@@ -149,3 +184,42 @@ func GetNmAgentSupportedApis(
149184
logger.Printf("[NMAgentClient][Response] GetNmAgentSupportedApis. Response: %+v.", response)
150185
return xmlDoc.SupportedApis, nil
151186
}
187+
188+
// GetNcVersionListWithOutToken query nmagent for programmed container version.
189+
func (nmagentclient *NMAgentClient) GetNcVersionListWithOutToken(ncNeedUpdateList []string) map[string]int {
190+
ncVersionList := make(map[string]int)
191+
now := time.Now()
192+
response, err := http.Get(nmagentclient.connectionURL)
193+
latency := time.Since(now)
194+
logger.Printf("[NMAgentClient][Response] GetNcVersionListWithOutToken response: %+v, latency is %d", response, latency.Milliseconds())
195+
196+
if response.StatusCode != http.StatusOK {
197+
logger.Printf("[NMAgentClient][Response] GetNcVersionListWithOutToken failed with %d, err is %v", response.StatusCode, err)
198+
return nil
199+
}
200+
201+
var nmaNcListResponse NMANetworkContainerListResponse
202+
rBytes, _ := ioutil.ReadAll(response.Body)
203+
logger.Printf("Response body is %v", rBytes)
204+
json.Unmarshal(rBytes, &nmaNcListResponse)
205+
if nmaNcListResponse.ResponseCode != strconv.Itoa(http.StatusOK) {
206+
logger.Printf("[NMAgentClient][Response] GetNcVersionListWithOutToken unmarshal failed with %s", rBytes)
207+
return nil
208+
}
209+
210+
var receivedNcVersionListInMap = make(map[string]string)
211+
for _, containers := range nmaNcListResponse.Containers {
212+
receivedNcVersionListInMap[containers.NetworkContainerID] = containers.Version
213+
}
214+
for _, ncID := range ncNeedUpdateList {
215+
if version, ok := receivedNcVersionListInMap[ncID]; ok {
216+
if versionInInt, err := strconv.Atoi(version); err != nil {
217+
logger.Printf("[NMAgentClient][Response] GetNcVersionListWithOutToken translate version %s to int failed with %s", version, err)
218+
} else {
219+
ncVersionList[ncID] = versionInInt
220+
logger.Printf("Containers id is %s, programmed NC version is %d", ncID, versionInInt)
221+
}
222+
}
223+
}
224+
return ncVersionList
225+
}

cns/requestcontroller/kubecontroller/crdtranslator.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"strconv"
77

88
"github.com/Azure/azure-container-networking/cns"
9+
"github.com/Azure/azure-container-networking/log"
910
nnc "github.com/Azure/azure-container-networking/nodenetworkconfig/api/v1alpha"
1011
)
1112

@@ -66,7 +67,10 @@ func CRDStatusToNCRequest(crdStatus nnc.NodeNetworkConfigStatus) (cns.CreateNetw
6667
NCVersion: ncVersion,
6768
}
6869
ncRequest.SecondaryIPConfigs[ipAssignment.Name] = secondaryIPConfig
70+
log.Debugf("Seconday IP Configs got set, name is %s, config is %v", ipAssignment.Name, secondaryIPConfig)
6971
}
72+
log.Printf("Set NC request info with NetworkContainerid %s, NetworkContainerType %s, NC Version %s",
73+
ncRequest.NetworkContainerid, ncRequest.NetworkContainerType, ncRequest.Version)
7074
}
7175

7276
//Only returning the first network container for now, later we will return a list

cns/restserver/api_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ func startService() {
912912
var err error
913913
// Create the service.
914914
config := common.ServiceConfig{}
915-
service, err = NewHTTPRestService(&config, fakes.NewFakeImdsClient())
915+
service, err = NewHTTPRestService(&config, fakes.NewFakeImdsClient(), fakes.NewFakeNMAgentClient())
916916
if err != nil {
917917
fmt.Printf("Failed to create CNS object %v\n", err)
918918
os.Exit(1)

cns/restserver/const.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ const (
3838
NetworkContainerVfpProgramComplete = 35
3939
NetworkContainerVfpProgramCheckSkipped = 36
4040
NmAgentSupportedApisError = 37
41+
UnsupportedNCVersion = 38
4142
UnexpectedError = 99
4243
)
4344

cns/restserver/internalapi.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@ package restserver
55

66
import (
77
"bytes"
8+
"context"
89
"encoding/json"
910
"fmt"
1011
"net/http"
1112
"net/http/httptest"
1213
"reflect"
14+
"strconv"
15+
"time"
1316

1417
"github.com/Azure/azure-container-networking/cns"
1518
"github.com/Azure/azure-container-networking/cns/logger"
@@ -143,6 +146,64 @@ func (service *HTTPRestService) SyncNodeStatus(dncEP, infraVnet, nodeID string,
143146
return
144147
}
145148

149+
// SyncHostNCVersion will check NC version from NMAgent and save it as host NC version in container status.
150+
// If NMAgent NC version got updated, CNS will refresh the pending programming IP status.
151+
func (service *HTTPRestService) SyncHostNCVersion(ctx context.Context, channelMode string, syncHostNCTimeoutMilliSec time.Duration) {
152+
var hostVersionNeedUpdateNcList []string
153+
service.RLock()
154+
for _, containerstatus := range service.state.ContainerStatus {
155+
// Will open a separate PR to convert all the NC version related variable to int. Change from string to int is a pain.
156+
hostVersion, err := strconv.Atoi(containerstatus.HostVersion)
157+
if err != nil {
158+
log.Errorf("Received err when change containerstatus.HostVersion %s to int, err msg %v", containerstatus.HostVersion, err)
159+
continue
160+
}
161+
dncNcVersion, err := strconv.Atoi(containerstatus.CreateNetworkContainerRequest.Version)
162+
if err != nil {
163+
log.Errorf("Received err when change nc version %s in containerstatus to int, err msg %v", containerstatus.CreateNetworkContainerRequest.Version, err)
164+
continue
165+
}
166+
// host NC version is the NC version from NMAgent, if it's smaller than NC version from DNC, then append it to indicate it needs update.
167+
if hostVersion < dncNcVersion {
168+
hostVersionNeedUpdateNcList = append(hostVersionNeedUpdateNcList, containerstatus.ID)
169+
} else if hostVersion > dncNcVersion {
170+
log.Errorf("NC version from NMAgent is larger than DNC, NC version from NMAgent is %d, NC version from DNC is %d", hostVersion, dncNcVersion)
171+
}
172+
}
173+
service.RUnlock()
174+
if len(hostVersionNeedUpdateNcList) > 0 {
175+
ncVersionChannel := make(chan map[string]int)
176+
ctxWithTimeout, _ := context.WithTimeout(ctx, syncHostNCTimeoutMilliSec*time.Millisecond)
177+
go func() {
178+
ncVersionChannel <- service.nmagentClient.GetNcVersionListWithOutToken(hostVersionNeedUpdateNcList)
179+
close(ncVersionChannel)
180+
}()
181+
select {
182+
case newHostNCVersionList := <-ncVersionChannel:
183+
if newHostNCVersionList == nil {
184+
logger.Errorf("Can't get vfp programmed NC version list from url without token")
185+
} else {
186+
service.Lock()
187+
for ncID, newHostNCVersion := range newHostNCVersionList {
188+
// Check whether it exist in service state and get the related nc info
189+
if ncInfo, exist := service.state.ContainerStatus[ncID]; !exist {
190+
logger.Errorf("Can't find NC with ID %s in service state, stop updating this host NC version", ncID)
191+
} else {
192+
if channelMode == cns.CRD {
193+
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, newHostNCVersion)
194+
}
195+
ncInfo.HostVersion = strconv.Itoa(newHostNCVersion)
196+
service.state.ContainerStatus[ncID] = ncInfo
197+
}
198+
}
199+
service.Unlock()
200+
}
201+
case <-ctxWithTimeout.Done():
202+
logger.Errorf("Timeout when getting vfp programmed NC version list from url without token")
203+
}
204+
}
205+
}
206+
146207
// This API will be called by CNS RequestController on CRD update.
147208
func (service *HTTPRestService) ReconcileNCState(ncRequest *cns.CreateNetworkContainerRequest, podInfoByIp map[string]cns.KubernetesPodInfo, scalar nnc.Scaler, spec nnc.NodeNetworkConfigSpec) int {
148209
// check if ncRequest is null, then return as there is no CRD state yet

0 commit comments

Comments
 (0)