diff --git a/.golangci.yml b/.golangci.yml index b0d9e59c04..b96a12221f 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -3,37 +3,37 @@ issues: max-issues-per-linter: 0 new-from-rev: origin/master linters: - presets: - - bugs - - error - - format - - performance - - unused + presets: + - bugs + - error + - format + - performance + - unused disable: - - maligned - - scopelint + - maligned + - scopelint + - gomnd enable: - - exportloopref - - goconst - - gocritic - - gocyclo - - gofmt - - gomnd - - goprintffuncname - - gosimple - - lll - - misspell - - nakedret - - promlinter - - revive + - exportloopref + - goconst + - gocritic + - gocyclo + - gofmt + - goprintffuncname + - gosimple + - lll + - misspell + - nakedret + - promlinter + - revive linters-settings: gocritic: enabled-tags: - - "diagnostic" - - "style" - - "performance" + - "diagnostic" + - "style" + - "performance" disabled-checks: - - "hugeParam" + - "hugeParam" govet: check-shadowing: true lll: diff --git a/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml b/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml index 47ae2f3557..c18a5d219c 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.4.39.1.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml b/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml index 3db8a46a3c..18f0eaee44 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.5.28.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml b/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml index f33fbba69d..40a3ec4bc2 100644 --- a/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml +++ b/.pipelines/mdnc/azure-cns-cni-1.5.4.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/.pipelines/mdnc/azure-cns-cni.yaml b/.pipelines/mdnc/azure-cns-cni.yaml index 469f25c669..a1e2ea5d42 100644 --- a/.pipelines/mdnc/azure-cns-cni.yaml +++ b/.pipelines/mdnc/azure-cns-cni.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/cns/azure-cns.yaml b/cns/azure-cns.yaml index 260bb775c1..fefafb14de 100644 --- a/cns/azure-cns.yaml +++ b/cns/azure-cns.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/cns/configuration/configuration.go b/cns/configuration/configuration.go index c183c9f0e2..ac3625bf73 100644 --- a/cns/configuration/configuration.go +++ b/cns/configuration/configuration.go @@ -35,6 +35,7 @@ type CNSConfig struct { EnableSubnetScarcity bool EnableSwiftV2 bool InitializeFromCNI bool + EnableHomeAZ bool KeyVaultSettings KeyVaultSettings MSISettings MSISettings ManageEndpointState bool diff --git a/cns/restserver/homeazmonitor.go b/cns/restserver/homeazmonitor.go index f73c6e35af..1277214fd3 100644 --- a/cns/restserver/homeazmonitor.go +++ b/cns/restserver/homeazmonitor.go @@ -135,7 +135,7 @@ func (h *HomeAzMonitor) Populate(ctx context.Context) { return default: - returnMessage := fmt.Sprintf("[HomeAzMonitor] failed with StatusCode: %d", apiError.StatusCode()) + returnMessage := fmt.Sprintf("[HomeAzMonitor] failed with StatusCode: %d and error %v", apiError.StatusCode(), err) returnCode := types.UnexpectedError h.update(returnCode, returnMessage, cns.HomeAzResponse{IsSupported: true}) return diff --git a/cns/restserver/internalapi.go b/cns/restserver/internalapi.go index fa41525030..4c2d80e258 100644 --- a/cns/restserver/internalapi.go +++ b/cns/restserver/internalapi.go @@ -633,3 +633,15 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns. func (service *HTTPRestService) SetVFForAccelnetNICs() error { return service.setVFForAccelnetNICs() } + +// GetHomeAz - Get the Home Az for the Node where CNS is running. +func (service *HTTPRestService) GetHomeAz(ctx context.Context) (cns.GetHomeAzResponse, error) { + service.RLock() + defer service.RUnlock() + homeAzResponse := service.homeAzMonitor.GetHomeAz(ctx) + if homeAzResponse.Response.ReturnCode == types.NotFound { + return homeAzResponse, errors.New(homeAzResponse.Response.Message) + } + + return homeAzResponse, nil +} diff --git a/cns/service/main.go b/cns/service/main.go index 36f24dffa7..26ba92cb94 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -668,9 +668,9 @@ func main() { } homeAzMonitor := restserver.NewHomeAzMonitor(nmaClient, time.Duration(cnsconfig.AZRSettings.PopulateHomeAzCacheRetryIntervalSecs)*time.Second) - // homeAz monitor is only required when there is a direct channel between DNC and CNS. - // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 - if cnsconfig.ChannelMode == cns.Direct { + // homeAz monitor is required when there is a direct channel between DNC and CNS OR when homeAz feature is enabled in CNS for AKS-Swift + // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 when disabled. + if cnsconfig.ChannelMode == cns.Direct || cnsconfig.EnableHomeAZ { homeAzMonitor.Start() defer homeAzMonitor.Stop() } @@ -1303,6 +1303,68 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // TODO(rbtr): nodename and namespace should be in the cns config directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) + // Create the base NNC CRD if HomeAz is enabled + if cnsconfig.EnableHomeAZ { + var homeAzResponse cns.GetHomeAzResponse + if homeAzResponse, err = httpRestServiceImplementation.GetHomeAz(ctx); err != nil { + return errors.Wrap(err, "failed to get HomeAz") // error out so that CNS restarts. + } + az := homeAzResponse.HomeAzResponse.HomeAz + logger.Printf("[Azure CNS] HomeAz: %d", az) + // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor + var nnc *v1alpha.NodeNetworkConfig + err = retry.Do(func() error { + if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { + return errors.Wrap(err, "[Azure CNS] failed to get existing NNC") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) + + newNNC := createBaseNNC(node) + if err != nil { + logger.Printf("[Azure CNS] Creating new base NNC with Az %d", az) + newNNC.Spec.AvailabilityZone = az + nncErr := retry.Do(func() error { + if err = directcli.Create(ctx, newNNC); err != nil { + return errors.Wrap(err, "failed to create base NNC with HomeAz") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) + if nncErr != nil { + return errors.Wrap(nncErr, "[Azure CNS] failed to create base NNC with HomeAz") + } + } + + if err == nil { // NNC exists, patch it with new HomeAz + logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz %d", az) + newNNC.ObjectMeta.ResourceVersion = nnc.ObjectMeta.ResourceVersion + newNNC.Spec.AvailabilityZone = az + newNNC.Spec.RequestedIPCount = nnc.Spec.RequestedIPCount + newNNC.Spec.IPsNotInUse = nnc.Spec.IPsNotInUse + newNNC.Status = nnc.Status + newNNC.UID = nnc.UID + newNNC.Name = nnc.Name + newNNC.Namespace = nnc.Namespace + newNNC.Annotations = nnc.Annotations + newNNC.Labels = nnc.Labels + newNNC.Finalizers = nnc.Finalizers + newNNC.OwnerReferences = nnc.OwnerReferences + newNNC.CreationTimestamp = nnc.CreationTimestamp + newNNC.DeletionTimestamp = nnc.DeletionTimestamp + nncErr := retry.Do(func() error { + patchErr := directcli.Update(ctx, newNNC, &client.UpdateOptions{}) + if patchErr != nil { + return errors.Wrap(patchErr, "failed to patch NNC") + } + return nil + }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) + if nncErr != nil { + return errors.Wrap(nncErr, "[AzureCNS] failed to patch NNC with Home Az") + } + } + logger.Printf("[Azure CNS] Updated HomeAz in NNC %v", newNNC) + } + logger.Printf("Reconciling initial CNS state") // apiserver nnc might not be registered or api server might be down and crashloop backof puts us outside of 5-10 minutes we have for // aks addons to come up so retry a bit more aggresively here. @@ -1513,6 +1575,18 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn return nil } +func createBaseNNC(node *corev1.Node) *v1alpha.NodeNetworkConfig { + return &v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ + Annotations: make(map[string]string), + Labels: map[string]string{ + "managed": "true", + "owner": node.Name, + }, + Name: node.Name, + Namespace: "kube-system", + }} +} + // getPodInfoByIPProvider returns a PodInfoByIPProvider that reads endpoint state from the configured source func getPodInfoByIPProvider( ctx context.Context, diff --git a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go index 91691d6548..9379504c7e 100644 --- a/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go +++ b/crd/nodenetworkconfig/api/v1alpha/nodenetworkconfig.go @@ -48,8 +48,9 @@ type NodeNetworkConfigSpec struct { RequestedIPCount int64 `json:"requestedIPCount"` IPsNotInUse []string `json:"ipsNotInUse,omitempty"` // AvailabilityZone contains the Azure availability zone for the virtual machine where network containers are placed. + // NMA returns an int value for the availability zone. // +kubebuilder:validation:Optional - AvailabilityZone string `json:"availabilityZone,omitempty"` + AvailabilityZone uint `json:"availabilityZone,omitempty"` } // Status indicates the NNC reconcile status diff --git a/test/integration/manifests/cilium/cns-write-ovly.yaml b/test/integration/manifests/cilium/cns-write-ovly.yaml index 4f3d919757..296edd3f48 100644 --- a/test/integration/manifests/cilium/cns-write-ovly.yaml +++ b/test/integration/manifests/cilium/cns-write-ovly.yaml @@ -12,7 +12,7 @@ metadata: rules: - apiGroups: ["acn.azure.com"] resources: ["nodenetworkconfigs"] - verbs: ["get", "list", "watch", "patch", "update"] + verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole