-
Notifications
You must be signed in to change notification settings - Fork 260
feat: Creating NNC with HomeAz info in AKS-Swift Workflows when CNS starts up behind a configuration flag #3157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cc81929
95d78f7
48e0e1d
ff0bb65
0b1ca25
69cb096
e5f2637
c7129e1
978fe6c
600dd98
df7c25a
e0b0a89
8e9ce08
e11ded8
aa40cac
32c562e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,7 @@ metadata: | |
| rules: | ||
| - apiGroups: ["acn.azure.com"] | ||
| resources: ["nodenetworkconfigs"] | ||
| verbs: ["get", "list", "watch", "patch", "update"] | ||
| verbs: ["create", "delete", "get", "list", "watch", "patch", "update"] | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i don't think we'll need delete
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The design is to move the node reconcile logic over to CNS. Why do we want to avoid CNS from doing this?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In what scenario could CNS know it should delete an NNC? |
||
| --- | ||
| apiVersion: rbac.authorization.k8s.io/v1 | ||
| kind: ClusterRole | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,7 @@ type CNSConfig struct { | |
| EnableSubnetScarcity bool | ||
| EnableSwiftV2 bool | ||
| InitializeFromCNI bool | ||
| EnableHomeAZ bool | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the previous comment got lost but I think we need further discussion on the name of this field. I don't think
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agree, flag should tightly describe what function it does, not the abstract name of the scenario. |
||
| KeyVaultSettings KeyVaultSettings | ||
| MSISettings MSISettings | ||
| ManageEndpointState bool | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -633,3 +633,15 @@ func (service *HTTPRestService) CreateOrUpdateNetworkContainerInternal(req *cns. | |
| func (service *HTTPRestService) SetVFForAccelnetNICs() error { | ||
| return service.setVFForAccelnetNICs() | ||
| } | ||
|
|
||
| // GetHomeAz returns the HomeAz response reported by the homeAzMonitor for the Node where CNS is running; it holds the service read lock for the call and returns an error built from the response message when the return code is types.NotFound. | ||
| func (service *HTTPRestService) GetHomeAz(ctx context.Context) (cns.GetHomeAzResponse, error) { | ||
| service.RLock() | ||
| defer service.RUnlock() | ||
| homeAzResponse := service.homeAzMonitor.GetHomeAz(ctx) | ||
| if homeAzResponse.Response.ReturnCode == types.NotFound { | ||
| return homeAzResponse, errors.New(homeAzResponse.Response.Message) | ||
| } | ||
|
|
||
| return homeAzResponse, nil | ||
| } | ||
|
Comment on lines
+637
to
+647
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what value is this adding over calling |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -668,9 +668,9 @@ func main() { | |
| } | ||
|
|
||
| homeAzMonitor := restserver.NewHomeAzMonitor(nmaClient, time.Duration(cnsconfig.AZRSettings.PopulateHomeAzCacheRetryIntervalSecs)*time.Second) | ||
| // homeAz monitor is only required when there is a direct channel between DNC and CNS. | ||
| // This will prevent the monitor from unnecessarily calling NMA APIs for other scenarios such as AKS-swift, swiftv2 | ||
| if cnsconfig.ChannelMode == cns.Direct { | ||
| // The homeAz monitor is required when there is a direct channel between DNC and CNS, or when the homeAz feature is enabled in CNS for AKS-Swift. | ||
| // Starting it only in those cases keeps the monitor from unnecessarily calling NMA APIs in other scenarios (such as AKS-swift or swiftv2) where the feature is disabled. | ||
| if cnsconfig.ChannelMode == cns.Direct || cnsconfig.EnableHomeAZ { | ||
| homeAzMonitor.Start() | ||
| defer homeAzMonitor.Stop() | ||
| } | ||
|
|
@@ -1303,6 +1303,68 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn | |
| // TODO(rbtr): nodename and namespace should be in the cns config | ||
| directscopedcli := nncctrl.NewScopedClient(directnnccli, types.NamespacedName{Namespace: "kube-system", Name: nodeName}) | ||
|
|
||
| // Create the base NNC CRD if HomeAz is enabled | ||
| if cnsconfig.EnableHomeAZ { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. better to extract the scope opened by this flag to a different function. it will help clean up some code paths.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i would like to see NNC creation decoupled completely from HomeAz |
||
| var homeAzResponse cns.GetHomeAzResponse | ||
| if homeAzResponse, err = httpRestServiceImplementation.GetHomeAz(ctx); err != nil { | ||
| return errors.Wrap(err, "failed to get HomeAz") // error out so that CNS restarts. | ||
| } | ||
| az := homeAzResponse.HomeAzResponse.HomeAz | ||
ramiro-gamarra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| logger.Printf("[Azure CNS] HomeAz: %d", az) | ||
| // Create Node Network Config CRD and update the Home Az field with the cache value from the HomeAz Monitor | ||
| var nnc *v1alpha.NodeNetworkConfig | ||
| err = retry.Do(func() error { | ||
| if nnc, err = directnnccli.Get(ctx, types.NamespacedName{Namespace: "kube-system", Name: nodeName}); err != nil { | ||
| return errors.Wrap(err, "[Azure CNS] failed to get existing NNC") | ||
| } | ||
| return nil | ||
| }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) | ||
|
|
||
| newNNC := createBaseNNC(node) | ||
| if err != nil { | ||
| logger.Printf("[Azure CNS] Creating new base NNC with Az %d", az) | ||
| newNNC.Spec.AvailabilityZone = az | ||
| nncErr := retry.Do(func() error { | ||
| if err = directcli.Create(ctx, newNNC); err != nil { | ||
| return errors.Wrap(err, "failed to create base NNC with HomeAz") | ||
| } | ||
| return nil | ||
| }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) | ||
| if nncErr != nil { | ||
| return errors.Wrap(nncErr, "[Azure CNS] failed to create base NNC with HomeAz") | ||
| } | ||
| } | ||
|
|
||
| if err == nil { // NNC exists, patch it with new HomeAz | ||
| logger.Printf("[Azure CNS] Patching existing NNC with new Spec with HomeAz %d", az) | ||
ramiro-gamarra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| newNNC.ObjectMeta.ResourceVersion = nnc.ObjectMeta.ResourceVersion | ||
| newNNC.Spec.AvailabilityZone = az | ||
| newNNC.Spec.RequestedIPCount = nnc.Spec.RequestedIPCount | ||
| newNNC.Spec.IPsNotInUse = nnc.Spec.IPsNotInUse | ||
| newNNC.Status = nnc.Status | ||
| newNNC.UID = nnc.UID | ||
| newNNC.Name = nnc.Name | ||
| newNNC.Namespace = nnc.Namespace | ||
| newNNC.Annotations = nnc.Annotations | ||
| newNNC.Labels = nnc.Labels | ||
| newNNC.Finalizers = nnc.Finalizers | ||
| newNNC.OwnerReferences = nnc.OwnerReferences | ||
| newNNC.CreationTimestamp = nnc.CreationTimestamp | ||
| newNNC.DeletionTimestamp = nnc.DeletionTimestamp | ||
| nncErr := retry.Do(func() error { | ||
| patchErr := directcli.Update(ctx, newNNC, &client.UpdateOptions{}) | ||
| if patchErr != nil { | ||
| return errors.Wrap(patchErr, "failed to patch NNC") | ||
| } | ||
| return nil | ||
| }, retry.Delay(initCNSInitalDelay), retry.Attempts(5)) | ||
| if nncErr != nil { | ||
| return errors.Wrap(nncErr, "[AzureCNS] failed to patch NNC with Home Az") | ||
| } | ||
| } | ||
| logger.Printf("[Azure CNS] Updated HomeAz in NNC %v", newNNC) | ||
| } | ||
|
|
||
| logger.Printf("Reconciling initial CNS state") | ||
| // apiserver nnc might not be registered or api server might be down and crashloop backoff puts us outside of 5-10 minutes we have for | ||
| // aks addons to come up so retry a bit more aggressively here. | ||
|
|
@@ -1513,6 +1575,18 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn | |
| return nil | ||
| } | ||
|
|
||
| // createBaseNNC returns a NodeNetworkConfig for the given node with only its ObjectMeta populated: | ||
| // it is named after the node, placed in the "kube-system" namespace, labeled "managed"="true" and | ||
| // "owner"=<node name>, and given an empty (non-nil) annotations map. Spec and Status are left at their zero values. | ||
| func createBaseNNC(node *corev1.Node) *v1alpha.NodeNetworkConfig { | ||
| return &v1alpha.NodeNetworkConfig{ObjectMeta: metav1.ObjectMeta{ | ||
| Annotations: make(map[string]string), | ||
| Labels: map[string]string{ | ||
| "managed": "true", | ||
| "owner": node.Name, | ||
| }, | ||
| Name: node.Name, | ||
| Namespace: "kube-system", | ||
| }} | ||
| } | ||
|
|
||
| // getPodInfoByIPProvider returns a PodInfoByIPProvider that reads endpoint state from the configured source | ||
| func getPodInfoByIPProvider( | ||
| ctx context.Context, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.