From 3761feb0c7f38d4ec753d2222279bc257016ccb4 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Fri, 24 Oct 2025 12:53:11 -0400 Subject: [PATCH 1/3] docs: Add load balancer zone redundancy documentation Add comprehensive documentation for zone-redundant load balancer feature: - Explain Azure zone redundancy concepts for load balancers - Provide configuration examples for all load balancer types: - Internal load balancers (API server) - Public load balancers - Node outbound load balancers - Control plane outbound load balancers - Include complete highly available cluster example - Document important considerations: - Immutability of zones after creation - Region support requirements - Standard SKU requirement - Backend pool placement best practices - Provide migration guidance for existing clusters - Add troubleshooting section - Document best practices --- docs/book/src/SUMMARY.md | 1 + .../load-balancer-zone-redundancy.md | 294 ++++++++++++++++++ 2 files changed, 295 insertions(+) create mode 100644 docs/book/src/self-managed/load-balancer-zone-redundancy.md diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md index b437fc7b3e3..a4f14b92d0e 100644 --- a/docs/book/src/SUMMARY.md +++ b/docs/book/src/SUMMARY.md @@ -35,6 +35,7 @@ - [Externally managed Azure infrastructure](./self-managed/externally-managed-azure-infrastructure.md) - [Failure Domains](./self-managed/failure-domains.md) - [Flatcar](./self-managed/flatcar.md) + - [Load Balancer Zone Redundancy](./self-managed/load-balancer-zone-redundancy.md) - [GPU-enabled Clusters](./self-managed/gpu.md) - [IPv6](./self-managed/ipv6.md) - [Machine Pools (VMSS)](./self-managed/machinepools.md) diff --git a/docs/book/src/self-managed/load-balancer-zone-redundancy.md b/docs/book/src/self-managed/load-balancer-zone-redundancy.md new file mode 100644 index 00000000000..8e4ea121fe9 --- /dev/null +++ b/docs/book/src/self-managed/load-balancer-zone-redundancy.md @@ -0,0 +1,294 @@ +# Load Balancer Zone Redundancy + +## Zone Redundancy for Load Balancers in Azure + +Azure Load Balancers can be configured as zone-redundant to ensure high availability across multiple availability zones within a region. A zone-redundant load balancer distributes traffic across all zones, providing resilience against zone failures. + +**Key concepts:** +- Zone redundancy for load balancers is configured through the **frontend IP configuration** +- For **internal load balancers**, zones are set directly on the frontend IP configuration +- For **public load balancers**, zones are inherited from the zone configuration of the public IP address +- **Zones are immutable** - once created, they cannot be changed, added, or removed + +Full details can be found in the [Azure Load Balancer reliability documentation](https://learn.microsoft.com/azure/reliability/reliability-load-balancer). + +## Configuring Zone-Redundant Load Balancers + +CAPZ exposes the `availabilityZones` field on load balancer specifications to enable zone redundancy. 
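+
+The field takes up to three zone names, each expressed as a string; the API definition added later in this series marks it as a set with at most three entries. As a minimal sketch (field shape only, not a complete spec), it looks the same on every load balancer type that supports it:
+
+```yaml
+# Shape of the field on any LoadBalancerSpec (apiServerLB, nodeOutboundLB,
+# controlPlaneOutboundLB); zone names are strings, maximum of three entries.
+availabilityZones:
+  - "1"
+  - "2"
+  - "3"
+```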
+ +### Internal Load Balancers + +For internal load balancers (such as a private API server), you can configure availability zones directly on the load balancer spec: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" +``` + +This configuration creates a zone-redundant internal load balancer with frontend IPs distributed across zones 1, 2, and 3. + +### Public Load Balancers + +For public load balancers, zone redundancy is primarily controlled by the public IP addresses. However, you can still set `availabilityZones` on the load balancer for consistency: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" +``` + +> **Note**: For public load balancers, ensure that the associated public IP addresses are also zone-redundant for complete zone redundancy. + +### Node Outbound Load Balancer + +You can also configure zone redundancy for node outbound load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: westus2 + networkSpec: + nodeOutboundLB: + type: Public + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip +``` + +### Control Plane Outbound Load Balancer + +For clusters with private API servers, you can configure the control plane outbound load balancer: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: my-cluster + namespace: default +spec: + location: eastus + networkSpec: + apiServerLB: + type: Internal + availabilityZones: + - "1" + - "2" + - "3" + controlPlaneOutboundLB: + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: controlplane-outbound-ip + publicIP: + name: controlplane-outbound-publicip +``` + +## Complete Example: Highly Available Cluster + +Here's a complete example of a highly available cluster with zone-redundant load balancers: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ha-cluster + namespace: default +spec: + location: eastus + resourceGroup: ha-cluster-rg + networkSpec: + # Zone-redundant internal API server load balancer + apiServerLB: + type: Internal + name: ha-cluster-internal-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: api-server-internal-ip + privateIPAddress: "10.0.0.100" + + # Zone-redundant control plane outbound load balancer + controlPlaneOutboundLB: + name: ha-cluster-cp-outbound-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: cp-outbound-ip + publicIP: + name: cp-outbound-publicip + + # Zone-redundant node outbound load balancer + nodeOutboundLB: + name: ha-cluster-node-outbound-lb + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: node-outbound-ip + publicIP: + name: node-outbound-publicip + + # Custom VNet configuration + vnet: + name: ha-cluster-vnet + cidrBlocks: + - "10.0.0.0/16" + + subnets: + - name: control-plane-subnet + role: control-plane + cidrBlocks: + - "10.0.0.0/24" + - name: node-subnet + role: node + cidrBlocks: + - "10.0.1.0/24" +``` + +## Important 
Considerations
+
+### Immutability
+
+Once a load balancer is created with availability zones, the zone configuration **cannot be changed**. This is an Azure platform limitation. To change zones, you must:
+
+1. Delete the load balancer
+2. Recreate it with the new zone configuration
+
+> **Warning**: Changing load balancer zones requires recreating the cluster's load balancers, which will cause service interruption.
+
+### Region Support
+
+Not all Azure regions support availability zones. Before configuring zone-redundant load balancers, verify that your target region supports zones:
+
+```bash
+az vm list-skus -l <location> --zone -o table
+```
+
+### Standard SKU Requirement
+
+Zone-redundant load balancers require the **Standard SKU**. CAPZ uses Standard SKU by default, so no additional configuration is needed.
+
+### Backend Pool Placement
+
+For optimal high availability:
+- Spread your control plane nodes across all availability zones
+- Spread your worker nodes across all availability zones
+- Ensure backend pool members exist in the same zones as the load balancer
+
+See the [Failure Domains](failure-domains.md) documentation for details on distributing VMs across zones.
+
+## Migration from Non-Zone-Redundant Load Balancers
+
+If you have an existing cluster without zone-redundant load balancers, migration requires careful planning:
+
+### For New Clusters
+
+When creating a new cluster, simply include the `availabilityZones` field in your `AzureCluster` specification from the start.
+
+### For Existing Clusters
+
+**Migration is not straightforward** because:
+1. Azure does not allow modifying zones on existing load balancers
+2. CAPZ's webhook validation prevents zone changes to enforce this immutability
+3. Load balancer recreation requires cluster downtime
+
+**Recommended approach for existing clusters:**
+1. Create a new cluster with zone-redundant configuration
+2. Migrate workloads to the new cluster
+3. Decommission the old cluster
+
+**Alternative for development/test clusters:**
+1. Delete the `AzureCluster` resource (this will delete the infrastructure)
+2. Recreate the `AzureCluster` with `availabilityZones` configured
+3. Wait for the cluster to reconcile
+
+> **Important**: The alternative approach causes significant downtime and should only be used in non-production environments.
+
+## Troubleshooting
+
+### Load Balancer Not Zone-Redundant
+
+If your load balancer is not zone-redundant despite being configured with zones:
+
+1. **Verify the zones are set in the spec:**
+   ```bash
+   kubectl get azurecluster <cluster-name> -o jsonpath='{.spec.networkSpec.apiServerLB.availabilityZones}'
+   ```
+
+2. **Check the Azure load balancer frontend configuration:**
+   ```bash
+   az network lb frontend-ip show \
+     --lb-name <lb-name> \
+     --name <frontend-ip-name> \
+     --resource-group <resource-group> \
+     --query zones
+   ```
+
+3. **Verify the region supports zones:**
+   ```bash
+   az vm list-skus -l <location> --zone -o table
+   ```
+
+### Validation Errors
+
+If you encounter validation errors when updating `availabilityZones`:
+
+```
+field is immutable
+```
+
+This is expected behavior. Zones cannot be modified after creation. You must recreate the load balancer with the desired configuration.
+
+## Best Practices
+
+1. **Enable zone redundancy from the start** when creating new clusters in zone-capable regions
+2. **Use all available zones** in the region (typically 3 zones) for maximum resilience
+3. **Spread backend pools** across all zones configured on the load balancer
+4. **Monitor zone health** and be prepared to handle zone failures
+5. 
**Test failover scenarios** to ensure your cluster can survive zone outages +6. **Document your zone configuration** for disaster recovery procedures + +## Related Documentation + +- [Failure Domains](failure-domains.md) - Configure VMs across availability zones +- [API Server Endpoint](api-server-endpoint.md) - API server load balancer configuration +- [Azure Load Balancer Reliability](https://learn.microsoft.com/azure/reliability/reliability-load-balancer) - Azure official documentation From 4a09fe5763c62c5943006bf7efc13c876df5e1e6 Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Thu, 11 Dec 2025 13:44:03 -0500 Subject: [PATCH 2/3] Add support for zone-redundant load balancers - Add AvailabilityZones field to LoadBalancerSpec API - Implement zone support in service layer for frontend IP configs - Add webhook validation for zone immutability - Update generated CRD manifests - Add zone redundancy to private cluster flavor --- api/v1beta1/azurecluster_webhook.go | 37 +++++++++++++++++++ api/v1beta1/types.go | 8 ++++ api/v1beta1/zz_generated.deepcopy.go | 5 +++ azure/scope/cluster.go | 4 ++ azure/services/loadbalancers/spec.go | 12 ++++++ ...ucture.cluster.x-k8s.io_azureclusters.yaml | 33 +++++++++++++++++ templates/cluster-template-private.yaml | 4 ++ .../flavors/private/patches/private-lb.yaml | 4 ++ .../ci/cluster-template-prow-private.yaml | 4 ++ 9 files changed, 111 insertions(+) diff --git a/api/v1beta1/azurecluster_webhook.go b/api/v1beta1/azurecluster_webhook.go index def1503c053..816d1a0afdd 100644 --- a/api/v1beta1/azurecluster_webhook.go +++ b/api/v1beta1/azurecluster_webhook.go @@ -169,6 +169,43 @@ func (*AzureClusterWebhook) ValidateUpdate(_ context.Context, oldRaw, newObj run allErrs = append(allErrs, err) } + // Validate availability zones are immutable for load balancers + if c.Spec.NetworkSpec.APIServerLB != nil && old.Spec.NetworkSpec.APIServerLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + old.Spec.NetworkSpec.APIServerLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "apiServerLB", "availabilityZones"), + c.Spec.NetworkSpec.APIServerLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.NodeOutboundLB != nil && old.Spec.NetworkSpec.NodeOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "nodeOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + + if c.Spec.NetworkSpec.ControlPlaneOutboundLB != nil && old.Spec.NetworkSpec.ControlPlaneOutboundLB != nil { + if !webhookutils.EnsureStringSlicesAreEquivalent( + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + old.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones) { + allErrs = append(allErrs, + field.Invalid( + field.NewPath("spec", "networkSpec", "controlPlaneOutboundLB", "availabilityZones"), + c.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones, + "field is immutable")) + } + } + allErrs = append(allErrs, c.validateSubnetUpdate(old)...) 
if len(allErrs) == 0 { diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index 3423976cbdd..d20c1f9539c 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -364,6 +364,14 @@ type LoadBalancerSpec struct { // BackendPool describes the backend pool of the load balancer. // +optional BackendPool BackendPool `json:"backendPool,omitempty"` + // AvailabilityZones is a list of availability zones for the load balancer. + // When specified for an internal load balancer, the frontend IP configuration + // will be zone-redundant across the specified zones. + // For public load balancers, this should be set on the associated public IP addresses instead. + // +optional + // +listType=set + // +kubebuilder:validation:MaxItems=3 + AvailabilityZones []string `json:"availabilityZones,omitempty"` LoadBalancerClassSpec `json:",inline"` } diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 71792a2ce24..a281b35dc05 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -3441,6 +3441,11 @@ func (in *LoadBalancerSpec) DeepCopyInto(out *LoadBalancerSpec) { **out = **in } out.BackendPool = in.BackendPool + if in.AvailabilityZones != nil { + in, out := &in.AvailabilityZones, &out.AvailabilityZones + *out = make([]string, len(*in)) + copy(*out, *in) + } in.LoadBalancerClassSpec.DeepCopyInto(&out.LoadBalancerClassSpec) } diff --git a/azure/scope/cluster.go b/azure/scope/cluster.go index c9760b5ad7e..0c00daceb45 100644 --- a/azure/scope/cluster.go +++ b/azure/scope/cluster.go @@ -267,6 +267,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } if s.APIServerLB().FrontendIPs != nil { @@ -301,6 +302,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.APIServerLB().IdleTimeoutInMinutes, AdditionalTags: s.AdditionalTags(), AdditionalPorts: s.AdditionalAPIServerLBPorts(), + AvailabilityZones: s.APIServerLB().AvailabilityZones, } privateIPFound := false @@ -348,6 +350,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.NodeOutboundLB().IdleTimeoutInMinutes, Role: infrav1.NodeOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.NodeOutboundLB().AvailabilityZones, }) } @@ -369,6 +372,7 @@ func (s *ClusterScope) LBSpecs() []azure.ResourceSpecGetter { IdleTimeoutInMinutes: s.ControlPlaneOutboundLB().IdleTimeoutInMinutes, Role: infrav1.ControlPlaneOutboundRole, AdditionalTags: s.AdditionalTags(), + AvailabilityZones: s.ControlPlaneOutboundLB().AvailabilityZones, }) } diff --git a/azure/services/loadbalancers/spec.go b/azure/services/loadbalancers/spec.go index 5fd3cedf9f2..625fc010b4e 100644 --- a/azure/services/loadbalancers/spec.go +++ b/azure/services/loadbalancers/spec.go @@ -48,6 +48,7 @@ type LBSpec struct { IdleTimeoutInMinutes *int32 AdditionalTags map[string]string AdditionalPorts []infrav1.LoadBalancerPort + AvailabilityZones []string } // ResourceName returns the name of the load balancer. 
@@ -167,6 +168,16 @@ func (s *LBSpec) Parameters(_ context.Context, existing interface{}) (parameters func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, []*armnetwork.SubResource) { frontendIPConfigurations := make([]*armnetwork.FrontendIPConfiguration, 0) frontendIDs := make([]*armnetwork.SubResource, 0) + + // Convert availability zones to []*string for Azure SDK + var zones []*string + if len(lbSpec.AvailabilityZones) > 0 { + zones = make([]*string, len(lbSpec.AvailabilityZones)) + for i, zone := range lbSpec.AvailabilityZones { + zones[i] = ptr.To(zone) + } + } + for _, ipConfig := range lbSpec.FrontendIPConfigs { var properties armnetwork.FrontendIPConfigurationPropertiesFormat if lbSpec.Type == infrav1.Internal { @@ -187,6 +198,7 @@ func getFrontendIPConfigs(lbSpec LBSpec) ([]*armnetwork.FrontendIPConfiguration, frontendIPConfigurations = append(frontendIPConfigurations, &armnetwork.FrontendIPConfiguration{ Properties: &properties, Name: ptr.To(ipConfig.Name), + Zones: zones, }) frontendIDs = append(frontendIDs, &armnetwork.SubResource{ ID: ptr.To(azure.FrontendIPConfigID(lbSpec.SubscriptionID, lbSpec.ResourceGroup, lbSpec.Name, ipConfig.Name)), diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml index 1edca6b7df9..29df5d71e00 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azureclusters.yaml @@ -689,6 +689,17 @@ spec: description: APIServerLB is the configuration for the control-plane load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -772,6 +783,17 @@ spec: ControlPlaneOutboundLB is the configuration for the control-plane outbound load balancer. This is different from APIServerLB, and is used only in private clusters (optionally) for enabling outbound traffic. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. + items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. @@ -854,6 +876,17 @@ spec: description: NodeOutboundLB is the configuration for the node outbound load balancer. properties: + availabilityZones: + description: |- + AvailabilityZones is a list of availability zones for the load balancer. + When specified for an internal load balancer, the frontend IP configuration + will be zone-redundant across the specified zones. + For public load balancers, this should be set on the associated public IP addresses instead. 
+ items: + type: string + maxItems: 3 + type: array + x-kubernetes-list-type: set backendPool: description: BackendPool describes the backend pool of the load balancer. diff --git a/templates/cluster-template-private.yaml b/templates/cluster-template-private.yaml index 786b6d52fc2..759fe70d84f 100644 --- a/templates/cluster-template-private.yaml +++ b/templates/cluster-template-private.yaml @@ -32,6 +32,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" name: ${CLUSTER_NAME}-internal-lb type: Internal controlPlaneOutboundLB: diff --git a/templates/flavors/private/patches/private-lb.yaml b/templates/flavors/private/patches/private-lb.yaml index 76e1539df2a..a2933e29963 100644 --- a/templates/flavors/private/patches/private-lb.yaml +++ b/templates/flavors/private/patches/private-lb.yaml @@ -7,6 +7,10 @@ spec: apiServerLB: name: ${CLUSTER_NAME}-internal-lb type: Internal + availabilityZones: + - "1" + - "2" + - "3" nodeOutboundLB: frontendIPsCount: 1 controlPlaneOutboundLB: diff --git a/templates/test/ci/cluster-template-prow-private.yaml b/templates/test/ci/cluster-template-prow-private.yaml index 26910fdcb43..47d6e99cc7a 100644 --- a/templates/test/ci/cluster-template-prow-private.yaml +++ b/templates/test/ci/cluster-template-prow-private.yaml @@ -49,6 +49,10 @@ spec: location: ${AZURE_LOCATION} networkSpec: apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" frontendIPs: - name: ${CLUSTER_NAME}-internal-lb-frontend privateIP: ${AZURE_INTERNAL_LB_IP} From daacbf572f2ad15bd05f4fdada5084066cbfcccb Mon Sep 17 00:00:00 2001 From: Bryan Cox Date: Thu, 11 Dec 2025 13:44:44 -0500 Subject: [PATCH 3/3] test: Add tests for zone-redundant load balancers - Add unit tests for zone configuration on frontend IPs - Add E2E test for zone-redundant LB verification - Add apiserver-ilb-zones flavor for E2E testing --- .../loadbalancers/loadbalancers_test.go | 24 + azure/services/loadbalancers/spec_test.go | 16 + .../cluster-template-apiserver-ilb-zones.yaml | 225 ++++++++++ .../apiserver-ilb-zones/kustomization.yaml | 11 + .../apiserver-ilb-zones/patches/lb-zones.yaml | 11 + ...ter-template-prow-apiserver-ilb-zones.yaml | 410 ++++++++++++++++++ .../kustomization.yaml | 11 + .../patches/lb-zones.yaml | 11 + test/e2e/azure_test.go | 151 +++++++ test/e2e/config/azure-dev.yaml | 2 + 10 files changed, 872 insertions(+) create mode 100644 templates/cluster-template-apiserver-ilb-zones.yaml create mode 100644 templates/flavors/apiserver-ilb-zones/kustomization.yaml create mode 100644 templates/flavors/apiserver-ilb-zones/patches/lb-zones.yaml create mode 100644 templates/test/ci/cluster-template-prow-apiserver-ilb-zones.yaml create mode 100644 templates/test/ci/prow-apiserver-ilb-zones/kustomization.yaml create mode 100644 templates/test/ci/prow-apiserver-ilb-zones/patches/lb-zones.yaml diff --git a/azure/services/loadbalancers/loadbalancers_test.go b/azure/services/loadbalancers/loadbalancers_test.go index 3a61789a507..f6233a6a120 100644 --- a/azure/services/loadbalancers/loadbalancers_test.go +++ b/azure/services/loadbalancers/loadbalancers_test.go @@ -111,6 +111,30 @@ var ( APIServerPort: 6443, } + fakeInternalAPILBSpecWithZones = LBSpec{ + Name: "my-private-lb", + ResourceGroup: "my-rg", + SubscriptionID: "123", + ClusterName: "my-cluster", + Location: "my-location", + Role: infrav1.APIServerRole, + Type: infrav1.Internal, + SKU: infrav1.SKUStandard, + SubnetName: "my-cp-subnet", + BackendPoolName: "my-private-lb-backendPool", + 
IdleTimeoutInMinutes: ptr.To[int32](4), + AvailabilityZones: []string{"1", "2", "3"}, + FrontendIPConfigs: []infrav1.FrontendIP{ + { + Name: "my-private-lb-frontEnd", + FrontendIPClass: infrav1.FrontendIPClass{ + PrivateIPAddress: "10.0.0.10", + }, + }, + }, + APIServerPort: 6443, + } + fakeNodeOutboundLBSpec = LBSpec{ Name: "my-cluster", ResourceGroup: "my-rg", diff --git a/azure/services/loadbalancers/spec_test.go b/azure/services/loadbalancers/spec_test.go index 9e75779a7c1..14f2f2a3082 100644 --- a/azure/services/loadbalancers/spec_test.go +++ b/azure/services/loadbalancers/spec_test.go @@ -178,6 +178,22 @@ func TestParameters(t *testing.T) { }, expectedError: "", }, + { + name: "internal load balancer with availability zones", + spec: &fakeInternalAPILBSpecWithZones, + existing: nil, + expect: func(g *WithT, result interface{}) { + g.Expect(result).To(BeAssignableToTypeOf(armnetwork.LoadBalancer{})) + lb := result.(armnetwork.LoadBalancer) + // Verify zones are set on frontend IP configuration + g.Expect(lb.Properties.FrontendIPConfigurations).To(HaveLen(1)) + g.Expect(lb.Properties.FrontendIPConfigurations[0].Zones).To(HaveLen(3)) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[0]).To(Equal("1")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[1]).To(Equal("2")) + g.Expect(*lb.Properties.FrontendIPConfigurations[0].Zones[2]).To(Equal("3")) + }, + expectedError: "", + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { diff --git a/templates/cluster-template-apiserver-ilb-zones.yaml b/templates/cluster-template-apiserver-ilb-zones.yaml new file mode 100644 index 00000000000..64674d6a0a9 --- /dev/null +++ b/templates/cluster-template-apiserver-ilb-zones.yaml @@ -0,0 +1,225 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 192.168.0.0/16 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + identityRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureClusterIdentity + name: ${CLUSTER_IDENTITY_NAME} + location: ${AZURE_LOCATION} + networkSpec: + apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: ${CLUSTER_NAME}-api-lb + publicIP: + dnsName: ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com + name: ${CLUSTER_NAME}-api-lb + - name: ${CLUSTER_NAME}-internal-lb-private-ip + privateIP: ${AZURE_INTERNAL_LB_PRIVATE_IP:-30.0.0.100} + subnets: + - cidrBlocks: + - 30.0.0.0/16 + name: control-plane-subnet + role: control-plane + - cidrBlocks: + - 30.1.0.0/16 + name: node-subnet + role: node + vnet: + cidrBlocks: + - 30.0.0.0/8 + name: ${AZURE_VNET_NAME:=${CLUSTER_NAME}-vnet} + resourceGroup: ${AZURE_RESOURCE_GROUP:=${CLUSTER_NAME}} + subscriptionID: ${AZURE_SUBSCRIPTION_ID} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + kubeadmConfigSpec: + clusterConfiguration: + apiServer: + extraArgs: {} + timeoutForControlPlane: 20m + controllerManager: + extraArgs: + allocate-node-cidrs: "false" + cloud-provider: external + 
cluster-name: ${CLUSTER_NAME} + etcd: + local: + dataDir: /var/lib/etcddisk/etcd + extraArgs: + quota-backend-bytes: "8589934592" + diskSetup: + filesystems: + - device: /dev/disk/azure/scsi1/lun0 + extraOpts: + - -E + - lazy_itable_init=1,lazy_journal_init=1 + filesystem: ext4 + label: etcd_disk + - device: ephemeral0.1 + filesystem: ext4 + label: ephemeral0 + replaceFS: ntfs + partitions: + - device: /dev/disk/azure/scsi1/lun0 + layout: true + overwrite: false + tableType: gpt + files: + - contentFrom: + secret: + key: control-plane-azure.json + name: ${CLUSTER_NAME}-control-plane-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + mounts: + - - LABEL=etcd_disk + - /var/lib/etcddisk + postKubeadmCommands: [] + preKubeadmCommands: [] + verbosity: 10 + machineTemplate: + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-control-plane + replicas: ${CONTROL_PLANE_MACHINE_COUNT:=1} + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + template: + spec: + dataDisks: + - diskSizeGB: 256 + lun: 0 + nameSuffix: etcddisk + identity: UserAssigned + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: azure:///subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} + vmSize: ${AZURE_CONTROL_PLANE_MACHINE_TYPE} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT:=2} + selector: + matchLabels: null + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: ${CLUSTER_NAME}-md-0 + clusterName: ${CLUSTER_NAME} + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-md-0 + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + template: + spec: + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + vmSize: ${AZURE_NODE_MACHINE_TYPE} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + template: + spec: + files: + - contentFrom: + secret: + key: worker-node-azure.json + name: ${CLUSTER_NAME}-md-0-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + preKubeadmCommands: + - echo '${AZURE_INTERNAL_LB_PRIVATE_IP:-30.0.0.100} ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com' + >> /etc/hosts +--- +apiVersion: 
infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureClusterIdentity +metadata: + labels: + clusterctl.cluster.x-k8s.io/move-hierarchy: "true" + name: ${CLUSTER_IDENTITY_NAME} + namespace: default +spec: + allowedNamespaces: {} + clientID: ${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY} + tenantID: ${AZURE_TENANT_ID} + type: ${CLUSTER_IDENTITY_TYPE:=WorkloadIdentity} diff --git a/templates/flavors/apiserver-ilb-zones/kustomization.yaml b/templates/flavors/apiserver-ilb-zones/kustomization.yaml new file mode 100644 index 00000000000..f3a814b11ad --- /dev/null +++ b/templates/flavors/apiserver-ilb-zones/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: default +resources: +- ../apiserver-ilb + +patches: +- path: patches/lb-zones.yaml + +sortOptions: + order: fifo diff --git a/templates/flavors/apiserver-ilb-zones/patches/lb-zones.yaml b/templates/flavors/apiserver-ilb-zones/patches/lb-zones.yaml new file mode 100644 index 00000000000..905f0abeafc --- /dev/null +++ b/templates/flavors/apiserver-ilb-zones/patches/lb-zones.yaml @@ -0,0 +1,11 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} +spec: + networkSpec: + apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" diff --git a/templates/test/ci/cluster-template-prow-apiserver-ilb-zones.yaml b/templates/test/ci/cluster-template-prow-apiserver-ilb-zones.yaml new file mode 100644 index 00000000000..e924f94fa16 --- /dev/null +++ b/templates/test/ci/cluster-template-prow-apiserver-ilb-zones.yaml @@ -0,0 +1,410 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + labels: + cloud-provider: ${CLOUD_PROVIDER_AZURE_LABEL:=azure} + cni: calico + name: ${CLUSTER_NAME} + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 192.168.0.0/16 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + additionalTags: + buildProvenance: ${BUILD_PROVENANCE} + creationTimestamp: ${TIMESTAMP} + jobName: ${JOB_NAME} + identityRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureClusterIdentity + name: ${CLUSTER_IDENTITY_NAME} + location: ${AZURE_LOCATION} + networkSpec: + apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" + frontendIPs: + - name: ${CLUSTER_NAME}-api-lb + publicIP: + dnsName: ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com + name: ${CLUSTER_NAME}-api-lb + - name: ${CLUSTER_NAME}-internal-lb-private-ip + privateIP: ${AZURE_INTERNAL_LB_PRIVATE_IP} + subnets: + - cidrBlocks: + - ${AZURE_CP_SUBNET_CIDR} + name: control-plane-subnet + role: control-plane + - cidrBlocks: + - ${AZURE_NODE_SUBNET_CIDR} + name: node-subnet + role: node + vnet: + cidrBlocks: + - ${AZURE_VNET_CIDR} + name: ${AZURE_VNET_NAME:=${CLUSTER_NAME}-vnet} + resourceGroup: ${AZURE_RESOURCE_GROUP:=${CLUSTER_NAME}} + subscriptionID: ${AZURE_SUBSCRIPTION_ID} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + kubeadmConfigSpec: + clusterConfiguration: + apiServer: + extraArgs: {} + timeoutForControlPlane: 20m + controllerManager: + 
extraArgs: + allocate-node-cidrs: "false" + cloud-provider: external + cluster-name: ${CLUSTER_NAME} + v: "4" + etcd: + local: + dataDir: /var/lib/etcddisk/etcd + extraArgs: + quota-backend-bytes: "8589934592" + diskSetup: + filesystems: + - device: /dev/disk/azure/scsi1/lun0 + extraOpts: + - -E + - lazy_itable_init=1,lazy_journal_init=1 + filesystem: ext4 + label: etcd_disk + - device: ephemeral0.1 + filesystem: ext4 + label: ephemeral0 + replaceFS: ntfs + partitions: + - device: /dev/disk/azure/scsi1/lun0 + layout: true + overwrite: false + tableType: gpt + files: + - contentFrom: + secret: + key: control-plane-azure.json + name: ${CLUSTER_NAME}-control-plane-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + mounts: + - - LABEL=etcd_disk + - /var/lib/etcddisk + postKubeadmCommands: [] + preKubeadmCommands: [] + verbosity: 10 + machineTemplate: + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-control-plane + replicas: ${CONTROL_PLANE_MACHINE_COUNT:=1} + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + template: + spec: + dataDisks: + - diskSizeGB: 256 + lun: 0 + nameSuffix: etcddisk + identity: UserAssigned + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: azure:///subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} + vmSize: ${AZURE_CONTROL_PLANE_MACHINE_TYPE} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT:=2} + selector: + matchLabels: null + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfigTemplate + name: ${CLUSTER_NAME}-md-0 + clusterName: ${CLUSTER_NAME} + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-md-0 + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + template: + spec: + identity: UserAssigned + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: azure:///subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} + vmSize: ${AZURE_NODE_MACHINE_TYPE} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfigTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: default +spec: + template: + spec: + files: + - contentFrom: + secret: + key: worker-node-azure.json + name: ${CLUSTER_NAME}-md-0-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + joinConfiguration: + 
nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + name: '{{ ds.meta_data["local_hostname"] }}' + preKubeadmCommands: + - echo '${AZURE_INTERNAL_LB_PRIVATE_IP} ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}.${AZURE_LOCATION}.cloudapp.azure.com' + >> /etc/hosts +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureClusterIdentity +metadata: + labels: + clusterctl.cluster.x-k8s.io/move-hierarchy: "true" + name: ${CLUSTER_IDENTITY_NAME} + namespace: default +spec: + allowedNamespaces: {} + clientID: ${AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY} + tenantID: ${AZURE_TENANT_ID} + type: ${CLUSTER_IDENTITY_TYPE:=WorkloadIdentity} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineHealthCheck +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + clusterName: ${CLUSTER_NAME} + maxUnhealthy: 100% + selector: + matchLabels: + cluster.x-k8s.io/control-plane: "" + unhealthyConditions: + - status: Unknown + timeout: 300s + type: Ready + - status: "False" + timeout: 300s + type: Ready +--- +apiVersion: addons.cluster.x-k8s.io/v1alpha1 +kind: HelmChartProxy +metadata: + name: calico + namespace: default +spec: + chartName: tigera-operator + clusterSelector: + matchLabels: + cni: calico + namespace: tigera-operator + releaseName: projectcalico + repoURL: https://docs.tigera.io/calico/charts + valuesTemplate: | + installation: + cni: + type: Calico + ipam: + type: Calico + calicoNetwork: + bgp: Disabled + windowsDataplane: HNS + mtu: 1350 + ipPools:{{range $i, $cidr := .Cluster.spec.clusterNetwork.pods.cidrBlocks }} + - cidr: {{ $cidr }} + encapsulation: VXLAN{{end}} + typhaDeployment: + spec: + template: + spec: + # By default, typha tolerates all NoSchedule taints. This breaks + # scale-ins when it continuously gets scheduled onto an + # out-of-date Node that is being deleted. Tolerate only the + # NoSchedule taints that are expected. + tolerations: + - effect: NoExecute + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoSchedule + key: node.kubernetes.io/not-ready + operator: Exists + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + registry: capzcicommunity.azurecr.io + serviceCIDRs: + - 10.96.0.0/12 # must match cluster service CIDR (this is the default) + # Image and registry configuration for the tigera/operator pod + tigeraOperator: + image: tigera/operator + registry: capzcicommunity.azurecr.io + calicoctl: + image: capzcicommunity.azurecr.io/calico/ctl + # when kubernetesServiceEndpoint (required for windows) is added + # DNS configuration is needed to look up the api server name properly + # https://github.com/projectcalico/calico/issues/9536 + dnsConfig: + nameservers: + - 127.0.0.53 + options: + - name: edns0 + - name: trust-ad + kubernetesServiceEndpoint: + host: "{{ .Cluster.spec.controlPlaneEndpoint.host }}" + port: "{{ .Cluster.spec.controlPlaneEndpoint.port }}" + # By default, tigera tolerates all NoSchedule taints. This breaks upgrades + # when it continuously gets scheduled onto an out-of-date Node that is being + # deleted. Tolerate only the NoSchedule taints that are expected. 
+ tolerations: + - effect: NoExecute + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoSchedule + key: node.kubernetes.io/not-ready + operator: Exists + version: ${CALICO_VERSION} +--- +apiVersion: addons.cluster.x-k8s.io/v1alpha1 +kind: HelmChartProxy +metadata: + name: azuredisk-csi-driver-chart + namespace: default +spec: + chartName: azuredisk-csi-driver + clusterSelector: + matchLabels: + azuredisk-csi: "true" + namespace: kube-system + releaseName: azuredisk-csi-driver-oot + repoURL: https://raw.githubusercontent.com/kubernetes-sigs/azuredisk-csi-driver/master/charts + valuesTemplate: |- + controller: + replicas: 1 + runOnControlPlane: true + windows: + useHostProcessContainers: {{ hasKey .Cluster.metadata.labels "cni-windows" }} +--- +apiVersion: addons.cluster.x-k8s.io/v1alpha1 +kind: HelmChartProxy +metadata: + name: cloud-provider-azure-chart + namespace: default +spec: + chartName: cloud-provider-azure + clusterSelector: + matchLabels: + cloud-provider: azure + releaseName: cloud-provider-azure-oot + repoURL: https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo + valuesTemplate: | + infra: + clusterName: {{ .Cluster.metadata.name }} + cloudControllerManager: + clusterCIDR: {{ .Cluster.spec.clusterNetwork.pods.cidrBlocks | join "," }} + logVerbosity: 4 +--- +apiVersion: addons.cluster.x-k8s.io/v1alpha1 +kind: HelmChartProxy +metadata: + name: cloud-provider-azure-chart-ci + namespace: default +spec: + chartName: cloud-provider-azure + clusterSelector: + matchLabels: + cloud-provider: azure-ci + releaseName: cloud-provider-azure-oot + repoURL: https://raw.githubusercontent.com/kubernetes-sigs/cloud-provider-azure/master/helm/repo + valuesTemplate: | + infra: + clusterName: {{ .Cluster.metadata.name }} + cloudControllerManager: + cloudConfig: ${CLOUD_CONFIG:-"/etc/kubernetes/azure.json"} + cloudConfigSecretName: ${CONFIG_SECRET_NAME:-""} + clusterCIDR: {{ .Cluster.spec.clusterNetwork.pods.cidrBlocks | join "," }} + imageName: "${CCM_IMAGE_NAME:-""}" + imageRepository: "${IMAGE_REGISTRY:-""}" + imageTag: "${IMAGE_TAG_CCM:-""}" + logVerbosity: ${CCM_LOG_VERBOSITY:-4} + replicas: ${CCM_COUNT:-1} + enableDynamicReloading: ${ENABLE_DYNAMIC_RELOADING:-false} + cloudNodeManager: + imageName: "${CNM_IMAGE_NAME:-""}" + imageRepository: "${IMAGE_REGISTRY:-""}" + imageTag: "${IMAGE_TAG_CNM:-""}" diff --git a/templates/test/ci/prow-apiserver-ilb-zones/kustomization.yaml b/templates/test/ci/prow-apiserver-ilb-zones/kustomization.yaml new file mode 100644 index 00000000000..5f9f50c11f0 --- /dev/null +++ b/templates/test/ci/prow-apiserver-ilb-zones/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: default +resources: + - ../prow-apiserver-ilb + +patches: + - path: patches/lb-zones.yaml + +sortOptions: + order: fifo diff --git a/templates/test/ci/prow-apiserver-ilb-zones/patches/lb-zones.yaml b/templates/test/ci/prow-apiserver-ilb-zones/patches/lb-zones.yaml new file mode 100644 index 00000000000..905f0abeafc --- /dev/null +++ b/templates/test/ci/prow-apiserver-ilb-zones/patches/lb-zones.yaml @@ -0,0 +1,11 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} +spec: + networkSpec: + apiServerLB: + availabilityZones: + - "1" + - "2" + - "3" diff --git a/test/e2e/azure_test.go b/test/e2e/azure_test.go index 97eff3cfebf..b3268b090af 100644 --- a/test/e2e/azure_test.go +++ 
b/test/e2e/azure_test.go @@ -27,6 +27,8 @@ import ( "strings" "time" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork/v4" "github.com/Azure/azure-service-operator/v2/pkg/common/config" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -37,6 +39,8 @@ import ( "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/controller-runtime/pkg/client" + + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" ) var _ = Describe("Workload cluster creation", func() { @@ -1427,5 +1431,152 @@ var _ = Describe("Workload cluster creation", func() { }) }) + Context("Creating a cluster with zone-redundant load balancers [OPTIONAL]", func() { + It("with zone-redundant API server, node outbound, and control plane outbound load balancers", func() { + clusterName = getClusterName(clusterNamePrefix, "lb-zones") + + // Set up zone-redundant load balancer configuration + Expect(os.Setenv("EXP_APISERVER_ILB", "true")).To(Succeed()) + Expect(os.Setenv("AZURE_INTERNAL_LB_PRIVATE_IP", "40.0.0.100")).To(Succeed()) + Expect(os.Setenv("AZURE_VNET_CIDR", "40.0.0.0/8")).To(Succeed()) + Expect(os.Setenv("AZURE_CP_SUBNET_CIDR", "40.0.0.0/16")).To(Succeed()) + Expect(os.Setenv("AZURE_NODE_SUBNET_CIDR", "40.1.0.0/16")).To(Succeed()) + clusterctl.ApplyClusterTemplateAndWait(ctx, createApplyClusterTemplateInput( + specName, + withFlavor("apiserver-ilb-zones"), + withNamespace(namespace.Name), + withClusterName(clusterName), + withControlPlaneMachineCount(3), + withWorkerMachineCount(2), + withControlPlaneInterval(specName, "wait-control-plane-ha"), + withControlPlaneWaiters(clusterctl.ControlPlaneWaiters{ + WaitForControlPlaneInitialized: EnsureControlPlaneInitialized, + }), + withPostMachinesProvisioned(func() { + EnsureDaemonsets(ctx, func() DaemonsetsSpecInput { + return DaemonsetsSpecInput{ + BootstrapClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + ClusterName: clusterName, + } + }) + }), + ), result) + + By("Verifying load balancer zones are configured correctly in Azure", func() { + expectedZones := []string{"1", "2", "3"} + + subscriptionID := getSubscriptionID(Default) + cred, err := azidentity.NewDefaultAzureCredential(nil) + Expect(err).NotTo(HaveOccurred()) + + mgmtClient := bootstrapClusterProxy.GetClient() + Expect(mgmtClient).NotTo(BeNil()) + + azureCluster := &infrav1.AzureCluster{} + err = mgmtClient.Get(ctx, client.ObjectKey{ + Namespace: namespace.Name, + Name: clusterName, + }, azureCluster) + Expect(err).NotTo(HaveOccurred()) + + resourceGroupName := azureCluster.Spec.ResourceGroup + Expect(resourceGroupName).NotTo(BeEmpty()) + + lbClient, err := armnetwork.NewLoadBalancersClient(subscriptionID, cred, nil) + Expect(err).NotTo(HaveOccurred()) + + // Verify API Server Load Balancer zones + if azureCluster.Spec.NetworkSpec.APIServerLB != nil { + Expect(azureCluster.Spec.NetworkSpec.APIServerLB.AvailabilityZones).To(Equal(expectedZones), + "APIServerLB should have zones configured in AzureCluster spec") + + lbName := azureCluster.Spec.NetworkSpec.APIServerLB.Name + Eventually(func(g Gomega) { + lb, err := lbClient.Get(ctx, resourceGroupName, lbName, nil) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(lb.Properties).NotTo(BeNil()) + g.Expect(lb.Properties.FrontendIPConfigurations).NotTo(BeEmpty()) + + for _, frontendIP := range lb.Properties.FrontendIPConfigurations { + g.Expect(frontendIP.Zones).NotTo(BeNil(), "Frontend IP should have zones configured") + 
g.Expect(frontendIP.Zones).To(HaveLen(3), "Frontend IP should have 3 zones") + + zonesMap := make(map[string]bool) + for _, zone := range frontendIP.Zones { + if zone != nil { + zonesMap[*zone] = true + } + } + for _, expectedZone := range expectedZones { + g.Expect(zonesMap[expectedZone]).To(BeTrue(), "Zone %s should be configured", expectedZone) + } + } + }, retryableOperationTimeout, retryableOperationSleepBetweenRetries).Should(Succeed()) + } + + // Verify Node Outbound Load Balancer zones + if azureCluster.Spec.NetworkSpec.NodeOutboundLB != nil { + Expect(azureCluster.Spec.NetworkSpec.NodeOutboundLB.AvailabilityZones).To(Equal(expectedZones), + "NodeOutboundLB should have zones configured in AzureCluster spec") + + lbName := azureCluster.Spec.NetworkSpec.NodeOutboundLB.Name + Eventually(func(g Gomega) { + lb, err := lbClient.Get(ctx, resourceGroupName, lbName, nil) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(lb.Properties).NotTo(BeNil()) + g.Expect(lb.Properties.FrontendIPConfigurations).NotTo(BeEmpty()) + + for _, frontendIP := range lb.Properties.FrontendIPConfigurations { + g.Expect(frontendIP.Zones).NotTo(BeNil(), "Frontend IP should have zones configured") + g.Expect(frontendIP.Zones).To(HaveLen(3), "Frontend IP should have 3 zones") + + zonesMap := make(map[string]bool) + for _, zone := range frontendIP.Zones { + if zone != nil { + zonesMap[*zone] = true + } + } + for _, expectedZone := range expectedZones { + g.Expect(zonesMap[expectedZone]).To(BeTrue(), "Zone %s should be configured", expectedZone) + } + } + }, retryableOperationTimeout, retryableOperationSleepBetweenRetries).Should(Succeed()) + } + + // Verify Control Plane Outbound Load Balancer zones + if azureCluster.Spec.NetworkSpec.ControlPlaneOutboundLB != nil { + Expect(azureCluster.Spec.NetworkSpec.ControlPlaneOutboundLB.AvailabilityZones).To(Equal(expectedZones), + "ControlPlaneOutboundLB should have zones configured in AzureCluster spec") + + lbName := azureCluster.Spec.NetworkSpec.ControlPlaneOutboundLB.Name + Eventually(func(g Gomega) { + lb, err := lbClient.Get(ctx, resourceGroupName, lbName, nil) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(lb.Properties).NotTo(BeNil()) + g.Expect(lb.Properties.FrontendIPConfigurations).NotTo(BeEmpty()) + + for _, frontendIP := range lb.Properties.FrontendIPConfigurations { + g.Expect(frontendIP.Zones).NotTo(BeNil(), "Frontend IP should have zones configured") + g.Expect(frontendIP.Zones).To(HaveLen(3), "Frontend IP should have 3 zones") + + zonesMap := make(map[string]bool) + for _, zone := range frontendIP.Zones { + if zone != nil { + zonesMap[*zone] = true + } + } + for _, expectedZone := range expectedZones { + g.Expect(zonesMap[expectedZone]).To(BeTrue(), "Zone %s should be configured", expectedZone) + } + } + }, retryableOperationTimeout, retryableOperationSleepBetweenRetries).Should(Succeed()) + } + }) + + By("PASSED!") + }) + }) + // TODO: add a same test as above for a windows cluster }) diff --git a/test/e2e/config/azure-dev.yaml b/test/e2e/config/azure-dev.yaml index b89ed3fb755..0d39ecc0e44 100644 --- a/test/e2e/config/azure-dev.yaml +++ b/test/e2e/config/azure-dev.yaml @@ -188,6 +188,8 @@ providers: targetName: "cluster-template-apiserver-ilb.yaml" - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-apiserver-ilb-custom-images.yaml" targetName: "cluster-template-apiserver-ilb-custom-images.yaml" + - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-apiserver-ilb-zones.yaml" + targetName: "cluster-template-apiserver-ilb-zones.yaml" - 
sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-dalec-custom-builds.yaml" targetName: "cluster-template-dalec-custom-builds.yaml" - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-azl3.yaml"