diff --git a/api/v1beta1/awscluster_conversion.go b/api/v1beta1/awscluster_conversion.go index 1ebb979848..087f1cdd95 100644 --- a/api/v1beta1/awscluster_conversion.go +++ b/api/v1beta1/awscluster_conversion.go @@ -90,6 +90,7 @@ func (src *AWSCluster) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.NetworkSpec.AdditionalControlPlaneIngressRules = restored.Spec.NetworkSpec.AdditionalControlPlaneIngressRules dst.Spec.NetworkSpec.AdditionalNodeIngressRules = restored.Spec.NetworkSpec.AdditionalNodeIngressRules dst.Spec.NetworkSpec.NodePortIngressRuleCidrBlocks = restored.Spec.NetworkSpec.NodePortIngressRuleCidrBlocks + dst.Spec.NetworkSpec.VPC.AvailabilityZones = restored.Spec.NetworkSpec.VPC.AvailabilityZones if restored.Spec.NetworkSpec.VPC.IPAMPool != nil { if dst.Spec.NetworkSpec.VPC.IPAMPool == nil { diff --git a/api/v1beta1/zz_generated.conversion.go b/api/v1beta1/zz_generated.conversion.go index 8e445247da..28de0a0cd3 100644 --- a/api/v1beta1/zz_generated.conversion.go +++ b/api/v1beta1/zz_generated.conversion.go @@ -2339,6 +2339,7 @@ func autoConvert_v1beta2_VPCSpec_To_v1beta1_VPCSpec(in *v1beta2.VPCSpec, out *VP out.Tags = *(*Tags)(unsafe.Pointer(&in.Tags)) out.AvailabilityZoneUsageLimit = (*int)(unsafe.Pointer(in.AvailabilityZoneUsageLimit)) out.AvailabilityZoneSelection = (*AZSelectionScheme)(unsafe.Pointer(in.AvailabilityZoneSelection)) + // WARNING: in.AvailabilityZones requires manual conversion: does not exist in peer-type // WARNING: in.EmptyRoutesDefaultVPCSecurityGroup requires manual conversion: does not exist in peer-type // WARNING: in.PrivateDNSHostnameTypeOnLaunch requires manual conversion: does not exist in peer-type // WARNING: in.ElasticIPPool requires manual conversion: does not exist in peer-type diff --git a/api/v1beta2/awscluster_webhook.go b/api/v1beta2/awscluster_webhook.go index ec4fac40af..99e9893eb8 100644 --- a/api/v1beta2/awscluster_webhook.go +++ b/api/v1beta2/awscluster_webhook.go @@ -356,6 +356,10 @@ func (r *AWSCluster) validateNetwork() field.ErrorList { } } + if err := r.Spec.NetworkSpec.VPC.ValidateAvailabilityZones(); err != nil { + allErrs = append(allErrs, err) + } + return allErrs } diff --git a/api/v1beta2/defaults.go b/api/v1beta2/defaults.go index f10bb895c1..d2be67c5c0 100644 --- a/api/v1beta2/defaults.go +++ b/api/v1beta2/defaults.go @@ -18,6 +18,7 @@ package v1beta2 import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" clusterv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3" ) @@ -51,6 +52,15 @@ func SetDefaults_NetworkSpec(obj *NetworkSpec) { //nolint:golint,stylecheck }, } } + // If AvailabilityZones are not set, set defaults for AZ selection + if obj.VPC.AvailabilityZones == nil { + if obj.VPC.AvailabilityZoneUsageLimit == nil { + obj.VPC.AvailabilityZoneUsageLimit = ptr.To(3) + } + if obj.VPC.AvailabilityZoneSelection == nil { + obj.VPC.AvailabilityZoneSelection = &AZSelectionSchemeOrdered + } + } } // SetDefaults_AWSClusterSpec is used by defaulter-gen. diff --git a/api/v1beta2/network_types.go b/api/v1beta2/network_types.go index 0a27a1c4db..1dde676f28 100644 --- a/api/v1beta2/network_types.go +++ b/api/v1beta2/network_types.go @@ -23,6 +23,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/aws/aws-sdk-go/aws" + "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/utils/ptr" ) @@ -454,7 +455,7 @@ type VPCSpec struct { // should be used in a region when automatically creating subnets. If a region has more // than this number of AZs then this number of AZs will be picked randomly when creating // default subnets. Defaults to 3 - // +kubebuilder:default=3 + // +optional // +kubebuilder:validation:Minimum=1 AvailabilityZoneUsageLimit *int `json:"availabilityZoneUsageLimit,omitempty"` @@ -463,10 +464,16 @@ type VPCSpec struct { // Ordered - selects based on alphabetical order // Random - selects AZs randomly in a region // Defaults to Ordered - // +kubebuilder:default=Ordered + // +optional // +kubebuilder:validation:Enum=Ordered;Random AvailabilityZoneSelection *AZSelectionScheme `json:"availabilityZoneSelection,omitempty"` + // AvailabilityZones defines a list of Availability Zones in which to create network resources in. + // Cannot be defined at the same time as AvailabilityZoneSelection and AvailabilityZoneUsageLimit. + // +optional + // +kubebuilder:validation:MinItems=1 + AvailabilityZones []string `json:"availabilityZones,omitempty"` + // EmptyRoutesDefaultVPCSecurityGroup specifies whether the default VPC security group ingress // and egress rules should be removed. // @@ -539,6 +546,15 @@ func (v *VPCSpec) GetPublicIpv4Pool() *string { return nil } +// ValidateAvailabilityZones returns an error if the availability zones field combination is invalid. +func (v *VPCSpec) ValidateAvailabilityZones() *field.Error { + if len(v.AvailabilityZones) > 0 && (v.AvailabilityZoneSelection != nil || v.AvailabilityZoneUsageLimit != nil) { + availabilityZonesField := field.NewPath("spec", "network", "vpc", "availabilityZones") + return field.Invalid(availabilityZonesField, v.AvailabilityZoneSelection, "availabilityZones cannot be set if availabilityZoneUsageLimit and availabilityZoneSelection are set") + } + return nil +} + // SubnetSpec configures an AWS Subnet. type SubnetSpec struct { // ID defines a unique identifier to reference this resource. diff --git a/api/v1beta2/zz_generated.deepcopy.go b/api/v1beta2/zz_generated.deepcopy.go index 7d39649cfa..88b74559f4 100644 --- a/api/v1beta2/zz_generated.deepcopy.go +++ b/api/v1beta2/zz_generated.deepcopy.go @@ -2215,6 +2215,11 @@ func (in *VPCSpec) DeepCopyInto(out *VPCSpec) { *out = new(AZSelectionScheme) **out = **in } + if in.AvailabilityZones != nil { + in, out := &in.AvailabilityZones, &out.AvailabilityZones + *out = make([]string, len(*in)) + copy(*out, *in) + } if in.PrivateDNSHostnameTypeOnLaunch != nil { in, out := &in.PrivateDNSHostnameTypeOnLaunch, &out.PrivateDNSHostnameTypeOnLaunch *out = new(string) diff --git a/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml b/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml index 29cc567267..17bf82526a 100644 --- a/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml +++ b/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml @@ -674,7 +674,6 @@ spec: description: VPC configuration. properties: availabilityZoneSelection: - default: Ordered description: |- AvailabilityZoneSelection specifies how AZs should be selected if there are more AZs in a region than specified by AvailabilityZoneUsageLimit. There are 2 selection schemes: @@ -686,7 +685,6 @@ spec: - Random type: string availabilityZoneUsageLimit: - default: 3 description: |- AvailabilityZoneUsageLimit specifies the maximum number of availability zones (AZ) that should be used in a region when automatically creating subnets. If a region has more @@ -694,6 +692,14 @@ spec: default subnets. Defaults to 3 minimum: 1 type: integer + availabilityZones: + description: |- + AvailabilityZones defines a list of Availability Zones in which to create network resources in. + Cannot be defined at the same time as AvailabilityZoneSelection and AvailabilityZoneUsageLimit. + items: + type: string + minItems: 1 + type: array carrierGatewayId: description: |- CarrierGatewayID is the id of the internet gateway associated with the VPC, @@ -2813,7 +2819,6 @@ spec: description: VPC configuration. properties: availabilityZoneSelection: - default: Ordered description: |- AvailabilityZoneSelection specifies how AZs should be selected if there are more AZs in a region than specified by AvailabilityZoneUsageLimit. There are 2 selection schemes: @@ -2825,7 +2830,6 @@ spec: - Random type: string availabilityZoneUsageLimit: - default: 3 description: |- AvailabilityZoneUsageLimit specifies the maximum number of availability zones (AZ) that should be used in a region when automatically creating subnets. If a region has more @@ -2833,6 +2837,14 @@ spec: default subnets. Defaults to 3 minimum: 1 type: integer + availabilityZones: + description: |- + AvailabilityZones defines a list of Availability Zones in which to create network resources in. + Cannot be defined at the same time as AvailabilityZoneSelection and AvailabilityZoneUsageLimit. + items: + type: string + minItems: 1 + type: array carrierGatewayId: description: |- CarrierGatewayID is the id of the internet gateway associated with the VPC, diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml index 7ff40608ac..26d6cce41f 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml @@ -1626,7 +1626,6 @@ spec: description: VPC configuration. properties: availabilityZoneSelection: - default: Ordered description: |- AvailabilityZoneSelection specifies how AZs should be selected if there are more AZs in a region than specified by AvailabilityZoneUsageLimit. There are 2 selection schemes: @@ -1638,7 +1637,6 @@ spec: - Random type: string availabilityZoneUsageLimit: - default: 3 description: |- AvailabilityZoneUsageLimit specifies the maximum number of availability zones (AZ) that should be used in a region when automatically creating subnets. If a region has more @@ -1646,6 +1644,14 @@ spec: default subnets. Defaults to 3 minimum: 1 type: integer + availabilityZones: + description: |- + AvailabilityZones defines a list of Availability Zones in which to create network resources in. + Cannot be defined at the same time as AvailabilityZoneSelection and AvailabilityZoneUsageLimit. + items: + type: string + minItems: 1 + type: array carrierGatewayId: description: |- CarrierGatewayID is the id of the internet gateway associated with the VPC, diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml index ab85f67681..e10cbd7eab 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml @@ -1214,7 +1214,6 @@ spec: description: VPC configuration. properties: availabilityZoneSelection: - default: Ordered description: |- AvailabilityZoneSelection specifies how AZs should be selected if there are more AZs in a region than specified by AvailabilityZoneUsageLimit. There are 2 selection schemes: @@ -1226,7 +1225,6 @@ spec: - Random type: string availabilityZoneUsageLimit: - default: 3 description: |- AvailabilityZoneUsageLimit specifies the maximum number of availability zones (AZ) that should be used in a region when automatically creating subnets. If a region has more @@ -1234,6 +1232,14 @@ spec: default subnets. Defaults to 3 minimum: 1 type: integer + availabilityZones: + description: |- + AvailabilityZones defines a list of Availability Zones in which to create network resources in. + Cannot be defined at the same time as AvailabilityZoneSelection and AvailabilityZoneUsageLimit. + items: + type: string + minItems: 1 + type: array carrierGatewayId: description: |- CarrierGatewayID is the id of the internet gateway associated with the VPC, diff --git a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go index 8970b29cd7..ec43f697fd 100644 --- a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go +++ b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go @@ -28,6 +28,7 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apimachinery/pkg/util/version" "k8s.io/klog/v2" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" @@ -489,6 +490,10 @@ func (r *AWSManagedControlPlane) validateNetwork() field.ErrorList { allErrs = append(allErrs, field.Invalid(ipamPoolField, r.Spec.NetworkSpec.VPC.IPv6.IPAMPool, "ipamPool must have either id or name")) } + if err := r.Spec.NetworkSpec.VPC.ValidateAvailabilityZones(); err != nil { + allErrs = append(allErrs, err) + } + return allErrs } @@ -520,6 +525,16 @@ func (*awsManagedControlPlaneWebhook) Default(_ context.Context, obj runtime.Obj } } + // If AvailabilityZones are not set, set defaults for AZ selection + if r.Spec.NetworkSpec.VPC.AvailabilityZones == nil { + if r.Spec.NetworkSpec.VPC.AvailabilityZoneUsageLimit == nil { + r.Spec.NetworkSpec.VPC.AvailabilityZoneUsageLimit = ptr.To(3) + } + if r.Spec.NetworkSpec.VPC.AvailabilityZoneSelection == nil { + r.Spec.NetworkSpec.VPC.AvailabilityZoneSelection = &infrav1.AZSelectionSchemeOrdered + } + } + infrav1.SetDefaults_Bastion(&r.Spec.Bastion) infrav1.SetDefaults_NetworkSpec(&r.Spec.NetworkSpec) diff --git a/pkg/cloud/services/network/subnets.go b/pkg/cloud/services/network/subnets.go index 1cf991ec5b..6bc966c8ae 100644 --- a/pkg/cloud/services/network/subnets.go +++ b/pkg/cloud/services/network/subnets.go @@ -239,32 +239,18 @@ func (s *Service) reconcileZoneInfo(subnets infrav1.Subnets) error { } func (s *Service) getDefaultSubnets() (infrav1.Subnets, error) { - zones, err := s.getAvailableZones() - if err != nil { - return nil, err - } - - maxZones := defaultMaxNumAZs - if s.scope.VPC().AvailabilityZoneUsageLimit != nil { - maxZones = *s.scope.VPC().AvailabilityZoneUsageLimit - } - selectionScheme := infrav1.AZSelectionSchemeOrdered - if s.scope.VPC().AvailabilityZoneSelection != nil { - selectionScheme = *s.scope.VPC().AvailabilityZoneSelection - } + var ( + // By default, take the set availability zones. If they are not defined, + // fall back to `availabilityZoneUsageLimit`/`availabilityZoneSelection` + zones = s.scope.VPC().AvailabilityZones + err error + ) - if len(zones) > maxZones { - s.scope.Debug("region has more than AvailabilityZoneUsageLimit availability zones, picking zones to use", "region", s.scope.Region(), "AvailabilityZoneUsageLimit", maxZones) - if selectionScheme == infrav1.AZSelectionSchemeRandom { - rand.Shuffle(len(zones), func(i, j int) { - zones[i], zones[j] = zones[j], zones[i] - }) - } - if selectionScheme == infrav1.AZSelectionSchemeOrdered { - sort.Strings(zones) + if len(zones) == 0 { + zones, err = s.selectZones() + if err != nil { + return nil, errors.Wrap(err, "failed to select availability zones") } - zones = zones[:maxZones] - s.scope.Debug("zones selected", "region", s.scope.Region(), "zones", zones) } // 1 private subnet for each AZ plus 1 other subnet that will be further sub-divided for the public subnets or vice versa if @@ -354,6 +340,39 @@ func (s *Service) getDefaultSubnets() (infrav1.Subnets, error) { return subnets, nil } +func (s *Service) selectZones() ([]string, error) { + zones, err := s.getAvailableZones() + if err != nil { + return nil, err + } + + maxZones := defaultMaxNumAZs + if s.scope.VPC().AvailabilityZoneUsageLimit != nil { + maxZones = *s.scope.VPC().AvailabilityZoneUsageLimit + } + selectionScheme := infrav1.AZSelectionSchemeOrdered + if s.scope.VPC().AvailabilityZoneSelection != nil { + selectionScheme = *s.scope.VPC().AvailabilityZoneSelection + } + + if len(zones) > maxZones { + s.scope.Debug("region has more than AvailabilityZoneUsageLimit availability zones, picking zones to use", "region", s.scope.Region(), "AvailabilityZoneUsageLimit", maxZones) + switch selectionScheme { + case infrav1.AZSelectionSchemeRandom: + rand.Shuffle(len(zones), func(i, j int) { + zones[i], zones[j] = zones[j], zones[i] + }) + case infrav1.AZSelectionSchemeOrdered: + sort.Strings(zones) + default: + } + zones = zones[:maxZones] + s.scope.Debug("zones selected", "region", s.scope.Region(), "zones", zones) + } + + return zones, nil +} + func (s *Service) deleteSubnets() error { if s.scope.VPC().IsUnmanaged(s.scope.Name()) { s.scope.Trace("Skipping subnets deletion in unmanaged mode") diff --git a/pkg/cloud/services/network/subnets_test.go b/pkg/cloud/services/network/subnets_test.go index 11dbde0dd4..5d7ff0df8e 100644 --- a/pkg/cloud/services/network/subnets_test.go +++ b/pkg/cloud/services/network/subnets_test.go @@ -4162,6 +4162,77 @@ func TestReconcileSubnets(t *testing.T) { After(zone2PrivateSubnet) }, }, + { + name: "Managed VPC, no subnets exist, one az is explicitly defined, expect one private and one public from default", + input: NewClusterScope().WithNetwork(&infrav1.NetworkSpec{ + VPC: infrav1.VPCSpec{ + ID: subnetsVPCID, + Tags: infrav1.Tags{ + infrav1.ClusterTagKey("test-cluster"): "owned", + }, + CidrBlock: defaultVPCCidr, + AvailabilityZones: []string{"us-east-1b"}, + }, + Subnets: []infrav1.SubnetSpec{}, + }), + + expect: func(m *mocks.MockEC2APIMockRecorder) { + describeCall := m.DescribeSubnets(context.TODO(), gomock.Eq(&ec2.DescribeSubnetsInput{ + Filters: []types.Filter{ + { + Name: aws.String("state"), + Values: []string{"pending", "available"}, + }, + { + Name: aws.String("vpc-id"), + Values: []string{subnetsVPCID}, + }, + }, + })).Return(&ec2.DescribeSubnetsOutput{}, nil) + + m.DescribeAvailabilityZones(context.TODO(), gomock.Any()). + Return(&ec2.DescribeAvailabilityZonesOutput{ + AvailabilityZones: []types.AvailabilityZone{ + { + ZoneName: aws.String("us-east-1a"), + ZoneType: aws.String("availability-zone"), + }, + { + ZoneName: aws.String("us-east-1b"), + ZoneType: aws.String("availability-zone"), + }, + { + ZoneName: aws.String("us-east-1c"), + ZoneType: aws.String("availability-zone"), + }, + }, + }, nil).Times(3) // called once by getAvailableZones, and twice by createSubnet (one private, one public) + + m.DescribeRouteTables(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeRouteTablesInput{})). + Return(&ec2.DescribeRouteTablesOutput{}, nil) + + m.DescribeNatGateways(context.TODO(), gomock.Eq(&ec2.DescribeNatGatewaysInput{ + Filter: []types.Filter{ + { + Name: aws.String("vpc-id"), + Values: []string{subnetsVPCID}, + }, + { + Name: aws.String("state"), + Values: []string{"pending", "available"}, + }, + }, + }), + gomock.Any()).Return(&ec2.DescribeNatGatewaysOutput{}, nil) + + zone1PublicSubnet := stubGenMockCreateSubnet(m, "test-cluster", "us-east-1b", "public", "10.0.0.0/17", false).After(describeCall) + + stubMockModifySubnetAttributeWithContext(m, "subnet-public-us-east-1b"). + After(zone1PublicSubnet) + + stubGenMockCreateSubnet(m, "test-cluster", "us-east-1b", "private", "10.0.128.0/17", false).After(zone1PublicSubnet) + }, + }, } for _, tc := range testCases {