diff --git a/controllers/awsmachine_controller.go b/controllers/awsmachine_controller.go index 8f5773d0f1..56c7cb983f 100644 --- a/controllers/awsmachine_controller.go +++ b/controllers/awsmachine_controller.go @@ -533,17 +533,20 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope * } // BYO Public IPv4 Pool feature: allocates and associates an EIP to machine when PublicIP and - // cluster-wide Public IPv4 Pool configuration are set. The EIP must be associated after the instance - // is created and transictioned from Pending state. - // In the regular flow, if the instance have already a public IPv4 address (EIP) associated it will - // be released when a new is assigned, the createInstance() prevents that behavior by enforcing - // to not launch an instance with EIP, allowing ReconcileElasticIPFromPublicPool assigning - // a BYOIP without duplication. + // cluster-wide config Public IPv4 Pool configuration are set. The custom EIP is associated + // after the instance is created and transictioned to Running state. + // The CreateInstance() is enforcing to not assign public IP address when PublicIP is set with + // BYOIpv4 Pool, preventing a duplicated EIP creation. if pool := machineScope.GetElasticIPPool(); pool != nil { - if err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance); err != nil { - machineScope.Error(err, "failed to associate elastic IP address") + requeue, err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance) + if err != nil { + machineScope.Error(err, "Failed to reconcile BYO Public IPv4") return ctrl.Result{}, err } + if requeue { + machineScope.Debug("Found instance in pending state while reconciling publicIpv4Pool, requeue", "instance", instance.ID) + return ctrl.Result{RequeueAfter: DefaultReconcilerRequeue}, nil + } } if feature.Gates.Enabled(feature.EventBridgeInstanceState) { diff --git a/controllers/awsmachine_controller_unit_test.go b/controllers/awsmachine_controller_unit_test.go index 78058f3cec..9395db0794 100644 --- a/controllers/awsmachine_controller_unit_test.go +++ b/controllers/awsmachine_controller_unit_test.go @@ -1023,7 +1023,7 @@ func TestAWSMachineReconciler(t *testing.T) { PublicIpv4Pool: aws.String("ipv4pool-ec2-0123456789abcdef0"), PublicIpv4PoolFallBackOrder: ptr.To(infrav1.PublicIpv4PoolFallbackOrderAmazonPool), } - ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(false, nil).AnyTimes() _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs, cs) g.Expect(err).To(BeNil()) diff --git a/pkg/cloud/services/ec2/eip.go b/pkg/cloud/services/ec2/eip.go index 77484e201c..d638ace370 100644 --- a/pkg/cloud/services/ec2/eip.go +++ b/pkg/cloud/services/ec2/eip.go @@ -6,6 +6,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" @@ -16,17 +17,51 @@ func getElasticIPRoleName(instanceID string) string { } // ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool. -func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error { - // TODO: check if the instance is in the state allowing EIP association. - // Expected instance states: pending or running - // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html +func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error) { + shouldRequeue := true + // Should not happen + if pool == nil { + return shouldRequeue, fmt.Errorf("unexpected behavior, pool must be set when reconcile ElasticIPPool") + } + iip := ptr.Deref(instance.PublicIP, "") + s.scope.Debug("Reconciling machine with custom Public IPv4 Pool", "instance-id", instance.ID, "instance-state", instance.State, "instance-public-ip", iip, "publicIpv4PoolID", pool.PublicIpv4Pool) + + // Requeue when the instance is not ready to be associated. + if instance.State != infrav1.InstanceStateRunning { + s.scope.Debug("Unable to reconcile Elastic IP Pool for instance", "instance-id", instance.ID, "instance-state", instance.State) + return shouldRequeue, nil + } + + // All done, must reconcile only when the instance is in running state. + shouldRequeue = false + + // Prevent running association every reconciliation when it is already done. + addrs, err := s.netService.GetAddresses(getElasticIPRoleName(instance.ID)) + if err != nil { + s.scope.Error(err, "error checking if addresses exists for Elastic IP Pool to machine", "eip-role", getElasticIPRoleName(instance.ID)) + return shouldRequeue, err + } + if len(addrs.Addresses) > 0 { + if len(addrs.Addresses) != 1 { + return shouldRequeue, fmt.Errorf("unexpected number of EIPs allocated to the role. expected 1, got %d", len(addrs.Addresses)) + } + addr := addrs.Addresses[0] + // address is already associated. + if addr.AssociationId != nil && addr.InstanceId != nil && *addr.InstanceId == instance.ID { + s.scope.Debug("Machine is already associated with an Elastic IP with custom Public IPv4 pool", "eip", addr.AllocationId, "eip-address", addr.PublicIp, "eip-associationID", addr.AssociationId, "eip-instance", addr.InstanceId) + return shouldRequeue, nil + } + } + + // Get existing, or allocate an EIP, then Associate to the machine. + // Should requeue if any error is returned in the process. if err := s.getAndAssociateAddressesToInstance(pool, getElasticIPRoleName(instance.ID), instance.ID); err != nil { - return fmt.Errorf("failed to reconcile Elastic IP: %w", err) + return true, fmt.Errorf("failed to reconcile Elastic IP: %w", err) } - return nil + return shouldRequeue, nil } -// ReleaseElasticIP releases a specific elastic IP based on the instance role. +// ReleaseElasticIP releases a specific Elastic IP based on the instance role. func (s *Service) ReleaseElasticIP(instanceID string) error { return s.netService.ReleaseAddressByRole(getElasticIPRoleName(instanceID)) } diff --git a/pkg/cloud/services/ec2/instances.go b/pkg/cloud/services/ec2/instances.go index ac11fea9fd..0742b0c589 100644 --- a/pkg/cloud/services/ec2/instances.go +++ b/pkg/cloud/services/ec2/instances.go @@ -572,21 +572,13 @@ func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instan input.NetworkInterfaces = netInterfaces } else { - if ptr.Deref(i.PublicIPOnLaunch, false) { - input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{ - { - DeviceIndex: aws.Int64(0), - SubnetId: aws.String(i.SubnetID), - Groups: aws.StringSlice(i.SecurityGroupIDs), - AssociatePublicIpAddress: i.PublicIPOnLaunch, - }, - } - } else { - input.SubnetId = aws.String(i.SubnetID) - - if len(i.SecurityGroupIDs) > 0 { - input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs) - } + input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String(i.SubnetID), + Groups: aws.StringSlice(i.SecurityGroupIDs), + AssociatePublicIpAddress: i.PublicIPOnLaunch, + }, } } diff --git a/pkg/cloud/services/ec2/instances_test.go b/pkg/cloud/services/ec2/instances_test.go index 654403427f..0f5ab73781 100644 --- a/pkg/cloud/services/ec2/instances_test.go +++ b/pkg/cloud/services/ec2/instances_test.go @@ -672,11 +672,16 @@ func TestCreateInstance(t *testing.T) { }, nil) m. RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{ - ImageId: aws.String("abc"), - InstanceType: aws.String("m5.2xlarge"), - KeyName: aws.String("default"), - SecurityGroupIds: aws.StringSlice([]string{"2", "3"}), - SubnetId: aws.String("subnet-3"), + ImageId: aws.String("abc"), + InstanceType: aws.String("m5.2xlarge"), + KeyName: aws.String("default"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-3"), + Groups: aws.StringSlice([]string{"2", "3"}), + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), @@ -920,11 +925,16 @@ func TestCreateInstance(t *testing.T) { }, nil) m. RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{ - ImageId: aws.String("abc"), - InstanceType: aws.String("m5.2xlarge"), - KeyName: aws.String("default"), - SecurityGroupIds: aws.StringSlice([]string{"4", "3"}), - SubnetId: aws.String("subnet-5"), + ImageId: aws.String("abc"), + InstanceType: aws.String("m5.2xlarge"), + KeyName: aws.String("default"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-5"), + Groups: aws.StringSlice([]string{"4", "3"}), + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), @@ -3346,8 +3356,13 @@ func TestCreateInstance(t *testing.T) { Placement: &ec2.Placement{ Tenancy: &tenancy, }, - SecurityGroupIds: []*string{aws.String("2"), aws.String("3")}, - SubnetId: aws.String("subnet-1"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-1"), + Groups: []*string{aws.String("2"), aws.String("3")}, + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), @@ -3555,8 +3570,13 @@ func TestCreateInstance(t *testing.T) { Placement: &ec2.Placement{ GroupName: aws.String("placement-group1"), }, - SecurityGroupIds: []*string{aws.String("2"), aws.String("3")}, - SubnetId: aws.String("subnet-1"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-1"), + Groups: []*string{aws.String("2"), aws.String("3")}, + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), @@ -3785,8 +3805,13 @@ func TestCreateInstance(t *testing.T) { Tenancy: &tenancy, GroupName: aws.String("placement-group1"), }, - SecurityGroupIds: []*string{aws.String("2"), aws.String("3")}, - SubnetId: aws.String("subnet-1"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-1"), + Groups: []*string{aws.String("2"), aws.String("3")}, + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), @@ -3976,8 +4001,13 @@ func TestCreateInstance(t *testing.T) { GroupName: aws.String("placement-group1"), PartitionNumber: aws.Int64(2), }, - SecurityGroupIds: []*string{aws.String("2"), aws.String("3")}, - SubnetId: aws.String("subnet-1"), + NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{ + { + DeviceIndex: aws.Int64(0), + SubnetId: aws.String("subnet-1"), + Groups: aws.StringSlice([]string{"2", "3"}), + }, + }, TagSpecifications: []*ec2.TagSpecification{ { ResourceType: aws.String("instance"), diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index f993d9bd84..5e0f3dc2e6 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -82,7 +82,7 @@ type EC2Interface interface { DeleteBastion() error ReconcileBastion() error // ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool. - ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error + ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error) // ReleaseElasticIP reconciles the elastic IP from a custom Public IPv4 Pool. ReleaseElasticIP(instanceID string) error diff --git a/pkg/cloud/services/mock_services/ec2_interface_mock.go b/pkg/cloud/services/mock_services/ec2_interface_mock.go index 02c3e09c47..d46dc001e4 100644 --- a/pkg/cloud/services/mock_services/ec2_interface_mock.go +++ b/pkg/cloud/services/mock_services/ec2_interface_mock.go @@ -334,11 +334,12 @@ func (mr *MockEC2InterfaceMockRecorder) ReconcileBastion() *gomock.Call { } // ReconcileElasticIPFromPublicPool mocks base method. -func (m *MockEC2Interface) ReconcileElasticIPFromPublicPool(arg0 *v1beta2.ElasticIPPool, arg1 *v1beta2.Instance) error { +func (m *MockEC2Interface) ReconcileElasticIPFromPublicPool(arg0 *v1beta2.ElasticIPPool, arg1 *v1beta2.Instance) (bool, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "ReconcileElasticIPFromPublicPool", arg0, arg1) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 } // ReconcileElasticIPFromPublicPool indicates an expected call of ReconcileElasticIPFromPublicPool. diff --git a/pkg/cloud/services/network/eips.go b/pkg/cloud/services/network/eips.go index f301650797..0c546af5bc 100644 --- a/pkg/cloud/services/network/eips.go +++ b/pkg/cloud/services/network/eips.go @@ -191,12 +191,17 @@ func (s *Service) GetOrAllocateAddresses(pool *infrav1.ElasticIPPool, num int, r return s.getOrAllocateAddresses(num, role, pool) } +// GetAddresses returns the address associated to a given role. +func (s *Service) GetAddresses(role string) (*ec2.DescribeAddressesOutput, error) { + return s.describeAddresses(role) +} + // ReleaseAddressByRole releases EIP addresses filtering by tag CAPA provider role. func (s *Service) ReleaseAddressByRole(role string) error { - clusterFilter := []*ec2.Filter{filter.EC2.Cluster(s.scope.Name())} - clusterFilter = append(clusterFilter, filter.EC2.ProviderRole(role)) - - return s.releaseAddressesWithFilter(clusterFilter) + return s.releaseAddressesWithFilter([]*ec2.Filter{ + filter.EC2.ClusterOwned(s.scope.Name()), + filter.EC2.ProviderRole(role), + }) } // setByoPublicIpv4 check if the config has Public IPv4 Pool defined, then