Skip to content

Commit 172038a

Browse files
committed
🐛 awsmachine: only register machine to LB when it's running
The AWS docs [1] state that an instance must be in the running state before it can be registered with a load balancer target group. Currently, CAPA tries to register the instance while it is still pending, which causes the following error:

```
E0314 22:24:18.512176 701419 awsmachine_controller.go:605] "failed to reconcile LB attachment" err=< [could not register machine to load balancer: could not register control plane instance "i-039bb22b1e8df7c99" with load balancer: failed to register instance with target group 'mrb-capa-67-d2pfx-int-22623': InvalidTarget: The following targets are not in a running state and cannot be registered: 'i-039bb22b1e8df7c99' status code: 400, request id: 17514354-77ac-42b1-a882-489760563bbd, could not register machine to load balancer: could not register control plane instance "i-039bb22b1e8df7c99" with load balancer: failed to register instance with target group 'mrb-capa-67-d2pfx-ext-6443': InvalidTarget: The following targets are not in a running state and cannot be registered: 'i-039bb22b1e8df7c99' status code: 400, request id: 84e1849c-abb7-4af9-9220-8791fdc1a3fb] >
I0314 22:24:18.512325 701419 recorder.go:104] "events: Failed to register control plane instance \"i-039bb22b1e8df7c99\" with load balancer: failed to register instance with target group 'mrb-capa-67-d2pfx-ext-6443': InvalidTarget: The following targets are not in a running state and cannot be registered: 'i-039bb22b1e8df7c99'\n\tstatus code: 400, request id: 84e1849c-abb7-4af9-9220-8791fdc1a3fb" type="Warning" object={"kind":"AWSMachine","namespace":"openshift-cluster-api-guests","name":"mrb-capa-67-d2pfx-bootstrap","uid":"58af1162-380b-4f3e-93fe-c0e81401070e","apiVersion":"infrastructure.cluster.x-k8s.io/v1beta2","resourceVersion":"562"} reason="FailedAttachControlPlaneELB"
```

Even though this error does not stop the install from succeeding, let's wait until the instance state is "running" before registering it, thereby avoiding unnecessary AWS API calls.
[1] https://docs.aws.amazon.com/elasticloadbalancing/latest/application/target-group-register-targets.html#register-instances
1 parent 1313226 commit 172038a

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

controllers/awsmachine_controller.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,8 +619,14 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
619619
}
620620

621621
if err := r.reconcileLBAttachment(machineScope, elbScope, instance); err != nil {
622-
machineScope.Error(err, "failed to reconcile LB attachment")
623-
return ctrl.Result{}, err
622+
// We are tolerating InstanceNotRunning error, so we don't report it as an error condition.
623+
// Because we are reconciling all load balancers, attempt to treat the error as a list of errors.
624+
if err := kerrors.FilterOut(err, elb.IsInstanceNotRunning); err != nil {
625+
machineScope.Error(err, "failed to reconcile LB attachment")
626+
return ctrl.Result{}, err
627+
}
628+
// Cannot attach non-running instances to LB
629+
shouldRequeue = true
624630
}
625631
}
626632

@@ -1002,6 +1008,14 @@ func (r *AWSMachineReconciler) registerInstanceToV2LB(machineScope *scope.Machin
10021008
return nil
10031009
}
10041010

1011+
// See https://docs.aws.amazon.com/elasticloadbalancing/latest/application/target-group-register-targets.html#register-instances
1012+
if ptr.Deref(machineScope.GetInstanceState(), infrav1.InstanceStatePending) != infrav1.InstanceStateRunning {
1013+
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "FailedAttachControlPlaneELB",
1014+
"Cannot register control plane instance %q with load balancer: instance is not running", instance.ID)
1015+
conditions.MarkFalse(machineScope.AWSMachine, infrav1.ELBAttachedCondition, infrav1.ELBAttachFailedReason, clusterv1.ConditionSeverityInfo, "instance not running")
1016+
return elb.NewInstanceNotRunning("instance is not running")
1017+
}
1018+
10051019
if err := elbsvc.RegisterInstanceWithAPIServerLB(instance, lb); err != nil {
10061020
r.Recorder.Eventf(machineScope.AWSMachine, corev1.EventTypeWarning, "FailedAttachControlPlaneELB",
10071021
"Failed to register control plane instance %q with load balancer: %v", instance.ID, err)

pkg/cloud/services/elb/errors.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,15 @@ func NewConflict(msg string) error {
5656
}
5757
}
5858

59+
// NewInstanceNotRunning returns an error which indicates that the request cannot be processed due to the instance not
60+
// being in a running state.
61+
func NewInstanceNotRunning(msg string) error {
62+
return &ELBError{
63+
msg: msg,
64+
Code: http.StatusTooEarly,
65+
}
66+
}
67+
5968
// IsNotFound returns true if the error was created by NewNotFound.
6069
func IsNotFound(err error) bool {
6170
if ReasonForError(err) == http.StatusNotFound {
@@ -90,6 +99,11 @@ func IsSDKError(err error) (ok bool) {
9099
return
91100
}
92101

102+
// IsInstanceNotRunning returns true if the error was created by NewInstanceNotRunning.
103+
func IsInstanceNotRunning(err error) (ok bool) {
104+
return ReasonForError(err) == http.StatusTooEarly
105+
}
106+
93107
// ReasonForError returns the HTTP status for a particular error.
94108
func ReasonForError(err error) int {
95109
if t, ok := errors.Cause(err).(*ELBError); ok {

0 commit comments

Comments
 (0)