Skip to content

Commit 32ebcce

Browse files
committed
🐛 elbv2: wait for LB active state instead of resolving DNS name
Using DNS name resolution to check that the load balancer is working can cause problems that depend on the host running CAPA. On some systems, DNS resolution can keep failing when cached DNS responses have very large TTLs, causing very long provisioning times. Instead of resolving the DNS name, let's use the AWS API to wait for the load balancer to reach the "active" state. Waiting for the DNS name to become resolvable should be left to the clients.
1 parent b25eef6 commit 32ebcce

File tree

5 files changed

+27
-33
lines changed

5 files changed

+27
-33
lines changed

controllers/awscluster_controller.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package controllers
1919
import (
2020
"context"
2121
"fmt"
22-
"net"
2322
"time"
2423

2524
"github.com/google/go-cmp/cmp"
@@ -288,13 +287,6 @@ func (r *AWSClusterReconciler) reconcileLoadBalancer(clusterScope *scope.Cluster
288287
return &retryAfterDuration, nil
289288
}
290289

291-
clusterScope.Debug("Looking up IP address for DNS", "dns", awsCluster.Status.Network.APIServerELB.DNSName)
292-
if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil {
293-
clusterScope.Error(err, "failed to get IP address for dns name", "dns", awsCluster.Status.Network.APIServerELB.DNSName)
294-
conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "")
295-
clusterScope.Info("Waiting on API server ELB DNS name to resolve")
296-
return &retryAfterDuration, nil
297-
}
298290
conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition)
299291

300292
awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{

controllers/awscluster_controller_unit_test.go

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -395,31 +395,6 @@ func TestAWSClusterReconcileOperations(t *testing.T) {
395395
g.Expect(err).To(BeNil())
396396
expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameReason}})
397397
})
398-
t.Run("Should fail AWSCluster create with LoadBalancer reconcile failure with WaitForDNSNameResolve condition as false", func(t *testing.T) {
399-
g := NewWithT(t)
400-
awsCluster := getAWSCluster("test", "test")
401-
runningCluster := func() {
402-
networkSvc.EXPECT().ReconcileNetwork().Return(nil)
403-
sgSvc.EXPECT().ReconcileSecurityGroups().Return(nil)
404-
ec2Svc.EXPECT().ReconcileBastion().Return(nil)
405-
elbSvc.EXPECT().ReconcileLoadbalancers().Return(nil)
406-
}
407-
csClient := setup(t, &awsCluster)
408-
defer teardown()
409-
runningCluster()
410-
cs, err := scope.NewClusterScope(
411-
scope.ClusterScopeParams{
412-
Client: csClient,
413-
Cluster: &clusterv1.Cluster{},
414-
AWSCluster: &awsCluster,
415-
},
416-
)
417-
awsCluster.Status.Network.APIServerELB.DNSName = "test-apiserver.us-east-1.aws"
418-
g.Expect(err).To(BeNil())
419-
_, err = reconciler.reconcileNormal(cs)
420-
g.Expect(err).To(BeNil())
421-
expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameResolveReason}})
422-
})
423398
})
424399
})
425400
t.Run("Reconcile delete AWSCluster", func(t *testing.T) {

controllers/helpers_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ func mockedCreateLBV2Calls(t *testing.T, m *mocks.MockELBV2APIMockRecorder) {
291291
LoadBalancerArn: lbArn,
292292
SecurityGroups: aws.StringSlice([]string{"sg-apiserver-lb"}),
293293
})).MaxTimes(1)
294+
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
295+
LoadBalancerArns: []*string{lbArn},
296+
})).MaxTimes(1)
294297
}
295298

296299
func mockedDescribeTargetGroupsCall(t *testing.T, m *mocks.MockELBV2APIMockRecorder) {

pkg/cloud/services/elb/loadbalancer.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,17 @@ func (s *Service) reconcileV2LB(lbSpec *infrav1.AWSLoadBalancerSpec) error {
121121
return err
122122
}
123123

124+
wReq := &elbv2.DescribeLoadBalancersInput{
125+
LoadBalancerArns: aws.StringSlice([]string{lb.ARN}),
126+
}
127+
s.scope.Debug("Waiting for LB to become active", "api-server-lb-name", lb.Name)
128+
waitStart := time.Now()
129+
if err := s.ELBV2Client.WaitUntilLoadBalancerAvailableWithContext(context.TODO(), wReq); err != nil {
130+
s.scope.Error(err, "failed to wait for LB to become available", "time", time.Since(waitStart))
131+
return err
132+
}
133+
s.scope.Debug("LB reports active state", "api-server-lb-name", lb.Name, "time", time.Since(waitStart))
134+
124135
// set up the type for later processing
125136
lb.LoadBalancerType = lbSpec.LoadBalancerType
126137
if lb.IsManaged(s.scope.Name()) {

pkg/cloud/services/elb/loadbalancer_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,6 +2275,9 @@ func TestReconcileV2LB(t *testing.T) {
22752275
},
22762276
nil,
22772277
)
2278+
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
2279+
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
2280+
})).Return(nil)
22782281
},
22792282
check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) {
22802283
t.Helper()
@@ -2476,6 +2479,10 @@ func TestReconcileV2LB(t *testing.T) {
24762479
LoadBalancerArn: aws.String(elbArn),
24772480
Subnets: []*string{},
24782481
}).Return(&elbv2.SetSubnetsOutput{}, nil)
2482+
2483+
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
2484+
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
2485+
})).Return(nil)
24792486
},
24802487
check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) {
24812488
t.Helper()
@@ -2658,6 +2665,12 @@ func TestReconcileLoadbalancers(t *testing.T) {
26582665
},
26592666
nil,
26602667
)
2668+
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
2669+
LoadBalancerArns: aws.StringSlice([]string{elbArn}),
2670+
})).Return(nil)
2671+
m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{
2672+
LoadBalancerArns: aws.StringSlice([]string{secondElbArn}),
2673+
})).Return(nil)
26612674
},
26622675
check: func(t *testing.T, firstLB *infrav1.LoadBalancer, secondLB *infrav1.LoadBalancer, err error) {
26632676
t.Helper()

0 commit comments

Comments
 (0)