Skip to content

Commit 582357a

Browse files
committed
Query AWS to find subnets with explicitely specified subnet IDs
This basically unifies the code used to find subnets with filters and subnets by ID. This is done by handling the ID case as a simple filter instead of looking into the cluster network spec. The reason is that explicitely specified subnets are not necessarely part of the cluster network spec. For example, it might be desired to run a EKS control plane inside 3 specific subnets (so they are part of the network spec) while running workers in different subnets from the same VPC. A side effect of this change is that we don't have to check for the correct failure domain and/or public/private subnets anymore as this is implicitely handled by the criterias.
1 parent 655abf1 commit 582357a

File tree

2 files changed

+216
-40
lines changed

2 files changed

+216
-40
lines changed

pkg/cloud/services/ec2/instances.go

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -265,57 +265,59 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) {
265265
failureDomain = scope.AWSMachine.Spec.FailureDomain
266266
}
267267

268+
// We basically have 2 sources for subnets:
269+
// 1. If subnet.id or subnet.filters are specified, we directly query AWS
270+
// 2. All other cases use the subnets provided in the cluster network spec without ever calling AWS
271+
268272
switch {
269-
case scope.AWSMachine.Spec.Subnet != nil && scope.AWSMachine.Spec.Subnet.ID != nil:
270-
subnet := s.scope.Subnets().FindByID(*scope.AWSMachine.Spec.Subnet.ID)
271-
if subnet == nil {
272-
errMessage := fmt.Sprintf("failed to run machine %q, subnet with id %q not found",
273-
scope.Name(), aws.StringValue(scope.AWSMachine.Spec.Subnet.ID))
274-
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
275-
return "", awserrors.NewFailedDependency(errMessage)
276-
}
277-
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
278-
if !subnet.IsPublic {
279-
errMessage := fmt.Sprintf("failed to run machine %q with public IP, a specified subnet %q is a private subnet",
280-
scope.Name(), aws.StringValue(scope.AWSMachine.Spec.Subnet.ID))
281-
record.Eventf(scope.AWSMachine, "FailedCreate", errMessage)
282-
return "", awserrors.NewFailedDependency(errMessage)
283-
}
284-
}
285-
if failureDomain != nil && subnet.AvailabilityZone != *failureDomain {
286-
errMessage := fmt.Sprintf("failed to run machine %q, subnet's availability zone %q does not match with the failure domain %q",
287-
scope.Name(), subnet.AvailabilityZone, *failureDomain)
288-
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
289-
return "", awserrors.NewFailedDependency(errMessage)
290-
}
291-
return subnet.ID, nil
292-
case scope.AWSMachine.Spec.Subnet != nil && scope.AWSMachine.Spec.Subnet.Filters != nil:
273+
case scope.AWSMachine.Spec.Subnet != nil && (scope.AWSMachine.Spec.Subnet.ID != nil || scope.AWSMachine.Spec.Subnet.Filters != nil):
293274
criteria := []*ec2.Filter{
294275
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
295276
}
296277
if !scope.IsExternallyManaged() {
297278
criteria = append(criteria, filter.EC2.VPC(s.scope.VPC().ID))
298279
}
299-
if failureDomain != nil {
300-
criteria = append(criteria, filter.EC2.AvailabilityZone(*failureDomain))
301-
}
302-
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
303-
criteria = append(criteria, &ec2.Filter{Name: aws.String("map-public-ip-on-launch"), Values: aws.StringSlice([]string{"true"})})
280+
if scope.AWSMachine.Spec.Subnet.ID != nil {
281+
criteria = append(criteria, &ec2.Filter{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{*scope.AWSMachine.Spec.Subnet.ID})})
304282
}
305283
for _, f := range scope.AWSMachine.Spec.Subnet.Filters {
306284
criteria = append(criteria, &ec2.Filter{Name: aws.String(f.Name), Values: aws.StringSlice(f.Values)})
307285
}
286+
308287
subnets, err := s.getFilteredSubnets(criteria...)
309288
if err != nil {
310289
return "", errors.Wrapf(err, "failed to filter subnets for criteria %q", criteria)
311290
}
312291
if len(subnets) == 0 {
313-
errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching filters %q",
314-
scope.Name(), scope.AWSMachine.Spec.Subnet.Filters)
292+
errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching criteria %q",
293+
scope.Name(), criteria)
294+
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
295+
return "", awserrors.NewFailedDependency(errMessage)
296+
}
297+
298+
var filtered []*ec2.Subnet
299+
var errMessage string
300+
for _, subnet := range subnets {
301+
if failureDomain != nil && *subnet.AvailabilityZone != *failureDomain {
302+
// we could have included the failure domain in the query criteria, but then we end up with EC2 error
303+
// messages that don't give a good hint about what is really wrong
304+
errMessage += fmt.Sprintf(" subnet %q availability zone %q does not match failure domain %q.",
305+
*subnet.SubnetId, *subnet.AvailabilityZone, *failureDomain)
306+
continue
307+
}
308+
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP && !*subnet.MapPublicIpOnLaunch {
309+
errMessage += fmt.Sprintf(" subnet %q is a private subnet.", *subnet.SubnetId)
310+
continue
311+
}
312+
filtered = append(filtered, subnet)
313+
}
314+
if len(filtered) == 0 {
315+
errMessage = fmt.Sprintf("failed to run machine %q, found %d subnets matching criteria but post-filtering failed.",
316+
scope.Name(), len(subnets)) + errMessage
315317
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
316318
return "", awserrors.NewFailedDependency(errMessage)
317319
}
318-
return *subnets[0].SubnetId, nil
320+
return *filtered[0].SubnetId, nil
319321
case failureDomain != nil:
320322
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
321323
subnets := s.scope.Subnets().FilterPublic().FilterByZone(*failureDomain)

pkg/cloud/services/ec2/instances_test.go

Lines changed: 182 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -952,13 +952,13 @@ func TestCreateInstance(t *testing.T) {
952952
Filters: []*ec2.Filter{
953953
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
954954
filter.EC2.VPC("vpc-id"),
955-
filter.EC2.AvailabilityZone("us-east-1b"),
956955
{Name: aws.String("tag:some-tag"), Values: aws.StringSlice([]string{"some-value"})},
957956
},
958957
}).
959958
Return(&ec2.DescribeSubnetsOutput{
960959
Subnets: []*ec2.Subnet{{
961-
SubnetId: aws.String("filtered-subnet-1"),
960+
SubnetId: aws.String("filtered-subnet-1"),
961+
AvailabilityZone: aws.String("us-east-1b"),
962962
}},
963963
}, nil)
964964
m.
@@ -1053,6 +1053,20 @@ func TestCreateInstance(t *testing.T) {
10531053
},
10541054
},
10551055
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1056+
m.
1057+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1058+
Filters: []*ec2.Filter{
1059+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1060+
filter.EC2.VPC("vpc-id"),
1061+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"matching-subnet"})},
1062+
},
1063+
}).
1064+
Return(&ec2.DescribeSubnetsOutput{
1065+
Subnets: []*ec2.Subnet{{
1066+
SubnetId: aws.String("matching-subnet"),
1067+
AvailabilityZone: aws.String("us-east-1b"),
1068+
}},
1069+
}, nil)
10561070
m.
10571071
RunInstances(gomock.Any()).
10581072
Return(&ec2.Reservation{
@@ -1093,7 +1107,7 @@ func TestCreateInstance(t *testing.T) {
10931107
},
10941108
},
10951109
{
1096-
name: "with subnet ID that does not belong to Cluster",
1110+
name: "with subnet ID that does not exist",
10971111
machine: clusterv1.Machine{
10981112
ObjectMeta: metav1.ObjectMeta{
10991113
Labels: map[string]string{"set": "node"},
@@ -1145,9 +1159,20 @@ func TestCreateInstance(t *testing.T) {
11451159
},
11461160
},
11471161
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1162+
m.
1163+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1164+
Filters: []*ec2.Filter{
1165+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1166+
filter.EC2.VPC("vpc-id"),
1167+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"non-matching-subnet"})},
1168+
},
1169+
}).
1170+
Return(&ec2.DescribeSubnetsOutput{
1171+
Subnets: []*ec2.Subnet{},
1172+
}, nil)
11481173
},
11491174
check: func(instance *infrav1.Instance, err error) {
1150-
expectedErrMsg := "failed to run machine \"aws-test1\", subnet with id \"non-matching-subnet\" not found"
1175+
expectedErrMsg := "failed to run machine \"aws-test1\", no subnets available matching criteria"
11511176
if err == nil {
11521177
t.Fatalf("Expected error, but got nil")
11531178
}
@@ -1157,6 +1182,111 @@ func TestCreateInstance(t *testing.T) {
11571182
}
11581183
},
11591184
},
1185+
{
1186+
name: "with subnet ID that does not belong to Cluster",
1187+
machine: clusterv1.Machine{
1188+
ObjectMeta: metav1.ObjectMeta{
1189+
Labels: map[string]string{"set": "node"},
1190+
},
1191+
Spec: clusterv1.MachineSpec{
1192+
Bootstrap: clusterv1.Bootstrap{
1193+
DataSecretName: pointer.StringPtr("bootstrap-data"),
1194+
},
1195+
},
1196+
},
1197+
machineConfig: &infrav1.AWSMachineSpec{
1198+
AMI: infrav1.AMIReference{
1199+
ID: aws.String("abc"),
1200+
},
1201+
InstanceType: "m5.large",
1202+
Subnet: &infrav1.AWSResourceReference{
1203+
ID: aws.String("matching-subnet"),
1204+
},
1205+
},
1206+
awsCluster: &infrav1.AWSCluster{
1207+
ObjectMeta: metav1.ObjectMeta{Name: "test"},
1208+
Spec: infrav1.AWSClusterSpec{
1209+
NetworkSpec: infrav1.NetworkSpec{
1210+
VPC: infrav1.VPCSpec{
1211+
ID: "vpc-id",
1212+
},
1213+
Subnets: infrav1.Subnets{{
1214+
ID: "subnet-1",
1215+
}},
1216+
},
1217+
},
1218+
Status: infrav1.AWSClusterStatus{
1219+
Network: infrav1.NetworkStatus{
1220+
SecurityGroups: map[infrav1.SecurityGroupRole]infrav1.SecurityGroup{
1221+
infrav1.SecurityGroupControlPlane: {
1222+
ID: "1",
1223+
},
1224+
infrav1.SecurityGroupNode: {
1225+
ID: "2",
1226+
},
1227+
infrav1.SecurityGroupLB: {
1228+
ID: "3",
1229+
},
1230+
},
1231+
APIServerELB: infrav1.ClassicELB{
1232+
DNSName: "test-apiserver.us-east-1.aws",
1233+
},
1234+
},
1235+
},
1236+
},
1237+
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1238+
m.
1239+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1240+
Filters: []*ec2.Filter{
1241+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1242+
filter.EC2.VPC("vpc-id"),
1243+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"matching-subnet"})},
1244+
},
1245+
}).
1246+
Return(&ec2.DescribeSubnetsOutput{
1247+
Subnets: []*ec2.Subnet{{
1248+
SubnetId: aws.String("matching-subnet"),
1249+
}},
1250+
}, nil)
1251+
m.
1252+
RunInstances(gomock.Any()).
1253+
Return(&ec2.Reservation{
1254+
Instances: []*ec2.Instance{
1255+
{
1256+
State: &ec2.InstanceState{
1257+
Name: aws.String(ec2.InstanceStateNamePending),
1258+
},
1259+
IamInstanceProfile: &ec2.IamInstanceProfile{
1260+
Arn: aws.String("arn:aws:iam::123456789012:instance-profile/foo"),
1261+
},
1262+
InstanceId: aws.String("two"),
1263+
InstanceType: aws.String("m5.large"),
1264+
SubnetId: aws.String("matching-subnet"),
1265+
ImageId: aws.String("ami-1"),
1266+
RootDeviceName: aws.String("device-1"),
1267+
BlockDeviceMappings: []*ec2.InstanceBlockDeviceMapping{
1268+
{
1269+
DeviceName: aws.String("device-1"),
1270+
Ebs: &ec2.EbsInstanceBlockDevice{
1271+
VolumeId: aws.String("volume-1"),
1272+
},
1273+
},
1274+
},
1275+
Placement: &ec2.Placement{
1276+
AvailabilityZone: &az,
1277+
},
1278+
},
1279+
},
1280+
}, nil)
1281+
m.WaitUntilInstanceRunningWithContext(gomock.Any(), gomock.Any(), gomock.Any()).
1282+
Return(nil)
1283+
},
1284+
check: func(instance *infrav1.Instance, err error) {
1285+
if err != nil {
1286+
t.Fatalf("did not expect error: %v", err)
1287+
}
1288+
},
1289+
},
11601290
{
11611291
name: "subnet id and failureDomain don't match",
11621292
machine: clusterv1.Machine{
@@ -1212,9 +1342,23 @@ func TestCreateInstance(t *testing.T) {
12121342
},
12131343
},
12141344
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1345+
m.
1346+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1347+
Filters: []*ec2.Filter{
1348+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1349+
filter.EC2.VPC("vpc-id"),
1350+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"subnet-1"})},
1351+
},
1352+
}).
1353+
Return(&ec2.DescribeSubnetsOutput{
1354+
Subnets: []*ec2.Subnet{{
1355+
SubnetId: aws.String("subnet-1"),
1356+
AvailabilityZone: aws.String("us-west-1b"),
1357+
}},
1358+
}, nil)
12151359
},
12161360
check: func(instance *infrav1.Instance, err error) {
1217-
expectedErrMsg := "subnet's availability zone \"us-west-1b\" does not match with the failure domain \"us-east-1b\""
1361+
expectedErrMsg := "failed to run machine \"aws-test1\", found 1 subnets matching criteria but post-filtering failed. subnet \"subnet-1\" availability zone \"us-west-1b\" does not match failure domain \"us-east-1b\""
12181362
if err == nil {
12191363
t.Fatalf("Expected error, but got nil")
12201364
}
@@ -1345,6 +1489,21 @@ func TestCreateInstance(t *testing.T) {
13451489
},
13461490
},
13471491
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1492+
m.
1493+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1494+
Filters: []*ec2.Filter{
1495+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1496+
filter.EC2.VPC("vpc-id"),
1497+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"public-subnet-1"})},
1498+
},
1499+
}).
1500+
Return(&ec2.DescribeSubnetsOutput{
1501+
Subnets: []*ec2.Subnet{{
1502+
SubnetId: aws.String("public-subnet-1"),
1503+
AvailabilityZone: aws.String("us-east-1b"),
1504+
MapPublicIpOnLaunch: aws.Bool(true),
1505+
}},
1506+
}, nil)
13481507
m.
13491508
RunInstances(gomock.Any()).
13501509
Return(&ec2.Reservation{
@@ -1439,9 +1598,24 @@ func TestCreateInstance(t *testing.T) {
14391598
},
14401599
},
14411600
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1601+
m.
1602+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1603+
Filters: []*ec2.Filter{
1604+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1605+
filter.EC2.VPC("vpc-id"),
1606+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"private-subnet-1"})},
1607+
},
1608+
}).
1609+
Return(&ec2.DescribeSubnetsOutput{
1610+
Subnets: []*ec2.Subnet{{
1611+
SubnetId: aws.String("private-subnet-1"),
1612+
AvailabilityZone: aws.String("us-east-1b"),
1613+
MapPublicIpOnLaunch: aws.Bool(false),
1614+
}},
1615+
}, nil)
14421616
},
14431617
check: func(instance *infrav1.Instance, err error) {
1444-
expectedErrMsg := "failed to run machine \"aws-test1\" with public IP, a specified subnet \"private-subnet-1\" is a private subnet"
1618+
expectedErrMsg := "failed to run machine \"aws-test1\", found 1 subnets matching criteria but post-filtering failed. subnet \"private-subnet-1\" is a private subnet."
14451619
if err == nil {
14461620
t.Fatalf("Expected error, but got nil")
14471621
}
@@ -1520,13 +1694,13 @@ func TestCreateInstance(t *testing.T) {
15201694
Filters: []*ec2.Filter{
15211695
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
15221696
filter.EC2.VPC("vpc-id"),
1523-
{Name: aws.String("map-public-ip-on-launch"), Values: aws.StringSlice([]string{"true"})},
15241697
{Name: aws.String("tag:some-tag"), Values: aws.StringSlice([]string{"some-value"})},
15251698
},
15261699
}).
15271700
Return(&ec2.DescribeSubnetsOutput{
15281701
Subnets: []*ec2.Subnet{{
1529-
SubnetId: aws.String("filtered-subnet-1"),
1702+
SubnetId: aws.String("filtered-subnet-1"),
1703+
MapPublicIpOnLaunch: aws.Bool(true),
15301704
}},
15311705
}, nil)
15321706
m.

0 commit comments

Comments
 (0)