Skip to content

Commit fb12a8b

Browse files
authored
Merge pull request #2864 from codablock/fix-awsmachinespec-subnet-id
Query AWS to find subnets with explicitely specified subnet IDs
2 parents 655abf1 + 582357a commit fb12a8b

File tree

2 files changed

+216
-40
lines changed

2 files changed

+216
-40
lines changed

pkg/cloud/services/ec2/instances.go

Lines changed: 34 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -265,57 +265,59 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) {
265265
failureDomain = scope.AWSMachine.Spec.FailureDomain
266266
}
267267

268+
// We basically have 2 sources for subnets:
269+
// 1. If subnet.id or subnet.filters are specified, we directly query AWS
270+
// 2. All other cases use the subnets provided in the cluster network spec without ever calling AWS
271+
268272
switch {
269-
case scope.AWSMachine.Spec.Subnet != nil && scope.AWSMachine.Spec.Subnet.ID != nil:
270-
subnet := s.scope.Subnets().FindByID(*scope.AWSMachine.Spec.Subnet.ID)
271-
if subnet == nil {
272-
errMessage := fmt.Sprintf("failed to run machine %q, subnet with id %q not found",
273-
scope.Name(), aws.StringValue(scope.AWSMachine.Spec.Subnet.ID))
274-
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
275-
return "", awserrors.NewFailedDependency(errMessage)
276-
}
277-
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
278-
if !subnet.IsPublic {
279-
errMessage := fmt.Sprintf("failed to run machine %q with public IP, a specified subnet %q is a private subnet",
280-
scope.Name(), aws.StringValue(scope.AWSMachine.Spec.Subnet.ID))
281-
record.Eventf(scope.AWSMachine, "FailedCreate", errMessage)
282-
return "", awserrors.NewFailedDependency(errMessage)
283-
}
284-
}
285-
if failureDomain != nil && subnet.AvailabilityZone != *failureDomain {
286-
errMessage := fmt.Sprintf("failed to run machine %q, subnet's availability zone %q does not match with the failure domain %q",
287-
scope.Name(), subnet.AvailabilityZone, *failureDomain)
288-
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
289-
return "", awserrors.NewFailedDependency(errMessage)
290-
}
291-
return subnet.ID, nil
292-
case scope.AWSMachine.Spec.Subnet != nil && scope.AWSMachine.Spec.Subnet.Filters != nil:
273+
case scope.AWSMachine.Spec.Subnet != nil && (scope.AWSMachine.Spec.Subnet.ID != nil || scope.AWSMachine.Spec.Subnet.Filters != nil):
293274
criteria := []*ec2.Filter{
294275
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
295276
}
296277
if !scope.IsExternallyManaged() {
297278
criteria = append(criteria, filter.EC2.VPC(s.scope.VPC().ID))
298279
}
299-
if failureDomain != nil {
300-
criteria = append(criteria, filter.EC2.AvailabilityZone(*failureDomain))
301-
}
302-
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
303-
criteria = append(criteria, &ec2.Filter{Name: aws.String("map-public-ip-on-launch"), Values: aws.StringSlice([]string{"true"})})
280+
if scope.AWSMachine.Spec.Subnet.ID != nil {
281+
criteria = append(criteria, &ec2.Filter{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{*scope.AWSMachine.Spec.Subnet.ID})})
304282
}
305283
for _, f := range scope.AWSMachine.Spec.Subnet.Filters {
306284
criteria = append(criteria, &ec2.Filter{Name: aws.String(f.Name), Values: aws.StringSlice(f.Values)})
307285
}
286+
308287
subnets, err := s.getFilteredSubnets(criteria...)
309288
if err != nil {
310289
return "", errors.Wrapf(err, "failed to filter subnets for criteria %q", criteria)
311290
}
312291
if len(subnets) == 0 {
313-
errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching filters %q",
314-
scope.Name(), scope.AWSMachine.Spec.Subnet.Filters)
292+
errMessage := fmt.Sprintf("failed to run machine %q, no subnets available matching criteria %q",
293+
scope.Name(), criteria)
294+
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
295+
return "", awserrors.NewFailedDependency(errMessage)
296+
}
297+
298+
var filtered []*ec2.Subnet
299+
var errMessage string
300+
for _, subnet := range subnets {
301+
if failureDomain != nil && *subnet.AvailabilityZone != *failureDomain {
302+
// we could have included the failure domain in the query criteria, but then we end up with EC2 error
303+
// messages that don't give a good hint about what is really wrong
304+
errMessage += fmt.Sprintf(" subnet %q availability zone %q does not match failure domain %q.",
305+
*subnet.SubnetId, *subnet.AvailabilityZone, *failureDomain)
306+
continue
307+
}
308+
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP && !*subnet.MapPublicIpOnLaunch {
309+
errMessage += fmt.Sprintf(" subnet %q is a private subnet.", *subnet.SubnetId)
310+
continue
311+
}
312+
filtered = append(filtered, subnet)
313+
}
314+
if len(filtered) == 0 {
315+
errMessage = fmt.Sprintf("failed to run machine %q, found %d subnets matching criteria but post-filtering failed.",
316+
scope.Name(), len(subnets)) + errMessage
315317
record.Warnf(scope.AWSMachine, "FailedCreate", errMessage)
316318
return "", awserrors.NewFailedDependency(errMessage)
317319
}
318-
return *subnets[0].SubnetId, nil
320+
return *filtered[0].SubnetId, nil
319321
case failureDomain != nil:
320322
if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP {
321323
subnets := s.scope.Subnets().FilterPublic().FilterByZone(*failureDomain)

pkg/cloud/services/ec2/instances_test.go

Lines changed: 182 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -952,13 +952,13 @@ func TestCreateInstance(t *testing.T) {
952952
Filters: []*ec2.Filter{
953953
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
954954
filter.EC2.VPC("vpc-id"),
955-
filter.EC2.AvailabilityZone("us-east-1b"),
956955
{Name: aws.String("tag:some-tag"), Values: aws.StringSlice([]string{"some-value"})},
957956
},
958957
}).
959958
Return(&ec2.DescribeSubnetsOutput{
960959
Subnets: []*ec2.Subnet{{
961-
SubnetId: aws.String("filtered-subnet-1"),
960+
SubnetId: aws.String("filtered-subnet-1"),
961+
AvailabilityZone: aws.String("us-east-1b"),
962962
}},
963963
}, nil)
964964
m.
@@ -1053,6 +1053,20 @@ func TestCreateInstance(t *testing.T) {
10531053
},
10541054
},
10551055
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1056+
m.
1057+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1058+
Filters: []*ec2.Filter{
1059+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1060+
filter.EC2.VPC("vpc-id"),
1061+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"matching-subnet"})},
1062+
},
1063+
}).
1064+
Return(&ec2.DescribeSubnetsOutput{
1065+
Subnets: []*ec2.Subnet{{
1066+
SubnetId: aws.String("matching-subnet"),
1067+
AvailabilityZone: aws.String("us-east-1b"),
1068+
}},
1069+
}, nil)
10561070
m.
10571071
RunInstances(gomock.Any()).
10581072
Return(&ec2.Reservation{
@@ -1093,7 +1107,7 @@ func TestCreateInstance(t *testing.T) {
10931107
},
10941108
},
10951109
{
1096-
name: "with subnet ID that does not belong to Cluster",
1110+
name: "with subnet ID that does not exist",
10971111
machine: clusterv1.Machine{
10981112
ObjectMeta: metav1.ObjectMeta{
10991113
Labels: map[string]string{"set": "node"},
@@ -1145,9 +1159,20 @@ func TestCreateInstance(t *testing.T) {
11451159
},
11461160
},
11471161
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1162+
m.
1163+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1164+
Filters: []*ec2.Filter{
1165+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1166+
filter.EC2.VPC("vpc-id"),
1167+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"non-matching-subnet"})},
1168+
},
1169+
}).
1170+
Return(&ec2.DescribeSubnetsOutput{
1171+
Subnets: []*ec2.Subnet{},
1172+
}, nil)
11481173
},
11491174
check: func(instance *infrav1.Instance, err error) {
1150-
expectedErrMsg := "failed to run machine \"aws-test1\", subnet with id \"non-matching-subnet\" not found"
1175+
expectedErrMsg := "failed to run machine \"aws-test1\", no subnets available matching criteria"
11511176
if err == nil {
11521177
t.Fatalf("Expected error, but got nil")
11531178
}
@@ -1157,6 +1182,111 @@ func TestCreateInstance(t *testing.T) {
11571182
}
11581183
},
11591184
},
1185+
{
1186+
name: "with subnet ID that does not belong to Cluster",
1187+
machine: clusterv1.Machine{
1188+
ObjectMeta: metav1.ObjectMeta{
1189+
Labels: map[string]string{"set": "node"},
1190+
},
1191+
Spec: clusterv1.MachineSpec{
1192+
Bootstrap: clusterv1.Bootstrap{
1193+
DataSecretName: pointer.StringPtr("bootstrap-data"),
1194+
},
1195+
},
1196+
},
1197+
machineConfig: &infrav1.AWSMachineSpec{
1198+
AMI: infrav1.AMIReference{
1199+
ID: aws.String("abc"),
1200+
},
1201+
InstanceType: "m5.large",
1202+
Subnet: &infrav1.AWSResourceReference{
1203+
ID: aws.String("matching-subnet"),
1204+
},
1205+
},
1206+
awsCluster: &infrav1.AWSCluster{
1207+
ObjectMeta: metav1.ObjectMeta{Name: "test"},
1208+
Spec: infrav1.AWSClusterSpec{
1209+
NetworkSpec: infrav1.NetworkSpec{
1210+
VPC: infrav1.VPCSpec{
1211+
ID: "vpc-id",
1212+
},
1213+
Subnets: infrav1.Subnets{{
1214+
ID: "subnet-1",
1215+
}},
1216+
},
1217+
},
1218+
Status: infrav1.AWSClusterStatus{
1219+
Network: infrav1.NetworkStatus{
1220+
SecurityGroups: map[infrav1.SecurityGroupRole]infrav1.SecurityGroup{
1221+
infrav1.SecurityGroupControlPlane: {
1222+
ID: "1",
1223+
},
1224+
infrav1.SecurityGroupNode: {
1225+
ID: "2",
1226+
},
1227+
infrav1.SecurityGroupLB: {
1228+
ID: "3",
1229+
},
1230+
},
1231+
APIServerELB: infrav1.ClassicELB{
1232+
DNSName: "test-apiserver.us-east-1.aws",
1233+
},
1234+
},
1235+
},
1236+
},
1237+
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1238+
m.
1239+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1240+
Filters: []*ec2.Filter{
1241+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1242+
filter.EC2.VPC("vpc-id"),
1243+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"matching-subnet"})},
1244+
},
1245+
}).
1246+
Return(&ec2.DescribeSubnetsOutput{
1247+
Subnets: []*ec2.Subnet{{
1248+
SubnetId: aws.String("matching-subnet"),
1249+
}},
1250+
}, nil)
1251+
m.
1252+
RunInstances(gomock.Any()).
1253+
Return(&ec2.Reservation{
1254+
Instances: []*ec2.Instance{
1255+
{
1256+
State: &ec2.InstanceState{
1257+
Name: aws.String(ec2.InstanceStateNamePending),
1258+
},
1259+
IamInstanceProfile: &ec2.IamInstanceProfile{
1260+
Arn: aws.String("arn:aws:iam::123456789012:instance-profile/foo"),
1261+
},
1262+
InstanceId: aws.String("two"),
1263+
InstanceType: aws.String("m5.large"),
1264+
SubnetId: aws.String("matching-subnet"),
1265+
ImageId: aws.String("ami-1"),
1266+
RootDeviceName: aws.String("device-1"),
1267+
BlockDeviceMappings: []*ec2.InstanceBlockDeviceMapping{
1268+
{
1269+
DeviceName: aws.String("device-1"),
1270+
Ebs: &ec2.EbsInstanceBlockDevice{
1271+
VolumeId: aws.String("volume-1"),
1272+
},
1273+
},
1274+
},
1275+
Placement: &ec2.Placement{
1276+
AvailabilityZone: &az,
1277+
},
1278+
},
1279+
},
1280+
}, nil)
1281+
m.WaitUntilInstanceRunningWithContext(gomock.Any(), gomock.Any(), gomock.Any()).
1282+
Return(nil)
1283+
},
1284+
check: func(instance *infrav1.Instance, err error) {
1285+
if err != nil {
1286+
t.Fatalf("did not expect error: %v", err)
1287+
}
1288+
},
1289+
},
11601290
{
11611291
name: "subnet id and failureDomain don't match",
11621292
machine: clusterv1.Machine{
@@ -1212,9 +1342,23 @@ func TestCreateInstance(t *testing.T) {
12121342
},
12131343
},
12141344
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1345+
m.
1346+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1347+
Filters: []*ec2.Filter{
1348+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1349+
filter.EC2.VPC("vpc-id"),
1350+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"subnet-1"})},
1351+
},
1352+
}).
1353+
Return(&ec2.DescribeSubnetsOutput{
1354+
Subnets: []*ec2.Subnet{{
1355+
SubnetId: aws.String("subnet-1"),
1356+
AvailabilityZone: aws.String("us-west-1b"),
1357+
}},
1358+
}, nil)
12151359
},
12161360
check: func(instance *infrav1.Instance, err error) {
1217-
expectedErrMsg := "subnet's availability zone \"us-west-1b\" does not match with the failure domain \"us-east-1b\""
1361+
expectedErrMsg := "failed to run machine \"aws-test1\", found 1 subnets matching criteria but post-filtering failed. subnet \"subnet-1\" availability zone \"us-west-1b\" does not match failure domain \"us-east-1b\""
12181362
if err == nil {
12191363
t.Fatalf("Expected error, but got nil")
12201364
}
@@ -1345,6 +1489,21 @@ func TestCreateInstance(t *testing.T) {
13451489
},
13461490
},
13471491
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1492+
m.
1493+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1494+
Filters: []*ec2.Filter{
1495+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1496+
filter.EC2.VPC("vpc-id"),
1497+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"public-subnet-1"})},
1498+
},
1499+
}).
1500+
Return(&ec2.DescribeSubnetsOutput{
1501+
Subnets: []*ec2.Subnet{{
1502+
SubnetId: aws.String("public-subnet-1"),
1503+
AvailabilityZone: aws.String("us-east-1b"),
1504+
MapPublicIpOnLaunch: aws.Bool(true),
1505+
}},
1506+
}, nil)
13481507
m.
13491508
RunInstances(gomock.Any()).
13501509
Return(&ec2.Reservation{
@@ -1439,9 +1598,24 @@ func TestCreateInstance(t *testing.T) {
14391598
},
14401599
},
14411600
expect: func(m *mock_ec2iface.MockEC2APIMockRecorder) {
1601+
m.
1602+
DescribeSubnets(&ec2.DescribeSubnetsInput{
1603+
Filters: []*ec2.Filter{
1604+
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
1605+
filter.EC2.VPC("vpc-id"),
1606+
{Name: aws.String("subnet-id"), Values: aws.StringSlice([]string{"private-subnet-1"})},
1607+
},
1608+
}).
1609+
Return(&ec2.DescribeSubnetsOutput{
1610+
Subnets: []*ec2.Subnet{{
1611+
SubnetId: aws.String("private-subnet-1"),
1612+
AvailabilityZone: aws.String("us-east-1b"),
1613+
MapPublicIpOnLaunch: aws.Bool(false),
1614+
}},
1615+
}, nil)
14421616
},
14431617
check: func(instance *infrav1.Instance, err error) {
1444-
expectedErrMsg := "failed to run machine \"aws-test1\" with public IP, a specified subnet \"private-subnet-1\" is a private subnet"
1618+
expectedErrMsg := "failed to run machine \"aws-test1\", found 1 subnets matching criteria but post-filtering failed. subnet \"private-subnet-1\" is a private subnet."
14451619
if err == nil {
14461620
t.Fatalf("Expected error, but got nil")
14471621
}
@@ -1520,13 +1694,13 @@ func TestCreateInstance(t *testing.T) {
15201694
Filters: []*ec2.Filter{
15211695
filter.EC2.SubnetStates(ec2.SubnetStatePending, ec2.SubnetStateAvailable),
15221696
filter.EC2.VPC("vpc-id"),
1523-
{Name: aws.String("map-public-ip-on-launch"), Values: aws.StringSlice([]string{"true"})},
15241697
{Name: aws.String("tag:some-tag"), Values: aws.StringSlice([]string{"some-value"})},
15251698
},
15261699
}).
15271700
Return(&ec2.DescribeSubnetsOutput{
15281701
Subnets: []*ec2.Subnet{{
1529-
SubnetId: aws.String("filtered-subnet-1"),
1702+
SubnetId: aws.String("filtered-subnet-1"),
1703+
MapPublicIpOnLaunch: aws.Bool(true),
15301704
}},
15311705
}, nil)
15321706
m.

0 commit comments

Comments
 (0)