Skip to content

Commit 9cec957

Browse files
committed
AI first step
1 parent 96fdab6 commit 9cec957

File tree

5 files changed

+142
-36
lines changed

5 files changed

+142
-36
lines changed

packages/aws-library/src/aws_library/ec2/__init__.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
from ._client import SimcoreEC2API
2-
from ._errors import EC2AccessError, EC2NotConnectedError, EC2RuntimeError
2+
from ._errors import (
3+
EC2AccessError,
4+
EC2InsufficientCapacityError,
5+
EC2NotConnectedError,
6+
EC2RuntimeError,
7+
)
38
from ._models import (
49
AWS_TAG_KEY_MAX_LENGTH,
510
AWS_TAG_KEY_MIN_LENGTH,
@@ -16,22 +21,22 @@
1621
)
1722

1823
__all__: tuple[str, ...] = (
19-
"AWSTagKey",
20-
"AWSTagValue",
21-
"AWS_TAG_KEY_MIN_LENGTH",
2224
"AWS_TAG_KEY_MAX_LENGTH",
23-
"AWS_TAG_VALUE_MIN_LENGTH",
25+
"AWS_TAG_KEY_MIN_LENGTH",
2426
"AWS_TAG_VALUE_MAX_LENGTH",
27+
"AWS_TAG_VALUE_MIN_LENGTH",
28+
"AWSTagKey",
29+
"AWSTagValue",
2530
"EC2AccessError",
2631
"EC2InstanceBootSpecific",
2732
"EC2InstanceConfig",
2833
"EC2InstanceData",
2934
"EC2InstanceType",
35+
"EC2InsufficientCapacityError",
3036
"EC2NotConnectedError",
3137
"EC2RuntimeError",
3238
"EC2Tags",
3339
"Resources",
3440
"SimcoreEC2API",
3541
)
36-
3742
# nopycln: file

packages/aws-library/src/aws_library/ec2/_client.py

Lines changed: 74 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@
1616
from types_aiobotocore_ec2.type_defs import FilterTypeDef, TagTypeDef
1717

1818
from ._error_handler import ec2_exception_handler
19-
from ._errors import EC2InstanceNotFoundError, EC2TooManyInstancesError
19+
from ._errors import (
20+
EC2InstanceNotFoundError,
21+
EC2InsufficientCapacityError,
22+
EC2TooManyInstancesError,
23+
)
2024
from ._models import (
2125
AWSTagKey,
2226
EC2InstanceConfig,
@@ -152,33 +156,75 @@ async def launch_instances(
152156
for tag_key, tag_value in instance_config.tags.items()
153157
]
154158

155-
instances = await self.client.run_instances(
156-
ImageId=instance_config.ami_id,
157-
MinCount=min_number_of_instances,
158-
MaxCount=number_of_instances,
159-
IamInstanceProfile=(
160-
{"Arn": instance_config.iam_instance_profile}
161-
if instance_config.iam_instance_profile
162-
else {}
163-
),
164-
InstanceType=instance_config.type.name,
165-
InstanceInitiatedShutdownBehavior="terminate",
166-
KeyName=instance_config.key_name,
167-
TagSpecifications=[
168-
{"ResourceType": "instance", "Tags": resource_tags},
169-
{"ResourceType": "volume", "Tags": resource_tags},
170-
{"ResourceType": "network-interface", "Tags": resource_tags},
171-
],
172-
UserData=compose_user_data(instance_config.startup_script),
173-
NetworkInterfaces=[
174-
{
175-
"AssociatePublicIpAddress": True,
176-
"DeviceIndex": 0,
177-
"SubnetId": instance_config.subnet_ids[0],
178-
"Groups": instance_config.security_group_ids,
179-
}
180-
],
181-
)
159+
# Try each subnet in order until one succeeds
160+
last_error = None
161+
for subnet_id in instance_config.subnet_ids:
162+
try:
163+
_logger.debug(
164+
"Attempting to launch instances in subnet %s", subnet_id
165+
)
166+
instances = await self.client.run_instances(
167+
ImageId=instance_config.ami_id,
168+
MinCount=min_number_of_instances,
169+
MaxCount=number_of_instances,
170+
IamInstanceProfile=(
171+
{"Arn": instance_config.iam_instance_profile}
172+
if instance_config.iam_instance_profile
173+
else {}
174+
),
175+
InstanceType=instance_config.type.name,
176+
InstanceInitiatedShutdownBehavior="terminate",
177+
KeyName=instance_config.key_name,
178+
TagSpecifications=[
179+
{"ResourceType": "instance", "Tags": resource_tags},
180+
{"ResourceType": "volume", "Tags": resource_tags},
181+
{
182+
"ResourceType": "network-interface",
183+
"Tags": resource_tags,
184+
},
185+
],
186+
UserData=compose_user_data(instance_config.startup_script),
187+
NetworkInterfaces=[
188+
{
189+
"AssociatePublicIpAddress": True,
190+
"DeviceIndex": 0,
191+
"SubnetId": subnet_id,
192+
"Groups": instance_config.security_group_ids,
193+
}
194+
],
195+
)
196+
# If we get here, the launch succeeded
197+
break
198+
except botocore.exceptions.ClientError as exc:
199+
error_code = exc.response.get("Error", {}).get("Code")
200+
if error_code == "InsufficientInstanceCapacity":
201+
_logger.warning(
202+
"Insufficient capacity in subnet %s for instance type %s, trying next subnet",
203+
subnet_id,
204+
instance_config.type.name,
205+
)
206+
last_error = EC2InsufficientCapacityError(
207+
subnet_id=subnet_id,
208+
instance_type=instance_config.type.name,
209+
)
210+
continue
211+
# For any other ClientError, re-raise to let the decorator handle it
212+
raise
213+
except Exception:
214+
# For any other error (not AWS-related), fail immediately
215+
raise
216+
else:
217+
# Loop ended without `break`: every configured subnet reported insufficient capacity (or subnet_ids was empty)
218+
_logger.error(
219+
"All subnets failed with insufficient capacity for instance type %s",
220+
instance_config.type.name,
221+
)
222+
if last_error:
223+
raise last_error
224+
raise EC2InsufficientCapacityError(
225+
subnet_id="all_configured_subnets",
226+
instance_type=instance_config.type.name,
227+
)
182228
instance_ids = [i["InstanceId"] for i in instances["Instances"]]
183229
_logger.info(
184230
"%s New instances launched: %s, waiting for them to start now...",

packages/aws-library/src/aws_library/ec2/_error_handler.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
EC2AccessError,
1010
EC2InstanceNotFoundError,
1111
EC2InstanceTypeInvalidError,
12+
EC2InsufficientCapacityError,
1213
EC2NotConnectedError,
1314
EC2RuntimeError,
1415
EC2TimeoutError,
@@ -29,13 +30,15 @@
2930
def _map_botocore_client_exception(
3031
botocore_error: botocore_exc.ClientError,
3132
*args, # pylint: disable=unused-argument # noqa: ARG001
32-
**kwargs, # pylint: disable=unused-argument # noqa: ARG001
33+
**kwargs,
3334
) -> EC2AccessError:
3435
status_code = int(
3536
botocore_error.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
3637
or botocore_error.response.get("Error", {}).get("Code", -1)
3738
)
3839
operation_name = botocore_error.operation_name
40+
error_code = botocore_error.response.get("Error", {}).get("Code")
41+
3942
match status_code, operation_name:
4043
case 400, "StartInstances":
4144
return EC2InstanceNotFoundError()
@@ -46,6 +49,12 @@ def _map_botocore_client_exception(
4649
case 400, "DescribeInstanceTypes":
4750
return EC2InstanceTypeInvalidError()
4851
case _:
52+
# No (status_code, operation_name) case matched above: check the AWS error code before falling back to a generic EC2AccessError
53+
if error_code == "InsufficientInstanceCapacity":
54+
return EC2InsufficientCapacityError(
55+
subnet_id=kwargs.get("subnet_id", "unknown"),
56+
instance_type=kwargs.get("instance_type", "unknown"),
57+
)
4958
return EC2AccessError(
5059
operation_name=operation_name,
5160
code=status_code,

packages/aws-library/src/aws_library/ec2/_errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,7 @@ class EC2TooManyInstancesError(EC2AccessError):
3636
msg_template: str = (
3737
"The maximum amount of instances {num_instances} is already reached!"
3838
)
39+
40+
41+
class EC2InsufficientCapacityError(EC2AccessError):
42+
msg_template: str = "Insufficient capacity in {subnet_id} for {instance_type}"

packages/aws-library/tests/test_ec2_client.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def ec2_instance_config(
9797
ami_id=aws_ami_id,
9898
key_name=faker.pystr(),
9999
security_group_ids=[aws_security_group_id],
100-
subnet_ids=aws_subnet_id,
100+
subnet_ids=[aws_subnet_id],
101101
iam_instance_profile="",
102102
)
103103

@@ -575,3 +575,45 @@ async def test_remove_instance_tags_not_existing_raises(
575575
await simcore_ec2_api.remove_instances_tags(
576576
[fake_ec2_instance_data()], tag_keys=[]
577577
)
578+
579+
580+
async def test_launch_instances_multi_subnet_fallback(
581+
simcore_ec2_api: SimcoreEC2API,
582+
ec2_client: EC2Client,
583+
fake_ec2_instance_type: EC2InstanceType,
584+
faker: Faker,
585+
aws_subnet_id: str,
586+
aws_security_group_id: str,
587+
aws_ami_id: str,
588+
):
589+
"""Test that launch_instances works with multiple subnet IDs."""
590+
await _assert_no_instances_in_ec2(ec2_client)
591+
592+
# Create a config with multiple subnet IDs; the valid one is listed first, so the fallback to later subnets is not exercised here
593+
ec2_instance_config = EC2InstanceConfig(
594+
type=fake_ec2_instance_type,
595+
tags=faker.pydict(allowed_types=(str,)),
596+
startup_script=faker.pystr(),
597+
ami_id=aws_ami_id,
598+
key_name=faker.pystr(),
599+
security_group_ids=[aws_security_group_id],
600+
subnet_ids=[aws_subnet_id, "subnet-backup1", "subnet-backup2"],
601+
iam_instance_profile="",
602+
)
603+
604+
# This should succeed using the first (valid) subnet
605+
await simcore_ec2_api.launch_instances(
606+
ec2_instance_config,
607+
min_number_of_instances=1,
608+
number_of_instances=1,
609+
)
610+
611+
# Verify that the instance was created
612+
await _assert_instances_in_ec2(
613+
ec2_client,
614+
expected_num_reservations=1,
615+
expected_num_instances=1,
616+
expected_instance_type=ec2_instance_config.type,
617+
expected_tags=ec2_instance_config.tags,
618+
expected_state="running",
619+
)

0 commit comments

Comments
 (0)