1313from settings_library .ec2 import EC2Settings
1414from types_aiobotocore_ec2 import EC2Client
1515from types_aiobotocore_ec2 .literals import InstanceStateNameType , InstanceTypeType
16- from types_aiobotocore_ec2 .type_defs import FilterTypeDef , TagTypeDef
16+ from types_aiobotocore_ec2 .type_defs import (
17+ FilterTypeDef ,
18+ TagTypeDef ,
19+ )
1720
1821from ._error_handler import ec2_exception_handler
19- from ._errors import EC2InstanceNotFoundError , EC2TooManyInstancesError
22+ from ._errors import (
23+ EC2InstanceNotFoundError ,
24+ EC2InsufficientCapacityError ,
25+ EC2SubnetsNotEnoughIPsError ,
26+ )
2027from ._models import (
2128 AWSTagKey ,
2229 EC2InstanceConfig ,
2532 EC2Tags ,
2633 Resources ,
2734)
28- from ._utils import compose_user_data , ec2_instance_data_from_aws_instance
35+ from ._utils import (
36+ check_max_number_of_instances_not_exceeded ,
37+ compose_user_data ,
38+ ec2_instance_data_from_aws_instance ,
39+ get_subnet_azs ,
40+ get_subnet_capacity ,
41+ )
2942
3043_logger = logging .getLogger (__name__ )
3144
@@ -92,6 +105,11 @@ async def get_ec2_instance_capabilities(
92105 list_instances : list [EC2InstanceType ] = []
93106 for instance in instance_types .get ("InstanceTypes" , []):
94107 with contextlib .suppress (KeyError ):
108+ assert "InstanceType" in instance # nosec
109+ assert "VCpuInfo" in instance # nosec
110+ assert "DefaultVCpus" in instance ["VCpuInfo" ] # nosec
111+ assert "MemoryInfo" in instance # nosec
112+ assert "SizeInMiB" in instance ["MemoryInfo" ] # nosec
95113 list_instances .append (
96114 EC2InstanceType (
97115 name = instance ["InstanceType" ],
@@ -118,94 +136,145 @@ async def launch_instances(
118136
119137 Arguments:
120138 instance_config -- The EC2 instance configuration
121- min_number_of_instances -- the minimal number of instances needed (fails if this amount cannot be reached)
139+ min_number_of_instances -- the minimal number of instances required (fails if this amount cannot be reached)
122140 number_of_instances -- the ideal number of instances needed (if it cannot be reached, AWS will return a number >= min_number_of_instances)
123-
124- Keyword Arguments:
125- max_total_number_of_instances -- The total maximum allowed number of instances for this given instance_config (default: {10})
141+ max_total_number_of_instances -- The total maximum allowed number of instances for this given instance_config
126142
127143 Raises:
128- EC2TooManyInstancesError:
144+ EC2TooManyInstancesError: max_total_number_of_instances would be exceeded
145+ EC2SubnetsNotEnoughIPsError: not enough IPs in the subnets
146+ EC2InsufficientCapacityError: not enough capacity in the subnets
147+
129148
130149 Returns:
131150 The created instance data infos
132151 """
152+
133153 with log_context (
134154 _logger ,
135155 logging .INFO ,
136- msg = f"launch { number_of_instances } AWS instance(s) { instance_config .type .name } with { instance_config .tags = } " ,
156+ msg = f"launch { number_of_instances } AWS instance(s) { instance_config .type .name } "
157+ f" with { instance_config .tags = } in { instance_config .subnet_ids = } " ,
137158 ):
138159 # first check the max amount is not already reached
139- current_instances = await self .get_instances (
140- key_names = [instance_config .key_name ], tags = instance_config .tags
160+ await check_max_number_of_instances_not_exceeded (
161+ self ,
162+ instance_config ,
163+ required_number_instances = number_of_instances ,
164+ max_total_number_of_instances = max_total_number_of_instances ,
141165 )
142- if (
143- len (current_instances ) + number_of_instances
144- > max_total_number_of_instances
145- ):
146- raise EC2TooManyInstancesError (
147- num_instances = max_total_number_of_instances
166+
167+ # NOTE: checking subnets capacity is not strictly needed as AWS will do it for us
168+ # but it gives us a chance to give early feedback to the user
169+ # and avoid trying to launch instances in subnets that are already full
170+ # and it also lets us work around a moto bug that does not raise
171+ # InsufficientInstanceCapacity when a subnet is full
172+ subnet_id_to_available_ips = await get_subnet_capacity (
173+ self .client , subnet_ids = instance_config .subnet_ids
174+ )
175+
176+ total_available_ips = sum (subnet_id_to_available_ips .values ())
177+ if total_available_ips < min_number_of_instances :
178+ raise EC2SubnetsNotEnoughIPsError (
179+ subnet_ids = instance_config .subnet_ids ,
180+ instance_type = instance_config .type .name ,
181+ available_ips = total_available_ips ,
148182 )
149183
184+ # now let's not try to run instances in subnets that have not enough IPs
185+ subnet_ids_with_capacity = [
186+ subnet_id
187+ for subnet_id , capacity in subnet_id_to_available_ips .items ()
188+ if capacity >= min_number_of_instances
189+ ]
190+
150191 resource_tags : list [TagTypeDef ] = [
151192 {"Key" : tag_key , "Value" : tag_value }
152193 for tag_key , tag_value in instance_config .tags .items ()
153194 ]
154195
155- instances = await self .client .run_instances (
156- ImageId = instance_config .ami_id ,
157- MinCount = min_number_of_instances ,
158- MaxCount = number_of_instances ,
159- IamInstanceProfile = (
160- {"Arn" : instance_config .iam_instance_profile }
161- if instance_config .iam_instance_profile
162- else {}
163- ),
164- InstanceType = instance_config .type .name ,
165- InstanceInitiatedShutdownBehavior = "terminate" ,
166- KeyName = instance_config .key_name ,
167- TagSpecifications = [
168- {"ResourceType" : "instance" , "Tags" : resource_tags },
169- {"ResourceType" : "volume" , "Tags" : resource_tags },
170- {"ResourceType" : "network-interface" , "Tags" : resource_tags },
171- ],
172- UserData = compose_user_data (instance_config .startup_script ),
173- NetworkInterfaces = [
174- {
175- "AssociatePublicIpAddress" : True ,
176- "DeviceIndex" : 0 ,
177- "SubnetId" : instance_config .subnet_id ,
178- "Groups" : instance_config .security_group_ids ,
179- }
180- ],
181- )
182- instance_ids = [i ["InstanceId" ] for i in instances ["Instances" ]]
183- _logger .info (
184- "%s New instances launched: %s, waiting for them to start now..." ,
185- len (instance_ids ),
186- instance_ids ,
187- )
196+ # Try each subnet in order until one succeeds
197+ for subnet_id in subnet_ids_with_capacity :
198+ try :
199+ _logger .debug (
200+ "Attempting to launch instances in subnet %s" , subnet_id
201+ )
188202
189- # wait for the instance to be in a pending state
190- # NOTE: reference to EC2 states https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
191- waiter = self .client .get_waiter ("instance_exists" )
192- await waiter .wait (InstanceIds = instance_ids )
193- _logger .debug ("instances %s exists now." , instance_ids )
203+ instances = await self .client .run_instances (
204+ ImageId = instance_config .ami_id ,
205+ MinCount = min_number_of_instances ,
206+ MaxCount = number_of_instances ,
207+ IamInstanceProfile = (
208+ {"Arn" : instance_config .iam_instance_profile }
209+ if instance_config .iam_instance_profile
210+ else {}
211+ ),
212+ InstanceType = instance_config .type .name ,
213+ InstanceInitiatedShutdownBehavior = "terminate" ,
214+ KeyName = instance_config .key_name ,
215+ TagSpecifications = [
216+ {"ResourceType" : "instance" , "Tags" : resource_tags },
217+ {"ResourceType" : "volume" , "Tags" : resource_tags },
218+ {
219+ "ResourceType" : "network-interface" ,
220+ "Tags" : resource_tags ,
221+ },
222+ ],
223+ UserData = compose_user_data (instance_config .startup_script ),
224+ NetworkInterfaces = [
225+ {
226+ "AssociatePublicIpAddress" : True ,
227+ "DeviceIndex" : 0 ,
228+ "SubnetId" : subnet_id ,
229+ "Groups" : instance_config .security_group_ids ,
230+ }
231+ ],
232+ )
233+ # If we get here, the launch succeeded
234+ break
235+ except botocore .exceptions .ClientError as exc :
236+ error_code = exc .response .get ("Error" , {}).get ("Code" )
237+ if error_code == "InsufficientInstanceCapacity" :
238+ _logger .warning (
239+ "Insufficient capacity in subnet %s for instance type %s, trying next subnet" ,
240+ subnet_id ,
241+ instance_config .type .name ,
242+ )
243+ continue
244+ # For any other ClientError, re-raise to let the decorator handle it
245+ raise
246+
247+ else :
248+ subnet_zones = await get_subnet_azs (
249+ self .client , subnet_ids = subnet_ids_with_capacity
250+ )
251+ raise EC2InsufficientCapacityError (
252+ availability_zones = subnet_zones ,
253+ instance_type = instance_config .type .name ,
254+ )
255+ instance_ids = [
256+ i ["InstanceId" ] # pyright: ignore[reportTypedDictNotRequiredAccess]
257+ for i in instances ["Instances" ]
258+ ]
259+ with log_context (
260+ _logger ,
261+ logging .INFO ,
262+ msg = f"{ len (instance_ids )} instances: { instance_ids = } launched. Wait to reach pending state" ,
263+ ):
264+ # wait for the instance to be in a pending state
265+ # NOTE: reference to EC2 states https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
266+ waiter = self .client .get_waiter ("instance_exists" )
267+ await waiter .wait (InstanceIds = instance_ids )
194268
195- # NOTE: waiting for pending ensure we get all the IPs back
269+ # NOTE: waiting for pending ensures we get all the IPs back
196270 described_instances = await self .client .describe_instances (
197271 InstanceIds = instance_ids
198272 )
199273 assert "Instances" in described_instances ["Reservations" ][0 ] # nosec
200- instance_datas = [
274+ return [
201275 await ec2_instance_data_from_aws_instance (self , i )
202276 for i in described_instances ["Reservations" ][0 ]["Instances" ]
203277 ]
204- _logger .info (
205- "%s are pending now" ,
206- f"{ instance_ids = } " ,
207- )
208- return instance_datas
209278
210279 @ec2_exception_handler (_logger )
211280 async def get_instances (
0 commit comments