1313from  settings_library .ec2  import  EC2Settings 
1414from  types_aiobotocore_ec2  import  EC2Client 
1515from  types_aiobotocore_ec2 .literals  import  InstanceStateNameType , InstanceTypeType 
16- from  types_aiobotocore_ec2 .type_defs  import  FilterTypeDef , TagTypeDef 
16+ from  types_aiobotocore_ec2 .type_defs  import  (
17+     FilterTypeDef ,
18+     TagTypeDef ,
19+ )
1720
1821from  ._error_handler  import  ec2_exception_handler 
19- from  ._errors  import  EC2InstanceNotFoundError , EC2TooManyInstancesError 
22+ from  ._errors  import  (
23+     EC2InstanceNotFoundError ,
24+     EC2InsufficientCapacityError ,
25+     EC2SubnetsNotEnoughIPsError ,
26+ )
2027from  ._models  import  (
2128    AWSTagKey ,
2229    EC2InstanceConfig ,
2532    EC2Tags ,
2633    Resources ,
2734)
28- from  ._utils  import  compose_user_data , ec2_instance_data_from_aws_instance 
35+ from  ._utils  import  (
36+     check_max_number_of_instances_not_exceeded ,
37+     compose_user_data ,
38+     ec2_instance_data_from_aws_instance ,
39+     get_subnet_azs ,
40+     get_subnet_capacity ,
41+ )
2942
3043_logger  =  logging .getLogger (__name__ )
3144
@@ -92,6 +105,11 @@ async def get_ec2_instance_capabilities(
92105        list_instances : list [EC2InstanceType ] =  []
93106        for  instance  in  instance_types .get ("InstanceTypes" , []):
94107            with  contextlib .suppress (KeyError ):
108+                 assert  "InstanceType"  in  instance   # nosec 
109+                 assert  "VCpuInfo"  in  instance   # nosec 
110+                 assert  "DefaultVCpus"  in  instance ["VCpuInfo" ]  # nosec 
111+                 assert  "MemoryInfo"  in  instance   # nosec 
112+                 assert  "SizeInMiB"  in  instance ["MemoryInfo" ]  # nosec 
95113                list_instances .append (
96114                    EC2InstanceType (
97115                        name = instance ["InstanceType" ],
@@ -118,94 +136,145 @@ async def launch_instances(
118136
119137        Arguments: 
120138            instance_config -- The EC2 instance configuration 
121-             min_number_of_instances -- the minimal number of instances needed  (fails if this amount cannot be reached) 
139+             min_number_of_instances -- the minimal number of instances required  (fails if this amount cannot be reached) 
122140            number_of_instances -- the ideal number of instances needed (if it cannot be reached AWS will return a number >=min_number_of_instances) 
123- 
124-         Keyword Arguments: 
125-             max_total_number_of_instances -- The total maximum allowed number of instances for this given instance_config (default: {10}) 
141+             max_total_number_of_instances -- The total maximum allowed number of instances for this given instance_config 
126142
127143        Raises: 
128-             EC2TooManyInstancesError: 
144+             EC2TooManyInstancesError: max_total_number_of_instances would be exceeded 
145+             EC2SubnetsNotEnoughIPsError: not enough IPs in the subnets 
146+             EC2InsufficientCapacityError: not enough capacity in the subnets 
147+ 
129148
130149        Returns: 
131150            The created instance data infos 
132151        """ 
152+ 
133153        with  log_context (
134154            _logger ,
135155            logging .INFO ,
136-             msg = f"launch { number_of_instances }   AWS instance(s) { instance_config .type .name }   with { instance_config .tags = }  " ,
156+             msg = f"launch { number_of_instances }   AWS instance(s) { instance_config .type .name }  " 
157+             f" with { instance_config .tags = }   in { instance_config .subnet_ids = }  " ,
137158        ):
138159            # first check the max amount is not already reached 
139-             current_instances  =  await  self .get_instances (
140-                 key_names = [instance_config .key_name ], tags = instance_config .tags 
160+             await  check_max_number_of_instances_not_exceeded (
161+                 self ,
162+                 instance_config ,
163+                 required_number_instances = number_of_instances ,
164+                 max_total_number_of_instances = max_total_number_of_instances ,
141165            )
142-             if  (
143-                 len (current_instances ) +  number_of_instances 
144-                 >  max_total_number_of_instances 
145-             ):
146-                 raise  EC2TooManyInstancesError (
147-                     num_instances = max_total_number_of_instances 
166+ 
167+             # NOTE: checking subnets capacity is not strictly needed as AWS will do it for us 
168+             # but it gives us a chance to give early feedback to the user 
169+             # and avoid trying to launch instances in subnets that are already full 
170+             # and also allows to circumvent a moto bug that does not raise 
171+             # InsufficientInstanceCapacity when a subnet is full 
172+             subnet_id_to_available_ips  =  await  get_subnet_capacity (
173+                 self .client , subnet_ids = instance_config .subnet_ids 
174+             )
175+ 
176+             total_available_ips  =  sum (subnet_id_to_available_ips .values ())
177+             if  total_available_ips  <  min_number_of_instances :
178+                 raise  EC2SubnetsNotEnoughIPsError (
179+                     subnet_ids = instance_config .subnet_ids ,
180+                     instance_type = instance_config .type .name ,
181+                     available_ips = total_available_ips ,
148182                )
149183
184+             # now let's not try to run instances in subnets that have not enough IPs 
185+             subnet_ids_with_capacity  =  [
186+                 subnet_id 
187+                 for  subnet_id , capacity  in  subnet_id_to_available_ips .items ()
188+                 if  capacity  >=  min_number_of_instances 
189+             ]
190+ 
150191            resource_tags : list [TagTypeDef ] =  [
151192                {"Key" : tag_key , "Value" : tag_value }
152193                for  tag_key , tag_value  in  instance_config .tags .items ()
153194            ]
154195
155-             instances  =  await  self .client .run_instances (
156-                 ImageId = instance_config .ami_id ,
157-                 MinCount = min_number_of_instances ,
158-                 MaxCount = number_of_instances ,
159-                 IamInstanceProfile = (
160-                     {"Arn" : instance_config .iam_instance_profile }
161-                     if  instance_config .iam_instance_profile 
162-                     else  {}
163-                 ),
164-                 InstanceType = instance_config .type .name ,
165-                 InstanceInitiatedShutdownBehavior = "terminate" ,
166-                 KeyName = instance_config .key_name ,
167-                 TagSpecifications = [
168-                     {"ResourceType" : "instance" , "Tags" : resource_tags },
169-                     {"ResourceType" : "volume" , "Tags" : resource_tags },
170-                     {"ResourceType" : "network-interface" , "Tags" : resource_tags },
171-                 ],
172-                 UserData = compose_user_data (instance_config .startup_script ),
173-                 NetworkInterfaces = [
174-                     {
175-                         "AssociatePublicIpAddress" : True ,
176-                         "DeviceIndex" : 0 ,
177-                         "SubnetId" : instance_config .subnet_id ,
178-                         "Groups" : instance_config .security_group_ids ,
179-                     }
180-                 ],
181-             )
182-             instance_ids  =  [i ["InstanceId" ] for  i  in  instances ["Instances" ]]
183-             _logger .info (
184-                 "%s New instances launched: %s, waiting for them to start now..." ,
185-                 len (instance_ids ),
186-                 instance_ids ,
187-             )
196+             # Try each subnet in order until one succeeds 
197+             for  subnet_id  in  subnet_ids_with_capacity :
198+                 try :
199+                     _logger .debug (
200+                         "Attempting to launch instances in subnet %s" , subnet_id 
201+                     )
188202
189-             # wait for the instance to be in a pending state 
190-             # NOTE: reference to EC2 states https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html 
191-             waiter  =  self .client .get_waiter ("instance_exists" )
192-             await  waiter .wait (InstanceIds = instance_ids )
193-             _logger .debug ("instances %s exists now." , instance_ids )
203+                     instances  =  await  self .client .run_instances (
204+                         ImageId = instance_config .ami_id ,
205+                         MinCount = min_number_of_instances ,
206+                         MaxCount = number_of_instances ,
207+                         IamInstanceProfile = (
208+                             {"Arn" : instance_config .iam_instance_profile }
209+                             if  instance_config .iam_instance_profile 
210+                             else  {}
211+                         ),
212+                         InstanceType = instance_config .type .name ,
213+                         InstanceInitiatedShutdownBehavior = "terminate" ,
214+                         KeyName = instance_config .key_name ,
215+                         TagSpecifications = [
216+                             {"ResourceType" : "instance" , "Tags" : resource_tags },
217+                             {"ResourceType" : "volume" , "Tags" : resource_tags },
218+                             {
219+                                 "ResourceType" : "network-interface" ,
220+                                 "Tags" : resource_tags ,
221+                             },
222+                         ],
223+                         UserData = compose_user_data (instance_config .startup_script ),
224+                         NetworkInterfaces = [
225+                             {
226+                                 "AssociatePublicIpAddress" : True ,
227+                                 "DeviceIndex" : 0 ,
228+                                 "SubnetId" : subnet_id ,
229+                                 "Groups" : instance_config .security_group_ids ,
230+                             }
231+                         ],
232+                     )
233+                     # If we get here, the launch succeeded 
234+                     break 
235+                 except  botocore .exceptions .ClientError  as  exc :
236+                     error_code  =  exc .response .get ("Error" , {}).get ("Code" )
237+                     if  error_code  ==  "InsufficientInstanceCapacity" :
238+                         _logger .warning (
239+                             "Insufficient capacity in subnet %s for instance type %s, trying next subnet" ,
240+                             subnet_id ,
241+                             instance_config .type .name ,
242+                         )
243+                         continue 
244+                     # For any other ClientError, re-raise to let the decorator handle it 
245+                     raise 
246+ 
247+             else :
248+                 subnet_zones  =  await  get_subnet_azs (
249+                     self .client , subnet_ids = subnet_ids_with_capacity 
250+                 )
251+                 raise  EC2InsufficientCapacityError (
252+                     availability_zones = subnet_zones ,
253+                     instance_type = instance_config .type .name ,
254+                 )
255+             instance_ids  =  [
256+                 i ["InstanceId" ]  # pyright: ignore[reportTypedDictNotRequiredAccess] 
257+                 for  i  in  instances ["Instances" ]
258+             ]
259+             with  log_context (
260+                 _logger ,
261+                 logging .INFO ,
262+                 msg = f"{ len (instance_ids )}   instances: { instance_ids = }   launched. Wait to reach pending state" ,
263+             ):
264+                 # wait for the instance to be in a pending state 
265+                 # NOTE: reference to EC2 states https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html 
266+                 waiter  =  self .client .get_waiter ("instance_exists" )
267+                 await  waiter .wait (InstanceIds = instance_ids )
194268
195-             # NOTE: waiting for pending ensure  we get all the IPs back 
269+             # NOTE: waiting for pending ensures  we get all the IPs back 
196270            described_instances  =  await  self .client .describe_instances (
197271                InstanceIds = instance_ids 
198272            )
199273            assert  "Instances"  in  described_instances ["Reservations" ][0 ]  # nosec 
200-             instance_datas   =  [
274+             return  [
201275                await  ec2_instance_data_from_aws_instance (self , i )
202276                for  i  in  described_instances ["Reservations" ][0 ]["Instances" ]
203277            ]
204-             _logger .info (
205-                 "%s are pending now" ,
206-                 f"{ instance_ids = }  " ,
207-             )
208-             return  instance_datas 
209278
210279    @ec2_exception_handler (_logger ) 
211280    async  def  get_instances (
0 commit comments