| 
15 | 15 |     EC2Tags,  | 
16 | 16 |     Resources,  | 
17 | 17 | )  | 
18 |  | -from aws_library.ec2._errors import EC2TooManyInstancesError  | 
 | 18 | +from aws_library.ec2._errors import EC2AccessError, EC2TooManyInstancesError  | 
19 | 19 | from fastapi import FastAPI  | 
20 | 20 | from models_library.generated_models.docker_rest_api import Node  | 
21 | 21 | from models_library.rabbitmq_messages import ProgressType  | 
@@ -421,7 +421,7 @@ async def _activate_drained_nodes(  | 
421 | 421 |     )  | 
422 | 422 | 
 
  | 
423 | 423 | 
 
  | 
424 |  | -async def _start_warm_buffer_instances(  | 
 | 424 | +async def _try_start_warm_buffer_instances(  | 
425 | 425 |     app: FastAPI, cluster: Cluster, auto_scaling_mode: AutoscalingProvider  | 
426 | 426 | ) -> Cluster:  | 
427 | 427 |     """starts warm buffer if there are assigned tasks, or if a hot buffer of the same type is needed"""  | 
@@ -471,9 +471,20 @@ async def _start_warm_buffer_instances(  | 
471 | 471 |     with log_context(  | 
472 | 472 |         _logger, logging.INFO, f"start {len(instances_to_start)} warm buffer machines"  | 
473 | 473 |     ):  | 
474 |  | -        started_instances = await get_ec2_client(app).start_instances(  | 
475 |  | -            instances_to_start  | 
476 |  | -        )  | 
 | 474 | +        try:  | 
 | 475 | +            started_instances = await get_ec2_client(app).start_instances(  | 
 | 476 | +                instances_to_start  | 
 | 477 | +            )  | 
 | 478 | +        except EC2AccessError:  | 
 | 479 | +            _logger.warning(  | 
 | 480 | +                "Could not start warm buffer instances! "  | 
 | 481 | +                "TIP: This can happen in case of Insufficient "  | 
 | 482 | +                "Capacity on AWS AZ(s) where the warm buffers were created. "  | 
 | 483 | +                "Scaling up will be achieved via launching new EC2 instances instead.",  | 
 | 484 | +                exc_info=True,  | 
 | 485 | +            )  | 
 | 486 | +            # NOTE: the tasks assigned to the warm buffer instances still need to be re-assigned; this branch only returns the cluster  | 
 | 487 | +            return cluster  | 
477 | 488 |         # NOTE: first start the instance and then set the tags in case the instance cannot start (e.g. InsufficientInstanceCapacity)  | 
478 | 489 |         await get_ec2_client(app).set_instances_tags(  | 
479 | 490 |             started_instances,  | 
@@ -1231,7 +1242,7 @@ async def _autoscale_cluster(  | 
1231 | 1242 |     cluster = await _activate_drained_nodes(app, cluster)  | 
1232 | 1243 | 
 
  | 
1233 | 1244 |     # 3. start warm buffer instances to cover the remaining tasks  | 
1234 |  | -    cluster = await _start_warm_buffer_instances(app, cluster, auto_scaling_mode)  | 
 | 1245 | +    cluster = await _try_start_warm_buffer_instances(app, cluster, auto_scaling_mode)  | 
1235 | 1246 | 
 
  | 
1236 | 1247 |     # 4. scale down unused instances  | 
1237 | 1248 |     cluster = await _scale_down_unused_cluster_instances(  | 
 | 
0 commit comments