Skip to content

Commit 2fe3409

Browse files
committed
renamed function and properly catch error
1 parent bbfa71a commit 2fe3409

File tree

1 file changed

+17
-6
lines changed

1 file changed

+17
-6
lines changed

services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
EC2Tags,
1616
Resources,
1717
)
18-
from aws_library.ec2._errors import EC2TooManyInstancesError
18+
from aws_library.ec2._errors import EC2AccessError, EC2TooManyInstancesError
1919
from fastapi import FastAPI
2020
from models_library.generated_models.docker_rest_api import Node
2121
from models_library.rabbitmq_messages import ProgressType
@@ -421,7 +421,7 @@ async def _activate_drained_nodes(
421421
)
422422

423423

424-
async def _start_warm_buffer_instances(
424+
async def _try_start_warm_buffer_instances(
425425
app: FastAPI, cluster: Cluster, auto_scaling_mode: AutoscalingProvider
426426
) -> Cluster:
427427
"""starts warm buffer if there are assigned tasks, or if a hot buffer of the same type is needed"""
@@ -471,9 +471,20 @@ async def _start_warm_buffer_instances(
471471
with log_context(
472472
_logger, logging.INFO, f"start {len(instances_to_start)} warm buffer machines"
473473
):
474-
started_instances = await get_ec2_client(app).start_instances(
475-
instances_to_start
476-
)
474+
try:
475+
started_instances = await get_ec2_client(app).start_instances(
476+
instances_to_start
477+
)
478+
except EC2AccessError:
479+
_logger.warning(
480+
"Could not start warm buffer instances! "
481+
"TIP: This can happen in case of Insufficient "
482+
"Capacity on AWS AZ(s) where the warm buffers were created. "
483+
"Scaling up will be achieved via launching new EC2 instances instead.",
484+
exc_info=True,
485+
)
486+
# we need to re-assign the tasks assigned to the warm buffer instances
487+
return cluster
477488
# NOTE: first start the instance and then set the tags in case the instance cannot start (e.g. InsufficientInstanceCapacity)
478489
await get_ec2_client(app).set_instances_tags(
479490
started_instances,
@@ -1231,7 +1242,7 @@ async def _autoscale_cluster(
12311242
cluster = await _activate_drained_nodes(app, cluster)
12321243

12331244
# 3. start warm buffer instances to cover the remaining tasks
1234-
cluster = await _start_warm_buffer_instances(app, cluster, auto_scaling_mode)
1245+
cluster = await _try_start_warm_buffer_instances(app, cluster, auto_scaling_mode)
12351246

12361247
# 4. scale down unused instances
12371248
cluster = await _scale_down_unused_cluster_instances(

0 commit comments

Comments
 (0)