1111 EC2InstanceConfig ,
1212 EC2InstanceData ,
1313 EC2InstanceType ,
14+ EC2Tags ,
1415 Resources ,
1516)
1617from fastapi import FastAPI
@@ -441,6 +442,73 @@ async def _find_needed_instances(
441442 return num_instances_per_type
442443
443444
async def _cap_needed_instances(
    app: FastAPI, needed_instances: dict[EC2InstanceType, int], ec2_tags: EC2Tags
) -> dict[EC2InstanceType, int]:
    """Trim the requested instance counts so the configured maximum is respected.

    The instances currently returned by the EC2 client for the configured key
    name and ``ec2_tags`` count against ``EC2_INSTANCES_MAX_INSTANCES``; only
    the remaining budget may be handed out. When the request does not fit:

    1. every requested type (in dict order) first gets a single instance, for
       as long as the budget allows;
    2. if all types got one, the leftover budget is spread round-robin, one
       instance at a time, never exceeding what was asked for a given type.

    Arguments:
        app: the application holding the EC2 client and the settings
        needed_instances: requested number of instances per instance type
        ec2_tags: tags used to look up the instances that already exist

    Returns:
        ``needed_instances`` itself (same object) when the request fits in the
        budget, otherwise a new dict with the capped counts.

    Raises:
        Ec2TooManyInstancesError: the number of existing instances already
            reaches (or exceeds) the allowed maximum
    """
    ec2_client = get_ec2_client(app)
    app_settings = get_application_settings(app)
    assert app_settings.AUTOSCALING_EC2_INSTANCES  # nosec
    ec2_settings = app_settings.AUTOSCALING_EC2_INSTANCES

    existing_instances = await ec2_client.get_instances(
        key_names=[ec2_settings.EC2_INSTANCES_KEY_NAME],
        tags=ec2_tags,
    )
    max_instances = ec2_settings.EC2_INSTANCES_MAX_INSTANCES

    # how many instances may still be created on top of the existing ones
    budget = max_instances - len(existing_instances)
    if budget <= 0:
        # the limit is already reached, nothing more can be started
        raise Ec2TooManyInstancesError(num_instances=max_instances)

    if sum(needed_instances.values()) <= budget:
        # the whole request fits, hand it back untouched
        return needed_instances

    # too much was requested: start by granting one instance to each of the
    # first `budget` types (budget >= 1 here, so the slice is never empty)
    granted = dict.fromkeys(list(needed_instances)[:budget], 1)
    if len(granted) < len(needed_instances):
        # more types than available slots, some types get nothing
        return granted

    # every type got one instance; distribute what is left in round-robin
    remaining = budget - len(granted)
    while remaining > 0:
        for instance_type, wanted in needed_instances.items():
            if remaining == 0:
                break
            if wanted > granted[instance_type]:
                granted[instance_type] += 1
                remaining -= 1

    return granted
510+
511+
444512async def _start_instances (
445513 app : FastAPI ,
446514 needed_instances : dict [EC2InstanceType , int ],
@@ -450,14 +518,28 @@ async def _start_instances(
450518 ec2_client = get_ec2_client (app )
451519 app_settings = get_application_settings (app )
452520 assert app_settings .AUTOSCALING_EC2_INSTANCES # nosec
521+ new_instance_tags = auto_scaling_mode .get_ec2_tags (app )
522+ capped_needed_machines = {}
523+ try :
524+ capped_needed_machines = await _cap_needed_instances (
525+ app , needed_instances , new_instance_tags
526+ )
527+ except Ec2TooManyInstancesError :
528+ await auto_scaling_mode .log_message_from_tasks (
529+ app ,
530+ tasks ,
531+ "The maximum number of machines in the cluster was reached. Please wait for your running jobs "
532+ "to complete and try again later or contact osparc support if this issue does not resolve." ,
533+ level = logging .ERROR ,
534+ )
535+ return []
453536
454- instance_tags = auto_scaling_mode .get_ec2_tags (app )
455537 results = await asyncio .gather (
456538 * [
457539 ec2_client .start_aws_instance (
458540 EC2InstanceConfig (
459541 type = instance_type ,
460- tags = instance_tags ,
542+ tags = new_instance_tags ,
461543 startup_script = await ec2_startup_script (
462544 app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_ALLOWED_TYPES [
463545 instance_type .name
@@ -474,7 +556,7 @@ async def _start_instances(
474556 number_of_instances = instance_num ,
475557 max_number_of_instances = app_settings .AUTOSCALING_EC2_INSTANCES .EC2_INSTANCES_MAX_INSTANCES ,
476558 )
477- for instance_type , instance_num in needed_instances .items ()
559+ for instance_type , instance_num in capped_needed_machines .items ()
478560 ],
479561 return_exceptions = True ,
480562 )
@@ -497,7 +579,10 @@ async def _start_instances(
497579 else :
498580 new_pending_instances .append (r )
499581
500- log_message = f"{ sum (n for n in needed_instances .values ())} new machines launched, it might take up to 3 minutes to start, Please wait..."
582+ log_message = (
583+ f"{ sum (n for n in capped_needed_machines .values ())} new machines launched"
584+ ", it might take up to 3 minutes to start, Please wait..."
585+ )
501586 await auto_scaling_mode .log_message_from_tasks (
502587 app , tasks , log_message , level = logging .INFO
503588 )
0 commit comments