@@ -1187,6 +1187,92 @@ async def test_cluster_scaling_up_starts_multiple_instances(
11871187 mock_rabbitmq_post_message .reset_mock ()
11881188
11891189
@pytest.mark.parametrize(
    "scale_up_params1, scale_up_params2",
    [
        pytest.param(
            _ScaleUpParams(
                imposed_instance_type="g3.4xlarge",  # 1 GPU, 16 CPUs, 122GiB
                service_resources=Resources(
                    cpus=5, ram=TypeAdapter(ByteSize).validate_python("30Gib")
                ),
                num_services=10,
                expected_instance_type="g3.4xlarge",  # 1 GPU, 16 CPUs, 122GiB
                expected_num_instances=4,
            ),
            _ScaleUpParams(
                imposed_instance_type="g4dn.8xlarge",  # 32CPUs, 128GiB
                service_resources=Resources(
                    cpus=5, ram=TypeAdapter(ByteSize).validate_python("20480MB")
                ),
                num_services=7,
                expected_instance_type="g4dn.8xlarge",  # 32CPUs, 128GiB
                expected_num_instances=2,
            ),
            id="Two different instance types are needed",
        ),
    ],
)
async def test_cluster_adapts_machines_on_the_fly(
    minimal_configuration: None,
    ec2_client: EC2Client,
    initialized_app: FastAPI,
    create_service: Callable[
        [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service]
    ],
    task_template: dict[str, Any],
    create_task_reservations: Callable[[int, int], dict[str, Any]],
    service_monitored_labels: dict[DockerLabelKey, str],
    osparc_docker_label_keys: StandardSimcoreDockerLabels,
    ec2_instance_custom_tags: dict[str, str],
    instance_type_filters: Sequence[FilterTypeDef],
    scale_up_params1: _ScaleUpParams,
    scale_up_params2: _ScaleUpParams,
):
    """Check that one autoscaling pass starts the instances needed by pending services.

    The test starts from an EC2 account without reservations, creates
    ``scale_up_params1.num_services`` services in the "pending" state (each one
    reserving ``scale_up_params1.service_resources`` and, when an instance type
    is imposed, carrying the matching placement constraint), runs
    ``auto_scale_cluster`` once in dynamic mode and asserts on the resulting
    EC2 instances.

    NOTE(review): ``scale_up_params2`` is parametrized and accepted here but is
    not used anywhere in this body yet -- presumably a second scale-up phase
    with another instance type is still to be written; confirm.
    """
    # precondition: nothing is running yet
    described = await ec2_client.describe_instances()
    assert not described["Reservations"]

    def _new_pending_service() -> Awaitable[Service]:
        # Every service gets freshly built spec/labels/constraints objects, so
        # nothing mutable is shared between the concurrent create_service calls.
        task_spec = task_template | create_task_reservations(
            int(scale_up_params1.service_resources.cpus),
            scale_up_params1.service_resources.ram,
        )
        labels = (
            service_monitored_labels
            | osparc_docker_label_keys.to_simcore_runtime_docker_labels()
        )
        placement_constraints: list[str] = []
        if scale_up_params1.imposed_instance_type:
            placement_constraints.append(
                f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={scale_up_params1.imposed_instance_type}"
            )
        return create_service(task_spec, labels, "pending", placement_constraints)

    # create several services that need more power than is currently available
    pending_services = [
        _new_pending_service() for _ in range(scale_up_params1.num_services)
    ]
    await asyncio.gather(*pending_services)

    # a single autoscaling pass in dynamic mode
    await auto_scale_cluster(
        app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
    )

    # check the instances were started
    await assert_autoscaled_dynamic_ec2_instances(
        ec2_client,
        expected_num_reservations=1,
        expected_num_instances=scale_up_params1.expected_num_instances,
        expected_instance_type=scale_up_params1.expected_instance_type,
        expected_instance_state="running",
        expected_additional_tag_keys=list(ec2_instance_custom_tags),
        instance_filters=instance_type_filters,
    )
1274+
1275+
11901276@pytest .mark .parametrize (
11911277 "docker_service_imposed_ec2_type, docker_service_ram, expected_ec2_type" ,
11921278 [
0 commit comments