
Commit 7f245bb

initial test
1 parent 15a8bcd commit 7f245bb

1 file changed: +67 -20 lines


services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py

Lines changed: 67 additions & 20 deletions
@@ -1194,22 +1194,22 @@ async def test_cluster_scaling_up_starts_multiple_instances(
             _ScaleUpParams(
                 imposed_instance_type="g3.4xlarge",  # 1 GPU, 16 CPUs, 122GiB
                 service_resources=Resources(
-                    cpus=5, ram=TypeAdapter(ByteSize).validate_python("30Gib")
+                    cpus=16, ram=TypeAdapter(ByteSize).validate_python("30Gib")
                 ),
-                num_services=10,
+                num_services=12,
                 expected_instance_type="g3.4xlarge",  # 1 GPU, 16 CPUs, 122GiB
-                expected_num_instances=4,
+                expected_num_instances=10,
             ),
             _ScaleUpParams(
                 imposed_instance_type="g4dn.8xlarge",  # 32CPUs, 128GiB
                 service_resources=Resources(
-                    cpus=5, ram=TypeAdapter(ByteSize).validate_python("20480MB")
+                    cpus=32, ram=TypeAdapter(ByteSize).validate_python("20480MB")
                 ),
                 num_services=7,
                 expected_instance_type="g4dn.8xlarge",  # 32CPUs, 128GiB
-                expected_num_instances=2,
+                expected_num_instances=7,
             ),
-            id="Two different instance types are needed",
+            id="A batch of services requiring g3.4xlarge and a batch requiring g4dn.8xlarge",
         ),
     ],
 )
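
Note on the new numbers in this hunk: each service now requests the full CPU count of its imposed instance type, so every service needs its own machine and the first g3.4xlarge batch overflows the cluster's instance cap. A minimal CPU-only sketch of that arithmetic, assuming a cap of 10 instances (inferred from expected_num_instances=10, not read from the repository's settings) and ignoring RAM packing:

import math

def expected_instances(
    num_services: int,
    cpus_per_service: int,
    cpus_per_instance: int,
    max_instances: int,
) -> int:
    # how many such services fit on one machine, considering CPUs only
    services_per_instance = max(1, cpus_per_instance // cpus_per_service)
    # machines needed for the whole batch, capped by the cluster-wide limit
    return min(math.ceil(num_services / services_per_instance), max_instances)

# new g3.4xlarge batch: 12 services x 16 CPUs on 16-CPU machines, assumed cap of 10
assert expected_instances(12, 16, 16, 10) == 10
# new g4dn.8xlarge batch: 7 services x 32 CPUs on 32-CPU machines
assert expected_instances(7, 32, 32, 10) == 7
# old g3.4xlarge values for comparison: 10 services x 5 CPUs pack 3 per machine -> 4
assert expected_instances(10, 5, 16, 10) == 4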
@@ -1218,6 +1218,7 @@ async def test_cluster_adapts_machines_on_the_fly(
     minimal_configuration: None,
     ec2_client: EC2Client,
     initialized_app: FastAPI,
+    app_settings: ApplicationSettings,
     create_service: Callable[
         [dict[str, Any], dict[DockerLabelKey, str], str, list[str]], Awaitable[Service]
     ],
@@ -1230,11 +1231,19 @@ async def test_cluster_adapts_machines_on_the_fly(
     scale_up_params1: _ScaleUpParams,
     scale_up_params2: _ScaleUpParams,
 ):
+    # pre-requisites
+    assert app_settings.AUTOSCALING_EC2_INSTANCES
+    assert app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES > 0
+    assert (
+        scale_up_params1.num_services
+        >= app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MAX_INSTANCES
+    ), "this test requires to run a first batch of more services than the maximum number of instances allowed"
     # we have nothing running now
     all_instances = await ec2_client.describe_instances()
     assert not all_instances["Reservations"]
 
-    # create several tasks that needs more power
+    #
+    # 1. create the first batch of services requiring the initial machines
     await asyncio.gather(
         *(
             create_service(
@@ -1257,21 +1266,59 @@ async def test_cluster_adapts_machines_on_the_fly(
             for _ in range(scale_up_params1.num_services)
         )
     )
+    for _ in range(3):
+        # it will only scale once and do nothing else
+        await auto_scale_cluster(
+            app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
+        )
+        await assert_autoscaled_dynamic_ec2_instances(
+            ec2_client,
+            expected_num_reservations=1,
+            expected_num_instances=scale_up_params1.expected_num_instances,
+            expected_instance_type=scale_up_params1.expected_instance_type,
+            expected_instance_state="running",
+            expected_additional_tag_keys=list(ec2_instance_custom_tags),
+            instance_filters=instance_type_filters,
+        )
 
-    await auto_scale_cluster(
-        app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
-    )
-
-    # check the instances were started
-    await assert_autoscaled_dynamic_ec2_instances(
-        ec2_client,
-        expected_num_reservations=1,
-        expected_num_instances=scale_up_params1.expected_num_instances,
-        expected_instance_type=scale_up_params1.expected_instance_type,
-        expected_instance_state="running",
-        expected_additional_tag_keys=list(ec2_instance_custom_tags),
-        instance_filters=instance_type_filters,
+    #
+    # 2. now we start the second batch of services requiring a different type of machines
+    await asyncio.gather(
+        *(
+            create_service(
+                task_template
+                | create_task_reservations(
+                    int(scale_up_params2.service_resources.cpus),
+                    scale_up_params2.service_resources.ram,
+                ),
+                service_monitored_labels
+                | osparc_docker_label_keys.to_simcore_runtime_docker_labels(),
+                "pending",
+                (
+                    [
+                        f"node.labels.{DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY}=={scale_up_params2.imposed_instance_type}"
+                    ]
+                    if scale_up_params2.imposed_instance_type
+                    else []
+                ),
+            )
+            for _ in range(scale_up_params2.num_services)
+        )
     )
+    for _ in range(3):
+        # scaling will do nothing since we have hit the maximum number of machines
+        await auto_scale_cluster(
+            app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
+        )
+        await assert_autoscaled_dynamic_ec2_instances(
+            ec2_client,
+            expected_num_reservations=1,
+            expected_num_instances=scale_up_params1.expected_num_instances,
+            expected_instance_type=scale_up_params1.expected_instance_type,
+            expected_instance_state="running",
+            expected_additional_tag_keys=list(ec2_instance_custom_tags),
+            instance_filters=instance_type_filters,
+        )
 
 
 @pytest.mark.parametrize(
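
For context when reading the parametrization and the test body above, this is a hypothetical sketch of the parameter container, reconstructed only from the field names visible in this diff; the real _ScaleUpParams and Resources definitions live elsewhere in the test module and in the service's models, and may differ.

from dataclasses import dataclass

@dataclass(frozen=True, kw_only=True)
class Resources:
    # stand-in for the repository's Resources model used in the parametrization
    cpus: float
    ram: int  # bytes

@dataclass(frozen=True, kw_only=True)
class _ScaleUpParams:
    # only fields actually referenced in the diff are listed here
    imposed_instance_type: str | None
    service_resources: Resources
    num_services: int
    expected_instance_type: str
    expected_num_instances: int

With such a container in mind, note that the assertions in part 2 deliberately reuse scale_up_params1's expectations: once the instance cap is reached, starting the second batch must not change the set of running machines.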
