Skip to content

Commit d080407

Browse files
authored
✨🚨Autoscaling: Prepare Warmed EBS-backed volumes to use as buffer for machines (⚠️ Devops) 🚨 (#5923)
1 parent 9cee5db commit d080407

36 files changed

+1724
-366
lines changed

.env-devel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ AUTOSCALING_EC2_INSTANCES=null
3030
AUTOSCALING_LOGLEVEL=WARNING
3131
AUTOSCALING_NODES_MONITORING=null
3232
AUTOSCALING_POLL_INTERVAL=10
33+
AUTOSCALING_SSM_ACCESS=null
3334

3435
CATALOG_BACKGROUND_TASK_REST_TIME=60
3536
CATALOG_DEV_FEATURES_ENABLED=0

.github/workflows/ci-testing-deploy.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ on:
3939
env:
4040
DEFAULT_MAX_NANO_CPUS: 10000000
4141
DEFAULT_MAX_MEMORY: 268435456
42+
COLUMNS: 120
4243

4344
concurrency:
4445
group: ${{ github.workflow }}-${{ github.ref }}

docs/remote-work-aws-ssm.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# How to use VSCode on a remote private EC2
2+
[reference](https://medium.com/@dbpprt/transparently-develop-on-an-ec2-instance-with-vscode-remote-ssh-through-ssm-6e5c5e599ee1)
3+
4+
## To use from the terminal (add to `~/.ssh/config`)
5+
6+
```bash
7+
host i-* mi-*
8+
User ec2-user
9+
ProxyCommand sh -c "aws ssm start-session --target %h --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
10+
```
11+
12+
## To use from VSCode (add to `~/.ssh/config`, then connect to host `<instance-id>.<aws-profile>.<aws-region>`)
13+
14+
```bash
15+
host i-*.*.*
16+
User ec2-user
17+
ProxyCommand bash -c "aws ssm start-session --target $(echo %h|cut -d'.' -f1) --profile $(echo %h|/usr/bin/cut -d'.' -f2) --region $(echo %h|/usr/bin/cut -d'.' -f3) --document-name AWS-StartSSHSession --parameters 'portNumber=%p'"
18+
```

packages/aws-library/src/aws_library/ec2/models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ConstrainedStr,
1313
Field,
1414
NonNegativeFloat,
15+
NonNegativeInt,
1516
validator,
1617
)
1718
from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType
@@ -139,6 +140,9 @@ class EC2InstanceBootSpecific(BaseModel):
139140
description="time interval between pulls of images (minimum is 1 minute) "
140141
"(default to seconds, or see https://pydantic-docs.helpmanual.io/usage/types/#datetime-types for string formating)",
141142
)
143+
buffer_count: NonNegativeInt = Field(
144+
default=0, description="number of buffer EC2s to keep (defaults to 0)"
145+
)
142146

143147
class Config:
144148
schema_extra: ClassVar[dict[str, Any]] = {
@@ -184,6 +188,17 @@ class Config:
184188
],
185189
"pre_pull_images_cron_interval": "01:00:00",
186190
},
191+
{
192+
# AMI + pre-pull + buffer count
193+
"ami_id": "ami-123456789abcdef",
194+
"pre_pull_images": [
195+
"nginx:latest",
196+
"itisfoundation/my-very-nice-service:latest",
197+
"simcore/services/dynamic/another-nice-one:2.4.5",
198+
"asd",
199+
],
200+
"buffer_count": 10,
201+
},
187202
]
188203
}
189204

packages/aws-library/src/aws_library/ssm/_client.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ async def create(cls, settings: SSMSettings) -> "SimcoreSSMAPI":
4242
session = aioboto3.Session()
4343
session_client = session.client(
4444
"ssm",
45-
endpoint_url=f"{settings.SSM_ENDPOINT}",
45+
endpoint_url=settings.SSM_ENDPOINT,
4646
aws_access_key_id=settings.SSM_ACCESS_KEY_ID.get_secret_value(),
4747
aws_secret_access_key=settings.SSM_SECRET_ACCESS_KEY.get_secret_value(),
4848
region_name=settings.SSM_REGION_NAME,
@@ -77,6 +77,10 @@ async def send_command(
7777
DocumentName="AWS-RunShellScript",
7878
Comment=command_name,
7979
Parameters={"commands": [command]},
80+
CloudWatchOutputConfig={
81+
"CloudWatchOutputEnabled": True,
82+
"CloudWatchLogGroupName": "simcore-ssm-logs",
83+
},
8084
)
8185
assert response["Command"] # nosec
8286
assert "Comment" in response["Command"] # nosec
@@ -120,9 +124,13 @@ async def is_instance_connected_to_ssm_server(self, instance_id: str) -> bool:
120124
],
121125
)
122126
assert response["InstanceInformationList"] # nosec
123-
assert len(response["InstanceInformationList"]) == 1 # nosec
124-
assert "PingStatus" in response["InstanceInformationList"][0] # nosec
125-
return bool(response["InstanceInformationList"][0]["PingStatus"] == "Online")
127+
if response["InstanceInformationList"]:
128+
assert len(response["InstanceInformationList"]) == 1 # nosec
129+
assert "PingStatus" in response["InstanceInformationList"][0] # nosec
130+
return bool(
131+
response["InstanceInformationList"][0]["PingStatus"] == "Online"
132+
)
133+
return False
126134

127135
@log_decorator(_logger, logging.DEBUG)
128136
@ssm_exception_handler(_logger)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# pylint: disable=redefined-outer-name
2+
# pylint: disable=unused-argument
3+
# pylint: disable=unused-import
4+
5+
import contextlib
6+
import logging
7+
from collections.abc import AsyncIterator
8+
from typing import cast
9+
10+
import aioboto3
11+
import pytest
12+
from aiobotocore.session import ClientCreatorContext
13+
from faker import Faker
14+
from settings_library.ec2 import EC2Settings
15+
from types_aiobotocore_iam.client import IAMClient
16+
17+
from .helpers.logging_tools import log_context
18+
19+
20+
@pytest.fixture
21+
async def iam_client(
22+
ec2_settings: EC2Settings,
23+
) -> AsyncIterator[IAMClient]:
24+
session = aioboto3.Session()
25+
exit_stack = contextlib.AsyncExitStack()
26+
session_client = session.client(
27+
"iam",
28+
endpoint_url=ec2_settings.EC2_ENDPOINT,
29+
aws_access_key_id=ec2_settings.EC2_ACCESS_KEY_ID,
30+
aws_secret_access_key=ec2_settings.EC2_SECRET_ACCESS_KEY,
31+
region_name=ec2_settings.EC2_REGION_NAME,
32+
)
33+
assert isinstance(session_client, ClientCreatorContext)
34+
iam_client = cast(IAMClient, await exit_stack.enter_async_context(session_client))
35+
36+
yield iam_client
37+
38+
await exit_stack.aclose()
39+
40+
41+
@pytest.fixture
42+
async def aws_instance_profile(
43+
iam_client: IAMClient, faker: Faker
44+
) -> AsyncIterator[str]:
45+
46+
profile = await iam_client.create_instance_profile(
47+
InstanceProfileName=faker.pystr(),
48+
)
49+
profile_arn = profile["InstanceProfile"]["Arn"]
50+
with log_context(
51+
logging.INFO, msg=f"Created InstanceProfile in AWS with {profile_arn=}"
52+
):
53+
yield profile_arn
54+
55+
await iam_client.delete_instance_profile(
56+
InstanceProfileName=profile["InstanceProfile"]["InstanceProfileName"]
57+
)

packages/pytest-simcore/src/pytest_simcore/aws_server.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
import requests
1010
from aiohttp.test_utils import unused_port
1111
from faker import Faker
12+
from models_library.utils.fastapi_encoders import jsonable_encoder
1213
from moto.server import ThreadedMotoServer
13-
from pydantic import AnyHttpUrl, parse_obj_as
14+
from pydantic import AnyHttpUrl, SecretStr, parse_obj_as
1415
from pytest_mock.plugin import MockerFixture
16+
from settings_library.basic_types import IDStr
1517
from settings_library.ec2 import EC2Settings
1618
from settings_library.s3 import S3Settings
1719
from settings_library.ssm import SSMSettings
@@ -74,7 +76,7 @@ def mocked_ec2_server_envs(
7476
monkeypatch: pytest.MonkeyPatch,
7577
) -> EnvVarsDict:
7678
changed_envs: EnvVarsDict = mocked_ec2_server_settings.dict()
77-
return setenvs_from_dict(monkeypatch, changed_envs)
79+
return setenvs_from_dict(monkeypatch, {**changed_envs})
7880

7981

8082
@pytest.fixture
@@ -98,9 +100,12 @@ def mocked_ssm_server_settings(
98100
reset_aws_server_state: None,
99101
) -> SSMSettings:
100102
return SSMSettings(
101-
SSM_ACCESS_KEY_ID="xxx",
102-
SSM_ENDPOINT=f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
103-
SSM_SECRET_ACCESS_KEY="xxx", # noqa: S106
103+
SSM_ACCESS_KEY_ID=SecretStr("xxx"),
104+
SSM_ENDPOINT=parse_obj_as(
105+
AnyHttpUrl,
106+
f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
107+
),
108+
SSM_SECRET_ACCESS_KEY=SecretStr("xxx"),
104109
)
105110

106111

@@ -109,23 +114,23 @@ def mocked_ssm_server_envs(
109114
mocked_ssm_server_settings: SSMSettings,
110115
monkeypatch: pytest.MonkeyPatch,
111116
) -> EnvVarsDict:
112-
changed_envs: EnvVarsDict = mocked_ssm_server_settings.dict()
113-
return setenvs_from_dict(monkeypatch, changed_envs)
117+
changed_envs: EnvVarsDict = jsonable_encoder(mocked_ssm_server_settings)
118+
return setenvs_from_dict(monkeypatch, {**changed_envs})
114119

115120

116121
@pytest.fixture
117122
def mocked_s3_server_settings(
118123
mocked_aws_server: ThreadedMotoServer, reset_aws_server_state: None, faker: Faker
119124
) -> S3Settings:
120125
return S3Settings(
121-
S3_ACCESS_KEY="xxx",
126+
S3_ACCESS_KEY=IDStr("xxx"),
122127
S3_ENDPOINT=parse_obj_as(
123128
AnyHttpUrl,
124129
f"http://{mocked_aws_server._ip_address}:{mocked_aws_server._port}", # pylint: disable=protected-access # noqa: SLF001
125130
),
126-
S3_SECRET_KEY="xxx", # noqa: S106
127-
S3_BUCKET_NAME=f"pytest{faker.pystr().lower()}",
128-
S3_REGION="us-east-1",
131+
S3_SECRET_KEY=IDStr("xxx"),
132+
S3_BUCKET_NAME=IDStr(f"pytest{faker.pystr().lower()}"),
133+
S3_REGION=IDStr("us-east-1"),
129134
)
130135

131136

@@ -135,4 +140,4 @@ def mocked_s3_server_envs(
135140
monkeypatch: pytest.MonkeyPatch,
136141
) -> EnvVarsDict:
137142
changed_envs: EnvVarsDict = mocked_s3_server_settings.dict(exclude_unset=True)
138-
return setenvs_from_dict(monkeypatch, changed_envs)
143+
return setenvs_from_dict(monkeypatch, {**changed_envs})
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
# pytest_simcore.docker_compose fixture module config variables
2+
import pytest
23

34
FIXTURE_CONFIG_CORE_SERVICES_SELECTION = "pytest_simcore_core_services_selection"
45
FIXTURE_CONFIG_OPS_SERVICES_SELECTION = "pytest_simcore_ops_services_selection"
6+
7+
# NOTE: this ensures that assertion printouts are nicely formatted and complete; see https://lorepirri.com/pytest-register-assert-rewrite.html
8+
pytest.register_assert_rewrite("pytest_simcore.helpers")

packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ async def assert_autoscaled_computational_ec2_instances(
1313
expected_num_instances: int,
1414
expected_instance_type: InstanceTypeType,
1515
expected_instance_state: InstanceStateNameType,
16+
expected_additional_tag_keys: list[str],
1617
) -> list[InstanceTypeDef]:
1718
return await assert_ec2_instances(
1819
ec2_client,
@@ -24,7 +25,7 @@ async def assert_autoscaled_computational_ec2_instances(
2425
"io.simcore.autoscaling.dask-scheduler_url",
2526
"user_id",
2627
"wallet_id",
27-
"osparc-tag",
28+
*expected_additional_tag_keys,
2829
],
2930
expected_user_data=["docker swarm join"],
3031
)
@@ -37,6 +38,7 @@ async def assert_autoscaled_dynamic_ec2_instances(
3738
expected_num_instances: int,
3839
expected_instance_type: InstanceTypeType,
3940
expected_instance_state: InstanceStateNameType,
41+
expected_additional_tag_keys: list[str],
4042
) -> list[InstanceTypeDef]:
4143
return await assert_ec2_instances(
4244
ec2_client,
@@ -47,9 +49,7 @@ async def assert_autoscaled_dynamic_ec2_instances(
4749
expected_instance_tag_keys=[
4850
"io.simcore.autoscaling.monitored_nodes_labels",
4951
"io.simcore.autoscaling.monitored_services_labels",
50-
"user_id",
51-
"wallet_id",
52-
"osparc-tag",
52+
*expected_additional_tag_keys,
5353
],
5454
expected_user_data=["docker swarm join"],
5555
)
@@ -74,7 +74,7 @@ async def assert_autoscaled_dynamic_warm_pools_ec2_instances(
7474
expected_instance_tag_keys=[
7575
"io.simcore.autoscaling.monitored_nodes_labels",
7676
"io.simcore.autoscaling.monitored_services_labels",
77-
"buffer-machine",
77+
"io.simcore.autoscaling.buffer_machine",
7878
*expected_additional_tag_keys,
7979
],
8080
expected_user_data=[],
@@ -106,20 +106,14 @@ async def assert_ec2_instances(
106106
assert instance["InstanceType"] == expected_instance_type
107107
assert "Tags" in instance
108108
assert instance["Tags"]
109-
expected_tag_keys = [
109+
expected_tag_keys = {
110110
*expected_instance_tag_keys,
111111
"io.simcore.autoscaling.version",
112112
"Name",
113-
]
114-
instance_tag_keys = [tag["Key"] for tag in instance["Tags"] if "Key" in tag]
115-
for tag_key in instance_tag_keys:
116-
assert (
117-
tag_key in expected_tag_keys
118-
), f"instance has additional unexpected {tag_key=} vs {expected_tag_keys=}"
119-
for tag in expected_instance_tag_keys:
120-
assert (
121-
tag in instance_tag_keys
122-
), f"instance missing {tag=} vs {instance_tag_keys=}"
113+
}
114+
instance_tag_keys = {tag["Key"] for tag in instance["Tags"] if "Key" in tag}
115+
116+
assert instance_tag_keys == expected_tag_keys
123117

124118
assert "PrivateDnsName" in instance
125119
instance_private_dns_name = instance["PrivateDnsName"]

services/autoscaling/requirements/_base.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,8 @@ types-aiobotocore-ec2==2.13.0
449449
# via types-aiobotocore
450450
types-aiobotocore-s3==2.13.0
451451
# via types-aiobotocore
452+
types-aiobotocore-ssm==2.13.0
453+
# via types-aiobotocore
452454
types-awscrt==0.20.9
453455
# via botocore-stubs
454456
types-python-dateutil==2.9.0.20240316
@@ -465,6 +467,7 @@ typing-extensions==4.11.0
465467
# types-aiobotocore
466468
# types-aiobotocore-ec2
467469
# types-aiobotocore-s3
470+
# types-aiobotocore-ssm
468471
# uvicorn
469472
urllib3==2.2.1
470473
# via

0 commit comments

Comments
 (0)