Skip to content

Commit 4bf6e90

Browse files
authored
Comp backend/clusters creation functionalities (#4602)
1 parent df7114d commit 4bf6e90

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1960
-290
lines changed

.ruff.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ target-version = "py310"
5151

5252

5353
[per-file-ignores]
54-
"**/tests/**" = [
54+
"{**/{tests, pytest_simcore}/**}" = [
5555
"T201", # print found
5656
"ARG001", # unused function argument
5757
"PT019", # use pytest.mark.usefixtures

.vscode/launch.template.json

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,19 @@
4646
"name": "Python: Remote Attach api-server",
4747
"type": "python",
4848
"request": "attach",
49+
"port": 3015,
50+
"host": "127.0.0.1",
51+
"pathMappings": [
52+
{
53+
"localRoot": "${workspaceFolder}",
54+
"remoteRoot": "/devel"
55+
}
56+
]
57+
},
58+
{
59+
"name": "Python: Remote Attach clusters-keeper",
60+
"type": "python",
61+
"request": "attach",
4962
"port": 3006,
5063
"host": "127.0.0.1",
5164
"pathMappings": [
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# pylint: disable=unused-argument
2+
# pylint: disable=redefined-outer-name
3+
4+
from collections.abc import Callable
5+
from typing import AsyncIterator, NamedTuple
6+
7+
import pytest
8+
import traitlets.config
9+
from dask_gateway import Gateway, GatewayCluster, auth
10+
from dask_gateway_server.app import DaskGateway
11+
from dask_gateway_server.backends.local import UnsafeLocalBackend
12+
from distributed import Client
13+
14+
15+
@pytest.fixture
def local_dask_gateway_server_config(
    unused_tcp_port_factory: Callable,
) -> traitlets.config.Config:
    """Build the traitlets configuration used to run a local dask-gateway server.

    The gateway API and its proxy are each bound to 127.0.0.1 on a distinct port
    obtained from ``unused_tcp_port_factory``; authentication uses
    ``SimpleAuthenticator`` with a fixed test password.

    Args:
        unused_tcp_port_factory: callable returning a free TCP port on each call.

    Returns:
        The populated configuration object.
    """
    # ports are requested in the same order they are consumed: gateway first, proxy second
    gateway_port = unused_tcp_port_factory()
    proxy_port = unused_tcp_port_factory()
    worker_resources = f"CPU=12,GPU=1,RAM={16e9}"

    config = traitlets.config.Config()
    # the asserts narrow the section types (presumably for static type checkers)
    assert isinstance(config.DaskGateway, traitlets.config.Config)
    assert isinstance(config.ClusterConfig, traitlets.config.Config)
    assert isinstance(config.Proxy, traitlets.config.Config)
    assert isinstance(config.SimpleAuthenticator, traitlets.config.Config)

    # gateway application
    config.DaskGateway.backend_class = UnsafeLocalBackend
    config.DaskGateway.address = f"127.0.0.1:{gateway_port}"
    config.DaskGateway.authenticator_class = (
        "dask_gateway_server.auth.SimpleAuthenticator"
    )
    config.DaskGateway.log_level = "DEBUG"

    # proxy and authentication
    config.Proxy.address = f"127.0.0.1:{proxy_port}"
    config.SimpleAuthenticator.password = "qweqwe"  # noqa: S105

    # clusters and their workers
    config.ClusterConfig.worker_cmd = ["dask-worker", "--resources", worker_resources]
    # NOTE: This must be set such that the local unsafe backend creates a worker with enough cores/memory
    config.ClusterConfig.worker_cores = 12
    config.ClusterConfig.worker_memory = "16G"
    config.ClusterConfig.cluster_max_workers = 3

    return config
41+
42+
43+
class DaskGatewayServer(NamedTuple):
    """Handle on a running local dask-gateway server together with its connection details."""

    # URL of the gateway API (built as "http://<proxy address>" by the server fixture)
    address: str
    # URL of the scheduler proxy (built as "gateway://<proxy tcp address>" by the server fixture)
    proxy_address: str
    # password configured on the server's SimpleAuthenticator
    password: str
    # the underlying gateway application instance
    server: DaskGateway
48+
49+
50+
@pytest.fixture
async def local_dask_gateway_server(
    local_dask_gateway_server_config: traitlets.config.Config,
) -> AsyncIterator[DaskGatewayServer]:
    """Run a local dask-gateway server for the duration of a test.

    Creates the ``DaskGateway`` application from the given configuration, sets it
    up, yields its connection details and cleans the application up afterwards.

    Args:
        local_dask_gateway_server_config: configuration of the gateway application.

    Yields:
        The addresses, password and application instance of the running server.
    """
    print("--> creating local dask gateway server")
    gateway_app = DaskGateway(config=local_dask_gateway_server_config)
    # NOTE(review): initialize() is given an explicit empty argument list, presumably
    # so the application does not parse the test runner's command line -- confirm
    gateway_app.initialize([])
    print("--> local dask gateway server initialized")
    await gateway_app.setup()
    proxy = gateway_app.backend.proxy
    # waits on the proxy's private awaitable before declaring the setup complete
    # (presumably resolved once the proxy has connected to the gateway -- confirm)
    await proxy._proxy_contacted  # pylint: disable=protected-access

    print("--> local dask gateway server setup completed")
    yield DaskGatewayServer(
        address=f"http://{proxy.address}",
        proxy_address=f"gateway://{proxy.tcp_address}",
        password=local_dask_gateway_server_config.SimpleAuthenticator.password,  # type: ignore
        server=gateway_app,
    )
    print("--> local dask gateway server switching off...")
    await gateway_app.cleanup()
    print("...done")
71+
72+
73+
@pytest.fixture
async def dask_gateway(
    local_dask_gateway_server: DaskGatewayServer,
) -> AsyncIterator[Gateway]:
    """Provide an asynchronous ``Gateway`` client connected to the local test server.

    The client authenticates with basic auth as ``pytest_user`` using the server's
    password, and prints the gateway versions, cluster options and current
    clusters for debugging purposes.

    Args:
        local_dask_gateway_server: connection details of the running test server.

    Yields:
        The connected gateway client; its context is exited on fixture teardown.
    """
    async with Gateway(
        local_dask_gateway_server.address,
        local_dask_gateway_server.proxy_address,
        asynchronous=True,
        auth=auth.BasicAuth("pytest_user", local_dask_gateway_server.password),
    ) as gateway:
        print(f"--> {gateway=} created")
        cluster_options = await gateway.cluster_options()
        gateway_versions = await gateway.get_versions()
        clusters_list = await gateway.list_clusters()
        print(f"--> {gateway_versions=}, {cluster_options=}, {clusters_list=}")
        for option in cluster_options.items():
            print(f"--> {option=}")
        # yield instead of return: returning from inside the `async with` would exit
        # the context (closing the client) before the test gets to use it
        yield gateway
91+
92+
93+
@pytest.fixture
94+
async def dask_gateway_cluster(dask_gateway: Gateway) -> AsyncIterator[GatewayCluster]:
    """Create a new cluster through the gateway client and yield it.

    The cluster's async context is exited when the fixture is torn down.
    """
    new_cluster = dask_gateway.new_cluster()
    async with new_cluster as cluster:
        yield cluster
97+
98+
99+
@pytest.fixture
100+
async def dask_gateway_cluster_client(
    dask_gateway_cluster: GatewayCluster,
) -> AsyncIterator[Client]:
    """Yield a ``distributed`` client obtained from the gateway cluster.

    The client's async context is exited when the fixture is torn down.
    """
    cluster_client = dask_gateway_cluster.get_client()
    async with cluster_client as client:
        yield client
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
CLUSTERS_KEEPER_DEBUG=true
2+
CLUSTERS_KEEPER_LOGLEVEL=INFO
3+
CLUSTERS_KEEPER_MAX_MISSED_HEARTBEATS_BEFORE_CLUSTER_TERMINATION=60
4+
CLUSTERS_KEEPER_TASK_INTERVAL=30
5+
EC2_ACCESS_KEY_ID=XXXXXXXXXX
6+
EC2_INSTANCES_ALLOWED_TYPES="[\"t2.micro\"]"
7+
EC2_INSTANCES_AMI_ID=XXXXXXXXXX
8+
EC2_INSTANCES_KEY_NAME=XXXXXXXXXX
9+
EC2_INSTANCES_SECURITY_GROUP_IDS=XXXXXXXXXX
10+
EC2_INSTANCES_SUBNET_ID=XXXXXXXXXX
11+
EC2_SECRET_ACCESS_KEY=XXXXXXXXXX
12+
LOG_FORMAT_LOCAL_DEV_ENABLED=True
13+
RABBIT_HOST=rabbit
14+
RABBIT_PASSWORD=test
15+
RABBIT_PORT=5672
16+
RABBIT_SECURE=false
17+
RABBIT_USER=test
18+
REDIS_HOST=redis
19+
REDIS_PORT=6379
20+
SC_BOOT_MODE=debug-ptvsd
21+
SC_BUILD_TARGET=development

services/clusters-keeper/Makefile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,17 @@
33
#
44
include ../../scripts/common.Makefile
55
include ../../scripts/common-service.Makefile
6+
7+
.env: .env-devel ## creates .env file from defaults in .env-devel
	$(if $(wildcard $@), \
	@echo "WARNING ##### $< is newer than $@ ####"; diff -uN $@ $<; false;,\
	@echo "WARNING ##### $@ does not exist, cloning $< as $@ ############"; cp $< $@)


# up-devel and down never produce a file of that name: declare them phony so that
# make always runs their recipes
.PHONY: up-devel down
up-devel: .env ## starts local test application (running bare metal against AWS)
	# setting up dependencies
	@docker compose up

down: .env ## stops local test app dependencies (running bare metal against AWS)
	-@docker compose down
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
version: "3.8"
2+
services:
3+
rabbit:
4+
image: itisfoundation/rabbitmq:3.11.2-management
5+
init: true
6+
ports:
7+
- "5672:5672"
8+
- "15672:15672"
9+
- "15692"
10+
environment:
11+
- RABBITMQ_DEFAULT_USER=${RABBIT_USER}
12+
- RABBITMQ_DEFAULT_PASS=${RABBIT_PASSWORD}
13+
healthcheck:
14+
# see https://www.rabbitmq.com/monitoring.html#individual-checks for info about health-checks available in rabbitmq
15+
test: rabbitmq-diagnostics -q status
16+
interval: 5s
17+
timeout: 30s
18+
retries: 5
19+
start_period: 5s
20+
21+
redis:
22+
image: "redis:6.2.6@sha256:4bed291aa5efb9f0d77b76ff7d4ab71eee410962965d052552db1fb80576431d"
23+
init: true
24+
ports:
25+
- "6379:6379"
26+
healthcheck:
27+
test: [ "CMD", "redis-cli", "ping" ]
28+
interval: 5s
29+
timeout: 30s
30+
retries: 50
31+
32+
redis-commander:
33+
image: rediscommander/redis-commander:latest
34+
init: true
35+
ports:
36+
- "18081:8081"
37+
environment:
38+
- REDIS_HOSTS=resources:${REDIS_HOST}:${REDIS_PORT}:0,locks:${REDIS_HOST}:${REDIS_PORT}:1,validation_codes:${REDIS_HOST}:${REDIS_PORT}:2,scheduled_maintenance:${REDIS_HOST}:${REDIS_PORT}:3,user_notifications:${REDIS_HOST}:${REDIS_PORT}:4,announcements:${REDIS_HOST}:${REDIS_PORT}:5
39+
# If you add/remove a db, do not forget to update the --databases entry in the docker-compose.yml
40+
41+
clusters-keeper:
42+
image: local/clusters-keeper:development
43+
init: true
44+
ports:
45+
- "8010:8000"
46+
- "3015:3000"
47+
env_file:
48+
- .env
49+
volumes:
50+
- ./:/devel/services/clusters-keeper
51+
- ../../packages:/devel/packages

services/clusters-keeper/docker/entrypoint.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ fi
6565

6666
if [ "${SC_BOOT_MODE}" = "debug-ptvsd" ]; then
6767
# NOTE: production does NOT pre-install the remote debugger
68-
pip install --no-cache-dir ptvsd
68+
pip install --no-cache-dir debugpy
6969
fi
7070

7171
# Appends docker group if socket is mounted

services/clusters-keeper/requirements/_base.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# NOTE: ALL version constraints MUST be commented
55
--constraint ../../../requirements/constraints.txt
66
--constraint ./constraints.txt
7+
--constraint ../../../services/dask-sidecar/requirements/_dask-distributed.txt
78

89
# intra-repo required dependencies
910
--requirement ../../../packages/models-library/requirements/_base.in
@@ -13,7 +14,10 @@
1314
--requirement ../../../packages/service-library/requirements/_fastapi.in
1415

1516

17+
1618
aioboto3
19+
dask[distributed]
20+
dask-gateway
1721
fastapi
1822
packaging
1923
types-aiobotocore[ec2]

0 commit comments

Comments
 (0)