Skip to content

Commit 75d7971

Browse files
committed
moving files
1 parent 64a1bc1 commit 75d7971

17 files changed

+370
-371
lines changed

services/autoscaling/src/simcore_service_autoscaling/core/application.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@
2020
from ..modules.cluster_scaling.auto_scaling_task import (
2121
setup as setup_auto_scaler_background_task,
2222
)
23-
from ..modules.buffer_machines_pool_task import setup as setup_buffer_machines_pool_task
23+
from ..modules.cluster_scaling.buffer_machines_pool_task import (
24+
setup as setup_buffer_machines_pool_task,
25+
)
2426
from ..modules.docker import setup as setup_docker
2527
from ..modules.ec2 import setup as setup_ec2
2628
from ..modules.instrumentation import setup as setup_instrumentation

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py

Whitespace-only changes.
Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +0,0 @@
1-
from typing import Protocol
2-
3-
from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources
4-
from fastapi import FastAPI
5-
from models_library.docker import DockerLabelKey
6-
from models_library.generated_models.docker_rest_api import Node as DockerNode
7-
from types_aiobotocore_ec2.literals import InstanceTypeType
8-
9-
from ..models import AssociatedInstance
10-
11-
12-
class BaseAutoscaling(Protocol):
13-
async def get_monitored_nodes(self, app: FastAPI) -> list[DockerNode]: ...
14-
15-
def get_ec2_tags(self, app: FastAPI) -> EC2Tags: ...
16-
17-
def get_new_node_docker_tags(
18-
self, app: FastAPI, ec2_instance_data: EC2InstanceData
19-
) -> dict[DockerLabelKey, str]: ...
20-
21-
async def list_unrunnable_tasks(self, app: FastAPI) -> list: ...
22-
23-
def get_task_required_resources(self, task) -> Resources: ...
24-
25-
async def get_task_defined_instance(
26-
self, app: FastAPI, task
27-
) -> InstanceTypeType | None: ...
28-
29-
async def compute_node_used_resources(
30-
self, app: FastAPI, instance: AssociatedInstance
31-
) -> Resources: ...
32-
33-
async def compute_cluster_used_resources(
34-
self, app: FastAPI, instances: list[AssociatedInstance]
35-
) -> Resources: ...
36-
37-
async def compute_cluster_total_resources(
38-
self, app: FastAPI, instances: list[AssociatedInstance]
39-
) -> Resources: ...
40-
41-
async def is_instance_active(
42-
self, app: FastAPI, instance: AssociatedInstance
43-
) -> bool: ...
44-
45-
async def is_instance_retired(
46-
self, app: FastAPI, instance: AssociatedInstance
47-
) -> bool: ...
48-
49-
async def try_retire_nodes(self, app: FastAPI) -> None: ...
Lines changed: 0 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -1,166 +0,0 @@
1-
import collections
2-
import logging
3-
from typing import cast
4-
5-
from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources
6-
from fastapi import FastAPI
7-
from models_library.clusters import ClusterAuthentication
8-
from models_library.docker import (
9-
DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY,
10-
DockerLabelKey,
11-
)
12-
from models_library.generated_models.docker_rest_api import Node
13-
from pydantic import AnyUrl, ByteSize
14-
from servicelib.utils import logged_gather
15-
from types_aiobotocore_ec2.literals import InstanceTypeType
16-
17-
from ..core.errors import (
18-
DaskNoWorkersError,
19-
DaskSchedulerNotFoundError,
20-
DaskWorkerNotFoundError,
21-
)
22-
from ..core.settings import get_application_settings
23-
from ..models import AssociatedInstance, DaskTask
24-
from ..utils import computational_scaling as utils
25-
from ..utils import utils_docker, utils_ec2
26-
from . import dask
27-
from .docker import get_docker_client
28-
29-
_logger = logging.getLogger(__name__)
30-
31-
32-
def _scheduler_url(app: FastAPI) -> AnyUrl:
33-
app_settings = get_application_settings(app)
34-
assert app_settings.AUTOSCALING_DASK # nosec
35-
return app_settings.AUTOSCALING_DASK.DASK_MONITORING_URL
36-
37-
38-
def _scheduler_auth(app: FastAPI) -> ClusterAuthentication:
39-
app_settings = get_application_settings(app)
40-
assert app_settings.AUTOSCALING_DASK # nosec
41-
return app_settings.AUTOSCALING_DASK.DASK_SCHEDULER_AUTH
42-
43-
44-
class ComputationalAutoscaling:
45-
async def get_monitored_nodes(self, app: FastAPI) -> list[Node]:
46-
return await utils_docker.get_worker_nodes(get_docker_client(app))
47-
48-
def get_ec2_tags(self, app: FastAPI) -> EC2Tags:
49-
app_settings = get_application_settings(app)
50-
return utils_ec2.get_ec2_tags_computational(app_settings)
51-
52-
def get_new_node_docker_tags(
53-
self, app: FastAPI, ec2_instance_data: EC2InstanceData
54-
) -> dict[DockerLabelKey, str]:
55-
assert app # nosec
56-
return {
57-
DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: ec2_instance_data.type
58-
}
59-
60-
async def list_unrunnable_tasks(self, app: FastAPI) -> list[DaskTask]:
61-
try:
62-
unrunnable_tasks = await dask.list_unrunnable_tasks(
63-
_scheduler_url(app), _scheduler_auth(app)
64-
)
65-
# NOTE: any worker "processing" more than 1 task means that the other tasks are queued!
66-
# NOTE: that is not necessarily true, in cases where 1 worker takes multiple tasks?? (osparc.io)
67-
processing_tasks_by_worker = await dask.list_processing_tasks_per_worker(
68-
_scheduler_url(app), _scheduler_auth(app)
69-
)
70-
queued_tasks = []
71-
for tasks in processing_tasks_by_worker.values():
72-
queued_tasks += tasks[1:]
73-
_logger.debug(
74-
"found %s pending tasks and %s potentially queued tasks",
75-
len(unrunnable_tasks),
76-
len(queued_tasks),
77-
)
78-
return unrunnable_tasks + queued_tasks
79-
except DaskSchedulerNotFoundError:
80-
_logger.warning(
81-
"No dask scheduler found. TIP: Normal during machine startup."
82-
)
83-
return []
84-
85-
def get_task_required_resources(self, task) -> Resources:
86-
return utils.resources_from_dask_task(task)
87-
88-
async def get_task_defined_instance(
89-
self, app: FastAPI, task
90-
) -> InstanceTypeType | None:
91-
assert app # nosec
92-
return cast(InstanceTypeType | None, utils.get_task_instance_restriction(task))
93-
94-
async def compute_node_used_resources(
95-
self, app: FastAPI, instance: AssociatedInstance
96-
) -> Resources:
97-
try:
98-
resource = await dask.get_worker_used_resources(
99-
_scheduler_url(app), _scheduler_auth(app), instance.ec2_instance
100-
)
101-
if resource == Resources.create_as_empty():
102-
num_results_in_memory = (
103-
await dask.get_worker_still_has_results_in_memory(
104-
_scheduler_url(app), _scheduler_auth(app), instance.ec2_instance
105-
)
106-
)
107-
if num_results_in_memory > 0:
108-
_logger.debug(
109-
"found %s for %s",
110-
f"{num_results_in_memory=}",
111-
f"{instance.ec2_instance.id}",
112-
)
113-
# NOTE: this is a trick to consider the node still useful
114-
return Resources(cpus=0, ram=ByteSize(1024 * 1024 * 1024))
115-
116-
_logger.debug(
117-
"found %s for %s", f"{resource=}", f"{instance.ec2_instance.id}"
118-
)
119-
return resource
120-
except (DaskWorkerNotFoundError, DaskNoWorkersError):
121-
_logger.debug("no resource found for %s", f"{instance.ec2_instance.id}")
122-
return Resources.create_as_empty()
123-
124-
async def compute_cluster_used_resources(
125-
self, app: FastAPI, instances: list[AssociatedInstance]
126-
) -> Resources:
127-
list_of_used_resources: list[Resources] = await logged_gather(
128-
*(self.compute_node_used_resources(app, i) for i in instances)
129-
)
130-
counter = collections.Counter({k: 0 for k in Resources.model_fields})
131-
for result in list_of_used_resources:
132-
counter.update(result.model_dump())
133-
return Resources.model_validate(dict(counter))
134-
135-
async def compute_cluster_total_resources(
136-
self, app: FastAPI, instances: list[AssociatedInstance]
137-
) -> Resources:
138-
try:
139-
return await dask.compute_cluster_total_resources(
140-
_scheduler_url(app), _scheduler_auth(app), instances
141-
)
142-
except DaskNoWorkersError:
143-
return Resources.create_as_empty()
144-
145-
async def is_instance_active(
146-
self, app: FastAPI, instance: AssociatedInstance
147-
) -> bool:
148-
if not utils_docker.is_node_osparc_ready(instance.node):
149-
return False
150-
151-
# now check if dask-scheduler/dask-worker is available and running
152-
return await dask.is_worker_connected(
153-
_scheduler_url(app), _scheduler_auth(app), instance.ec2_instance
154-
)
155-
156-
async def is_instance_retired(
157-
self, app: FastAPI, instance: AssociatedInstance
158-
) -> bool:
159-
if not utils_docker.is_node_osparc_ready(instance.node):
160-
return False
161-
return await dask.is_worker_retired(
162-
_scheduler_url(app), _scheduler_auth(app), instance.ec2_instance
163-
)
164-
165-
async def try_retire_nodes(self, app: FastAPI) -> None:
166-
await dask.try_retire_nodes(_scheduler_url(app), _scheduler_auth(app))
Lines changed: 0 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +0,0 @@
1-
from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources
2-
from fastapi import FastAPI
3-
from models_library.docker import DockerLabelKey
4-
from models_library.generated_models.docker_rest_api import Node, Task
5-
from types_aiobotocore_ec2.literals import InstanceTypeType
6-
7-
from ..core.settings import get_application_settings
8-
from ..models import AssociatedInstance
9-
from ..utils import utils_docker, utils_ec2
10-
from .docker import get_docker_client
11-
12-
13-
class DynamicAutoscaling:
14-
async def get_monitored_nodes(self, app: FastAPI) -> list[Node]:
15-
app_settings = get_application_settings(app)
16-
assert app_settings.AUTOSCALING_NODES_MONITORING # nosec
17-
return await utils_docker.get_monitored_nodes(
18-
get_docker_client(app),
19-
node_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS,
20-
)
21-
22-
def get_ec2_tags(self, app: FastAPI) -> EC2Tags:
23-
app_settings = get_application_settings(app)
24-
return utils_ec2.get_ec2_tags_dynamic(app_settings)
25-
26-
def get_new_node_docker_tags(
27-
self, app: FastAPI, ec2_instance_data: EC2InstanceData
28-
) -> dict[DockerLabelKey, str]:
29-
app_settings = get_application_settings(app)
30-
return utils_docker.get_new_node_docker_tags(app_settings, ec2_instance_data)
31-
32-
async def list_unrunnable_tasks(self, app: FastAPI) -> list[Task]:
33-
app_settings = get_application_settings(app)
34-
assert app_settings.AUTOSCALING_NODES_MONITORING # nosec
35-
return await utils_docker.pending_service_tasks_with_insufficient_resources(
36-
get_docker_client(app),
37-
service_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS,
38-
)
39-
40-
def get_task_required_resources(self, task) -> Resources:
41-
return utils_docker.get_max_resources_from_docker_task(task)
42-
43-
async def get_task_defined_instance(
44-
self, app: FastAPI, task
45-
) -> InstanceTypeType | None:
46-
return await utils_docker.get_task_instance_restriction(
47-
get_docker_client(app), task
48-
)
49-
50-
async def compute_node_used_resources(
51-
self, app: FastAPI, instance: AssociatedInstance
52-
) -> Resources:
53-
docker_client = get_docker_client(app)
54-
app_settings = get_application_settings(app)
55-
assert app_settings.AUTOSCALING_NODES_MONITORING # nosec
56-
return await utils_docker.compute_node_used_resources(
57-
docker_client,
58-
instance.node,
59-
service_labels=app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_SERVICE_LABELS,
60-
)
61-
62-
async def compute_cluster_used_resources(
63-
self, app: FastAPI, instances: list[AssociatedInstance]
64-
) -> Resources:
65-
docker_client = get_docker_client(app)
66-
return await utils_docker.compute_cluster_used_resources(
67-
docker_client, [i.node for i in instances]
68-
)
69-
70-
async def compute_cluster_total_resources(
71-
self, app: FastAPI, instances: list[AssociatedInstance]
72-
) -> Resources:
73-
assert app # nosec
74-
return await utils_docker.compute_cluster_total_resources(
75-
[i.node for i in instances]
76-
)
77-
78-
async def is_instance_active(
79-
self, app: FastAPI, instance: AssociatedInstance
80-
) -> bool:
81-
assert app # nosec
82-
return utils_docker.is_node_osparc_ready(instance.node)
83-
84-
async def is_instance_retired(
85-
self, app: FastAPI, instance: AssociatedInstance
86-
) -> bool:
87-
assert app # nosec
88-
assert instance # nosec
89-
# nothing to do here
90-
return False
91-
92-
async def try_retire_nodes(self, app: FastAPI) -> None:
93-
assert app # nosec
94-
# nothing to do here
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@
5757
post_tasks_log_message,
5858
post_tasks_progress_message,
5959
)
60-
from ..auto_scaling_mode_base import BaseAutoscaling
6160
from ..docker import get_docker_client
6261
from ..ec2 import get_ec2_client
6362
from ..instrumentation import get_instrumentation, has_instrumentation
6463
from ..ssm import get_ssm_client
64+
from .auto_scaling_mode_base import BaseAutoscaling
6565

6666
_logger = logging.getLogger(__name__)
6767

services/autoscaling/src/simcore_service_autoscaling/modules/buffer_machines_pool_core.py renamed to services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_buffer_machines_pool_core.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,24 @@
3535
from servicelib.logging_utils import log_context
3636
from types_aiobotocore_ec2.literals import InstanceTypeType
3737

38-
from ..constants import (
38+
from ...constants import (
3939
BUFFER_MACHINE_PULLING_COMMAND_ID_EC2_TAG_KEY,
4040
BUFFER_MACHINE_PULLING_EC2_TAG_KEY,
4141
DOCKER_PULL_COMMAND,
4242
PREPULL_COMMAND_NAME,
4343
)
44-
from ..core.settings import get_application_settings
45-
from ..models import BufferPool, BufferPoolManager
46-
from ..utils.auto_scaling_core import ec2_buffer_startup_script
47-
from ..utils.buffer_machines_pool_core import (
44+
from ...core.settings import get_application_settings
45+
from ...models import BufferPool, BufferPoolManager
46+
from ...utils.auto_scaling_core import ec2_buffer_startup_script
47+
from ...utils.buffer_machines_pool_core import (
4848
dump_pre_pulled_images_as_tags,
4949
get_deactivated_buffer_ec2_tags,
5050
load_pre_pulled_images_from_tags,
5151
)
52+
from ..ec2 import get_ec2_client
53+
from ..instrumentation import get_instrumentation, has_instrumentation
54+
from ..ssm import get_ssm_client
5255
from .auto_scaling_mode_base import BaseAutoscaling
53-
from .ec2 import get_ec2_client
54-
from .instrumentation import get_instrumentation, has_instrumentation
55-
from .ssm import get_ssm_client
5656

5757
_logger = logging.getLogger(__name__)
5858

0 commit comments

Comments
 (0)