Skip to content

Commit bbef4bc

Browse files
authored
♻️Autoscaling: refactor before changes (⚠️ DEVOPS) (#8002)
1 parent 961ad38 commit bbef4bc

32 files changed

+773
-703
lines changed

.github/workflows/ci-testing-deploy.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -587,7 +587,7 @@ jobs:
587587
unit-test-autoscaling:
588588
needs: changes
589589
if: ${{ needs.changes.outputs.autoscaling == 'true' || github.event_name == 'push' || github.event.inputs.force_all_builds == 'true' }}
590-
timeout-minutes: 22 # temporary: mypy takes a huge amount of time to run here, maybe we should cache it
590+
timeout-minutes: 18 # if this timeout gets too small, then split the tests
591591
name: "[unit] autoscaling"
592592
runs-on: ${{ matrix.os }}
593593
strategy:

packages/pytest-simcore/src/pytest_simcore/helpers/autoscaling.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ def create_fake_association(
3939
):
4040
fake_node_to_instance_map = {}
4141

42-
async def _fake_node_creator(
42+
def _fake_node_creator(
4343
_nodes: list[Node], ec2_instances: list[EC2InstanceData]
4444
) -> tuple[list[AssociatedInstance], list[EC2InstanceData]]:
4545
def _create_fake_node_with_labels(instance: EC2InstanceData) -> Node:

services/autoscaling/src/simcore_service_autoscaling/core/application.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,12 @@
1717
APP_STARTED_DYNAMIC_BANNER_MSG,
1818
)
1919
from ..api.routes import setup_api_routes
20-
from ..modules.auto_scaling_task import setup as setup_auto_scaler_background_task
21-
from ..modules.buffer_machines_pool_task import setup as setup_buffer_machines_pool_task
20+
from ..modules.cluster_scaling.auto_scaling_task import (
21+
setup as setup_auto_scaler_background_task,
22+
)
23+
from ..modules.cluster_scaling.warm_buffer_machines_pool_task import (
24+
setup as setup_warm_buffer_machines_pool_task,
25+
)
2226
from ..modules.docker import setup as setup_docker
2327
from ..modules.ec2 import setup as setup_ec2
2428
from ..modules.instrumentation import setup as setup_instrumentation
@@ -78,7 +82,7 @@ def create_app(settings: ApplicationSettings) -> FastAPI:
7882
initialize_fastapi_app_tracing(app)
7983

8084
setup_auto_scaler_background_task(app)
81-
setup_buffer_machines_pool_task(app)
85+
setup_warm_buffer_machines_pool_task(app)
8286

8387
# ERROR HANDLERS
8488

services/autoscaling/src/simcore_service_autoscaling/models.py

Lines changed: 17 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,7 @@ class AssociatedInstance(_BaseInstance):
4747

4848

4949
@dataclass(frozen=True, kw_only=True, slots=True)
50-
class NonAssociatedInstance(_BaseInstance):
51-
...
50+
class NonAssociatedInstance(_BaseInstance): ...
5251

5352

5453
@dataclass(frozen=True, kw_only=True, slots=True)
@@ -68,9 +67,9 @@ class Cluster: # pylint: disable=too-many-instance-attributes
6867
"description": "This is a EC2-backed docker node which is drained (cannot accept tasks)"
6968
}
7069
)
71-
buffer_drained_nodes: list[AssociatedInstance] = field(
70+
hot_buffer_drained_nodes: list[AssociatedInstance] = field(
7271
metadata={
73-
"description": "This is a EC2-backed docker node which is drained in the reserve if this is enabled (with no tasks)"
72+
"description": "This is a EC2-backed docker node which is drained in the reserve if this is enabled (with no tasks, a.k.a. hot buffer)"
7473
}
7574
)
7675
pending_ec2s: list[NonAssociatedInstance] = field(
@@ -83,9 +82,9 @@ class Cluster: # pylint: disable=too-many-instance-attributes
8382
"description": "This is an existing EC2 instance that never properly joined the cluster and is deemed as broken and will be terminated"
8483
}
8584
)
86-
buffer_ec2s: list[NonAssociatedInstance] = field(
85+
warm_buffer_ec2s: list[NonAssociatedInstance] = field(
8786
metadata={
88-
"description": "This is a prepared stopped EC2 instance, not yet associated to a docker node, ready to be used"
87+
"description": "This is a prepared stopped EC2 instance, not yet associated to a docker node, ready to be used (a.k.a. warm buffer)"
8988
}
9089
)
9190
disconnected_nodes: list[Node] = field(
@@ -121,7 +120,7 @@ def total_number_of_machines(self) -> int:
121120
len(self.active_nodes)
122121
+ len(self.pending_nodes)
123122
+ len(self.drained_nodes)
124-
+ len(self.buffer_drained_nodes)
123+
+ len(self.hot_buffer_drained_nodes)
125124
+ len(self.pending_ec2s)
126125
+ len(self.broken_ec2s)
127126
+ len(self.terminating_nodes)
@@ -138,10 +137,10 @@ def _get_instance_ids(
138137
f"Cluster(active-nodes: count={len(self.active_nodes)} {_get_instance_ids(self.active_nodes)}, "
139138
f"pending-nodes: count={len(self.pending_nodes)} {_get_instance_ids(self.pending_nodes)}, "
140139
f"drained-nodes: count={len(self.drained_nodes)} {_get_instance_ids(self.drained_nodes)}, "
141-
f"reserve-drained-nodes: count={len(self.buffer_drained_nodes)} {_get_instance_ids(self.buffer_drained_nodes)}, "
140+
f"hot-buffer-drained-nodes: count={len(self.hot_buffer_drained_nodes)} {_get_instance_ids(self.hot_buffer_drained_nodes)}, "
142141
f"pending-ec2s: count={len(self.pending_ec2s)} {_get_instance_ids(self.pending_ec2s)}, "
143142
f"broken-ec2s: count={len(self.broken_ec2s)} {_get_instance_ids(self.broken_ec2s)}, "
144-
f"buffer-ec2s: count={len(self.buffer_ec2s)} {_get_instance_ids(self.buffer_ec2s)}, "
143+
f"warm-buffer-ec2s: count={len(self.warm_buffer_ec2s)} {_get_instance_ids(self.warm_buffer_ec2s)}, "
145144
f"disconnected-nodes: count={len(self.disconnected_nodes)}, "
146145
f"terminating-nodes: count={len(self.terminating_nodes)} {_get_instance_ids(self.terminating_nodes)}, "
147146
f"retired-nodes: count={len(self.retired_nodes)} {_get_instance_ids(self.retired_nodes)}, "
@@ -159,7 +158,7 @@ class DaskTask:
159158

160159

161160
@dataclass(kw_only=True, slots=True)
162-
class BufferPool:
161+
class WarmBufferPool:
163162
ready_instances: set[EC2InstanceData] = field(default_factory=set)
164163
pending_instances: set[EC2InstanceData] = field(default_factory=set)
165164
waiting_to_pull_instances: set[EC2InstanceData] = field(default_factory=set)
@@ -170,7 +169,7 @@ class BufferPool:
170169

171170
def __repr__(self) -> str:
172171
return (
173-
f"BufferPool(ready-count={len(self.ready_instances)}, "
172+
f"WarmBufferPool(ready-count={len(self.ready_instances)}, "
174173
f"pending-count={len(self.pending_instances)}, "
175174
f"waiting-to-pull-count={len(self.waiting_to_pull_instances)}, "
176175
f"waiting-to-stop-count={len(self.waiting_to_stop_instances)}, "
@@ -213,20 +212,20 @@ def remove_instance(self, instance: EC2InstanceData) -> None:
213212

214213

215214
@dataclass
216-
class BufferPoolManager:
217-
buffer_pools: dict[InstanceTypeType, BufferPool] = field(
218-
default_factory=lambda: defaultdict(BufferPool)
215+
class WarmBufferPoolManager:
216+
buffer_pools: dict[InstanceTypeType, WarmBufferPool] = field(
217+
default_factory=lambda: defaultdict(WarmBufferPool)
219218
)
220219

221220
def __repr__(self) -> str:
222-
return f"BufferPoolManager({dict(self.buffer_pools)})"
221+
return f"WarmBufferPoolManager({dict(self.buffer_pools)})"
223222

224-
def flatten_buffer_pool(self) -> BufferPool:
223+
def flatten_buffer_pool(self) -> WarmBufferPool:
225224
"""returns a flattened buffer pool with all the EC2InstanceData"""
226-
flat_pool = BufferPool()
225+
flat_pool = WarmBufferPool()
227226

228227
for buffer_pool in self.buffer_pools.values():
229-
for f in fields(BufferPool):
228+
for f in fields(WarmBufferPool):
230229
getattr(flat_pool, f.name).update(getattr(buffer_pool, f.name))
231230

232231
return flat_pool

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py

Lines changed: 0 additions & 80 deletions
This file was deleted.

services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)