Skip to content

Commit 81becec

Browse files
committed
refactor: clean up unused functions and improve node state checks
1 parent e0cc0ae commit 81becec

File tree

3 files changed

+47
-37
lines changed

3 files changed

+47
-37
lines changed

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
)
1818
from aws_library.ec2._errors import EC2TooManyInstancesError
1919
from fastapi import FastAPI
20-
from models_library.generated_models.docker_rest_api import Node, NodeState
20+
from models_library.generated_models.docker_rest_api import Node
2121
from models_library.rabbitmq_messages import ProgressType
2222
from servicelib.logging_utils import log_catch, log_context
2323
from servicelib.utils import limited_gather
@@ -66,11 +66,6 @@
6666
_logger = logging.getLogger(__name__)
6767

6868

69-
def _node_not_ready(node: Node) -> bool:
70-
assert node.status # nosec
71-
return bool(node.status.state != NodeState.ready)
72-
73-
7469
async def _analyze_current_cluster(
7570
app: FastAPI,
7671
auto_scaling_mode: BaseAutoscaling,
@@ -141,7 +136,7 @@ async def _analyze_current_cluster(
141136
- node_used_resources,
142137
)
143138
)
144-
elif auto_scaling_mode.is_instance_drained(instance):
139+
elif utils_docker.is_instance_drained(instance):
145140
all_drained_nodes.append(instance)
146141
elif await auto_scaling_mode.is_instance_retired(app, instance):
147142
# it should be drained, but it is not, so we force it to be drained such that it might be re-used if needed
@@ -166,7 +161,9 @@ async def _analyze_current_cluster(
166161
terminated_instances=[
167162
NonAssociatedInstance(ec2_instance=i) for i in terminated_ec2_instances
168163
],
169-
disconnected_nodes=[n for n in docker_nodes if _node_not_ready(n)],
164+
disconnected_nodes=[
165+
n for n in docker_nodes if not utils_docker.is_node_ready(n)
166+
],
170167
retired_nodes=retired_nodes,
171168
)
172169
_logger.info("current state: %s", f"{cluster!r}")
@@ -343,10 +340,10 @@ async def _sorted_allowed_instance_types(app: FastAPI) -> list[EC2InstanceType]:
343340
allowed_instance_type_names
344341
), "EC2_INSTANCES_ALLOWED_TYPES cannot be empty!"
345342

346-
allowed_instance_types: list[
347-
EC2InstanceType
348-
] = await ec2_client.get_ec2_instance_capabilities(
349-
cast(set[InstanceTypeType], set(allowed_instance_type_names))
343+
allowed_instance_types: list[EC2InstanceType] = (
344+
await ec2_client.get_ec2_instance_capabilities(
345+
cast(set[InstanceTypeType], set(allowed_instance_type_names))
346+
)
350347
)
351348

352349
def _as_selection(instance_type: EC2InstanceType) -> int:
@@ -1078,9 +1075,9 @@ async def _notify_based_on_machine_type(
10781075
launch_time_to_tasks: dict[datetime.datetime, list] = collections.defaultdict(list)
10791076
now = datetime.datetime.now(datetime.UTC)
10801077
for instance in instances:
1081-
launch_time_to_tasks[instance.ec2_instance.launch_time] += (
1082-
instance.assigned_tasks
1083-
)
1078+
launch_time_to_tasks[
1079+
instance.ec2_instance.launch_time
1080+
] += instance.assigned_tasks
10841081

10851082
for launch_time, tasks in launch_time_to_tasks.items():
10861083
time_since_launch = now - launch_time

services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_mode_base.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from types_aiobotocore_ec2.literals import InstanceTypeType
99

1010
from ..models import AssociatedInstance
11-
from ..utils import utils_docker
1211

1312

1413
@dataclass
@@ -71,10 +70,6 @@ async def is_instance_retired(
7170
app: FastAPI, instance: AssociatedInstance
7271
) -> bool: ...
7372

74-
@staticmethod
75-
def is_instance_drained(instance: AssociatedInstance) -> bool:
76-
return not utils_docker.is_node_osparc_ready(instance.node)
77-
7873
@staticmethod
7974
@abstractmethod
8075
async def try_retire_nodes(app: FastAPI) -> None: ...

services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from types_aiobotocore_ec2.literals import InstanceTypeType
3636

3737
from ..core.settings import ApplicationSettings
38+
from ..models import AssociatedInstance
3839
from ..modules.docker import AutoscalingDocker
3940

4041
logger = logging.getLogger(__name__)
@@ -278,24 +279,32 @@ def get_max_resources_from_docker_task(task: Task) -> Resources:
278279
return Resources(
279280
cpus=max(
280281
(
281-
task.spec.resources.reservations
282-
and task.spec.resources.reservations.nano_cp_us
282+
(
283+
task.spec.resources.reservations
284+
and task.spec.resources.reservations.nano_cp_us
285+
)
283286
or 0
284287
),
285288
(
286-
task.spec.resources.limits
287-
and task.spec.resources.limits.nano_cp_us
289+
(
290+
task.spec.resources.limits
291+
and task.spec.resources.limits.nano_cp_us
292+
)
288293
or 0
289294
),
290295
)
291296
/ _NANO_CPU,
292297
ram=TypeAdapter(ByteSize).validate_python(
293298
max(
294-
task.spec.resources.reservations
295-
and task.spec.resources.reservations.memory_bytes
299+
(
300+
task.spec.resources.reservations
301+
and task.spec.resources.reservations.memory_bytes
302+
)
296303
or 0,
297-
task.spec.resources.limits
298-
and task.spec.resources.limits.memory_bytes
304+
(
305+
task.spec.resources.limits
306+
and task.spec.resources.limits.memory_bytes
307+
)
299308
or 0,
300309
)
301310
),
@@ -382,7 +391,7 @@ async def compute_cluster_used_resources(
382391
list_of_used_resources = await logged_gather(
383392
*(compute_node_used_resources(docker_client, node) for node in nodes)
384393
)
385-
counter = collections.Counter({k: 0 for k in list(Resources.model_fields)})
394+
counter = collections.Counter(dict.fromkeys(list(Resources.model_fields), 0))
386395
for result in list_of_used_resources:
387396
counter.update(result.model_dump())
388397

@@ -570,14 +579,14 @@ def get_new_node_docker_tags(
570579
) -> dict[DockerLabelKey, str]:
571580
assert app_settings.AUTOSCALING_NODES_MONITORING # nosec
572581
return (
573-
{
574-
tag_key: "true"
575-
for tag_key in app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS
576-
}
577-
| {
578-
tag_key: "true"
579-
for tag_key in app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS
580-
}
582+
dict.fromkeys(
583+
app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NODE_LABELS,
584+
"true",
585+
)
586+
| dict.fromkeys(
587+
app_settings.AUTOSCALING_NODES_MONITORING.NODES_MONITORING_NEW_NODES_LABELS,
588+
"true",
589+
)
581590
| {DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY: ec2_instance.type}
582591
)
583592

@@ -601,6 +610,10 @@ def is_node_osparc_ready(node: Node) -> bool:
601610
)
602611

603612

613+
def is_instance_drained(instance: AssociatedInstance) -> bool:
614+
return not is_node_osparc_ready(instance.node)
615+
616+
604617
async def set_node_osparc_ready(
605618
app_settings: ApplicationSettings,
606619
docker_client: AutoscalingDocker,
@@ -702,3 +715,8 @@ async def attach_node(
702715
tags=new_tags,
703716
available=app_settings.AUTOSCALING_DRAIN_NODES_WITH_LABELS, # NOTE: full drain sometimes impede on performance
704717
)
718+
719+
720+
def is_node_ready(node: Node) -> bool:
721+
assert node.status # nosec
722+
return bool(node.status.state is NodeState.ready)

0 commit comments

Comments
 (0)