From c8ab88997562001ac05607dd0659944d3d89c068 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 11:37:19 +0200 Subject: [PATCH 01/93] disable hack --- .../modules/cluster_scaling/_provider_computational.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index c9b2d498fd66..92be7fe1f4be 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -88,13 +88,7 @@ async def list_unrunnable_tasks(self, app: FastAPI) -> list[DaskTask]: def get_task_required_resources(self, task) -> Resources: assert self # nosec - task_required_resources = utils.resources_from_dask_task(task) - # ensure cpu is set at least to 1 as dask-workers use 1 thread per CPU - if task_required_resources.cpus < 1.0: - task_required_resources = task_required_resources.model_copy( - update={"cpus": 1.0} - ) - return task_required_resources + return utils.resources_from_dask_task(task) async def get_task_defined_instance( self, app: FastAPI, task From d0fc1c638e4ac73c9cd93cf6b60ee4a57cc5f51f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 11:37:59 +0200 Subject: [PATCH 02/93] refactor --- .../modules/cluster_scaling/_auto_scaling_core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index ff4b0ad4f5b6..5b74cb412fad 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -728,15 +728,15 @@ async def _find_needed_instances( task_required_resources = auto_scaling_mode.get_task_required_resources( task ) - task_required_ec2_instance = ( - await auto_scaling_mode.get_task_defined_instance(app, task) + task_required_ec2 = await auto_scaling_mode.get_task_defined_instance( + app, task ) # first check if we can assign the task to one of the newly tobe created instances if _try_assign_task_to_ec2_instance_type( task, instances=needed_new_instance_types_for_tasks, - task_required_ec2_instance=task_required_ec2_instance, + task_required_ec2_instance=task_required_ec2, task_required_resources=task_required_resources, ): continue @@ -744,12 +744,12 @@ async def _find_needed_instances( # so we need to find what we can create now try: # check if exact instance type is needed first - if task_required_ec2_instance: + if task_required_ec2: defined_ec2 = find_selected_instance_type_for_task( - task_required_ec2_instance, + task_required_ec2, available_ec2_types, task, - auto_scaling_mode.get_task_required_resources(task), + task_required_resources, ) needed_new_instance_types_for_tasks.append( AssignedTasksToInstanceType( @@ -763,7 +763,7 @@ async def _find_needed_instances( # we go for best fitting type best_ec2_instance = utils_ec2.find_best_fitting_ec2_instance( available_ec2_types, - auto_scaling_mode.get_task_required_resources(task), + task_required_resources, score_type=utils_ec2.closest_instance_policy, ) needed_new_instance_types_for_tasks.append( From 29465a59f25421981a416830bc1076426af8f0cb Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 13:11:40 +0200 Subject: [PATCH 03/93] added generic resource to ec2 resource model --- .../src/aws_library/ec2/_models.py | 102 
+++++++++++++++--- 1 file changed, 88 insertions(+), 14 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index e08e207b0b0e..d5c1ca62442d 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -14,46 +14,120 @@ Field, NonNegativeFloat, NonNegativeInt, + StrictFloat, + StrictInt, StringConstraints, field_validator, ) from pydantic.config import JsonDict from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType +GenericResourceValue: TypeAlias = StrictInt | StrictFloat | str + class Resources(BaseModel, frozen=True): cpus: NonNegativeFloat ram: ByteSize + generic_resources: Annotated[ + dict[str, GenericResourceValue], + Field( + default_factory=dict, + description=( + "Arbitrary additional resources (e.g. {'threads': 8}). " + "Numeric values are treated as quantities and participate in add/sub/compare." + ), + ), + ] = DEFAULT_FACTORY @classmethod def create_as_empty(cls) -> "Resources": return cls(cpus=0, ram=ByteSize(0)) def __ge__(self, other: "Resources") -> bool: - return self.cpus >= other.cpus and self.ram >= other.ram + if not (self.cpus >= other.cpus and self.ram >= other.ram): + return False + # ensure all numeric generic resources in `other` are satisfied by `self` + for k, v in other.generic_resources.items(): + if isinstance(v, int | float): + lhs_val = self.generic_resources.get(k, 0) + if not isinstance(lhs_val, int | float) or lhs_val < v: + return False + continue + # non-numeric must be equal and present + if k not in self.generic_resources or self.generic_resources[k] != v: + return False + return True def __gt__(self, other: "Resources") -> bool: - return self.cpus > other.cpus or self.ram > other.ram + if self.cpus > other.cpus or self.ram > other.ram: + return True + for k, v in other.generic_resources.items(): + lhs_val = self.generic_resources.get(k) + if ( + 
isinstance(v, int | float) + and isinstance(lhs_val, int | float) + and lhs_val > v + ): + return True + if not isinstance(v, int | float) and lhs_val is not None and lhs_val != v: + return True + return False def __add__(self, other: "Resources") -> "Resources": + """operator for adding two Resources + Note that only numeric generic resources are added + Non-numeric generic resources are ignored + """ + merged: dict[str, GenericResourceValue] = {} + keys = set(self.generic_resources) | set(other.generic_resources) + for k in keys: + a = self.generic_resources.get(k) + b = other.generic_resources.get(k) + # adding non numeric values does not make sense, so we skip those for the resulting resource + if isinstance(a, int | float) and isinstance(b, int | float): + merged[k] = a + b + elif a is None and isinstance(b, int | float): + merged[k] = b + elif b is None and isinstance(a, int | float): + merged[k] = a + return Resources.model_construct( - **{ - key: a + b - for (key, a), b in zip( - self.model_dump().items(), other.model_dump().values(), strict=True - ) - } + cpus=self.cpus + other.cpus, + ram=self.ram + other.ram, + generic_resources=merged, ) def __sub__(self, other: "Resources") -> "Resources": + """operator for subtracting two Resources + Note that only numeric generic resources are subtracted + Non-numeric generic resources are ignored + """ + merged: dict[str, GenericResourceValue] = {} + keys = set(self.generic_resources) | set(other.generic_resources) + for k in keys: + a = self.generic_resources.get(k) + b = other.generic_resources.get(k) + # subtracting non numeric values does not make sense, so we skip those for the resulting resource + if isinstance(a, int | float) and isinstance(b, int | float): + merged[k] = a - b + elif a is None and isinstance(b, int | float): + merged[k] = -b + elif b is None and isinstance(a, int | float): + merged[k] = a + return Resources.model_construct( - **{ - key: a - b - for (key, a), b in zip( - 
self.model_dump().items(), other.model_dump().values(), strict=True - ) - } + cpus=self.cpus - other.cpus, + ram=self.ram - other.ram, + generic_resources=merged, + ) + + def __hash__(self) -> int: + """Deterministic hash including cpus, ram (in bytes) and generic_resources.""" + # sort generic_resources items to ensure order-independent hashing + generic_items: tuple[tuple[str, GenericResourceValue], ...] = tuple( + sorted(self.generic_resources.items()) ) + return hash((self.cpus, self.ram, generic_items)) @field_validator("cpus", mode="before") @classmethod From 3915a82af6d820f892d3e11bc1085e6b88dc6a7d Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 13:56:21 +0200 Subject: [PATCH 04/93] do not define gt it is weird for resources --- .../src/aws_library/ec2/_models.py | 42 +++++++-------- packages/aws-library/tests/test_ec2_models.py | 53 +++++++++++-------- .../src/simcore_service_autoscaling/models.py | 6 +-- .../_provider_computational.py | 3 ++ 4 files changed, 57 insertions(+), 47 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index d5c1ca62442d..5cdd5ba305b4 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -44,35 +44,31 @@ def create_as_empty(cls) -> "Resources": return cls(cpus=0, ram=ByteSize(0)) def __ge__(self, other: "Resources") -> bool: + """operator for >= comparison + if self has greater or equal resources than other, returns True + Note that generic_resources are compared only if they are numeric + Non-numeric generic resources must be equal in both or only defined in self + to be considered greater or equal + """ + if not (self.cpus >= other.cpus and self.ram >= other.ram): return False - # ensure all numeric generic resources in `other` are satisfied by `self` - for k, v in other.generic_resources.items(): - if isinstance(v, 
int | float): - lhs_val = self.generic_resources.get(k, 0) - if not isinstance(lhs_val, int | float) or lhs_val < v: + + keys = set(self.generic_resources) | set(other.generic_resources) + for k in keys: + a = self.generic_resources.get(k) + b = other.generic_resources.get( + k, a + ) # NOTE: get from other, default to a so that non-existing keys are considered equal + if isinstance(a, int | float) and isinstance(b, int | float): + if not (a >= b): return False - continue - # non-numeric must be equal and present - if k not in self.generic_resources or self.generic_resources[k] != v: + elif a != b: + assert isinstance(a, str | None) # nosec + assert isinstance(b, int | float | str | None) # nosec return False return True - def __gt__(self, other: "Resources") -> bool: - if self.cpus > other.cpus or self.ram > other.ram: - return True - for k, v in other.generic_resources.items(): - lhs_val = self.generic_resources.get(k) - if ( - isinstance(v, int | float) - and isinstance(lhs_val, int | float) - and lhs_val > v - ): - return True - if not isinstance(v, int | float) and lhs_val is not None and lhs_val != v: - return True - return False - def __add__(self, other: "Resources") -> "Resources": """operator for adding two Resources Note that only numeric generic resources are added diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index ed232ad0043d..a2953e1d6b7e 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -37,46 +37,57 @@ Resources(cpus=0.1, ram=ByteSize(1)), False, ), - ], -) -def test_resources_ge_operator( - a: Resources, b: Resources, a_greater_or_equal_than_b: bool -): - assert (a >= b) is a_greater_or_equal_than_b - - -@pytest.mark.parametrize( - "a,b,a_greater_than_b", - [ ( - Resources(cpus=0.2, ram=ByteSize(0)), - Resources(cpus=0.1, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), + 
Resources(cpus=0.1, ram=ByteSize(1)), + False, # ram is not enough + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1)), True, ), ( - Resources(cpus=0.1, ram=ByteSize(0)), - Resources(cpus=0.1, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + True, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), False, ), ( Resources(cpus=0.1, ram=ByteSize(1)), - Resources(cpus=0.1, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "2"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1)), True, ), ( - Resources(cpus=0.05, ram=ByteSize(1)), - Resources(cpus=0.1, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), True, ), ( - Resources(cpus=0.1, ram=ByteSize(0)), Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), False, ), ], ) -def test_resources_gt_operator(a: Resources, b: Resources, a_greater_than_b: bool): - assert (a > b) is a_greater_than_b +def test_resources_ge_operator( + a: Resources, b: Resources, a_greater_or_equal_than_b: bool +): + assert (a >= b) is a_greater_or_equal_than_b @pytest.mark.parametrize( diff --git a/services/autoscaling/src/simcore_service_autoscaling/models.py b/services/autoscaling/src/simcore_service_autoscaling/models.py index 7645b300e8de..ca697a13fa06 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/models.py +++ 
b/services/autoscaling/src/simcore_service_autoscaling/models.py @@ -23,6 +23,9 @@ def assign_task(self, task, task_resources: Resources) -> None: def has_resources_for_task(self, task_resources: Resources) -> bool: return bool(self.available_resources >= task_resources) + def has_assigned_tasks(self) -> bool: + return len(self.assigned_tasks) > 0 + @dataclass(frozen=True, kw_only=True, slots=True) class AssignedTasksToInstanceType(_TaskAssignmentMixin): @@ -37,9 +40,6 @@ def __post_init__(self) -> None: if self.available_resources == Resources.create_as_empty(): object.__setattr__(self, "available_resources", self.ec2_instance.resources) - def has_assigned_tasks(self) -> bool: - return bool(self.available_resources < self.ec2_instance.resources) - @dataclass(frozen=True, kw_only=True, slots=True) class AssociatedInstance(_BaseInstance): diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 92be7fe1f4be..a47323e42587 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -88,6 +88,9 @@ async def list_unrunnable_tasks(self, app: FastAPI) -> list[DaskTask]: def get_task_required_resources(self, task) -> Resources: assert self # nosec + # NOTE: a dask worker can take a task if it has a free thread, regardless of its resources + # so we need to be careful when interpreting the resources, adding the thread here will mimick this + return utils.resources_from_dask_task(task) async def get_task_defined_instance( From 2209e2c914ee4e657e4289e9f55865a05f6a2eb2 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:06:10 +0200 Subject: [PATCH 05/93] improve coverage --- 
packages/aws-library/tests/test_ec2_models.py | 69 ++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index a2953e1d6b7e..adaf81158bb8 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -103,6 +103,36 @@ def test_resources_ge_operator( Resources(cpus=1, ram=ByteSize(34)), Resources(cpus=1.1, ram=ByteSize(35)), ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=1, ram=ByteSize(34)), + Resources(cpus=1.1, ram=ByteSize(35), generic_resources={"GPU": 1}), + ), + ( + Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources(cpus=1.1, ram=ByteSize(35), generic_resources={"GPU": 1}), + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources(cpus=1.1, ram=ByteSize(35), generic_resources={"GPU": 2}), + ), + ( + Resources( + cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1, "SSE": "yes"} + ), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources(cpus=1.1, ram=ByteSize(35), generic_resources={"GPU": 2}), + ), # string resources are not summed + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "1"}), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources( + cpus=1.1, + ram=ByteSize(35), + ), + ), # string resources are ignored in summation ], ) def test_resources_add(a: Resources, b: Resources, result: Resources): @@ -112,7 +142,9 @@ def test_resources_add(a: Resources, b: Resources, result: Resources): def test_resources_create_as_empty(): - assert Resources.create_as_empty() == Resources(cpus=0, ram=ByteSize(0)) + assert Resources.create_as_empty() == Resources( + cpus=0, ram=ByteSize(0), generic_resources={} + ) 
@pytest.mark.parametrize( @@ -128,6 +160,41 @@ def test_resources_create_as_empty(): Resources(cpus=1, ram=ByteSize(1)), Resources.model_construct(cpus=-0.9, ram=ByteSize(33)), ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=1, ram=ByteSize(34)), + Resources.model_construct( + cpus=-0.9, ram=ByteSize(-33), generic_resources={"GPU": 1} + ), + ), + ( + Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources.model_construct( + cpus=-0.9, ram=ByteSize(-33), generic_resources={"GPU": -1} + ), + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources.model_construct( + cpus=-0.9, ram=ByteSize(-33), generic_resources={"GPU": 0} + ), + ), + ( + Resources( + cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1, "SSE": "yes"} + ), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources.model_construct( + cpus=-0.9, ram=ByteSize(-33), generic_resources={"GPU": 0} + ), + ), # string resources are not summed + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "1"}), + Resources(cpus=1, ram=ByteSize(34), generic_resources={"GPU": 1}), + Resources.model_construct(cpus=-0.9, ram=ByteSize(-33)), + ), # string resources are ignored in summation ], ) def test_resources_sub(a: Resources, b: Resources, result: Resources): From 4db4871da2b41afa9bc06169e2be8a33748062f6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:07:28 +0200 Subject: [PATCH 06/93] ruff --- packages/aws-library/tests/test_ec2_models.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index adaf81158bb8..53c9ed031ac7 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ 
b/packages/aws-library/tests/test_ec2_models.py @@ -226,7 +226,11 @@ def test_ec2_instance_data_hashable(faker: Faker): cpus=faker.pyfloat(min_value=0.1), ram=ByteSize(faker.pyint(min_value=123)), ), - {AWSTagKey("mytagkey"): AWSTagValue("mytagvalue")}, + { + TypeAdapter(AWSTagKey) + .validate_python("mytagkey"): TypeAdapter(AWSTagValue) + .validate_python("mytagvalue") + }, ) } second_set_of_ec2s = { @@ -241,7 +245,11 @@ def test_ec2_instance_data_hashable(faker: Faker): cpus=faker.pyfloat(min_value=0.1), ram=ByteSize(faker.pyint(min_value=123)), ), - {AWSTagKey("mytagkey"): AWSTagValue("mytagvalue")}, + { + TypeAdapter(AWSTagKey) + .validate_python("mytagkey"): TypeAdapter(AWSTagValue) + .validate_python("mytagvalue") + }, ) } From e5012b671c299c12a15188b78626955edbd0bf23 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:12:34 +0200 Subject: [PATCH 07/93] add missing test --- packages/aws-library/tests/test_ec2_models.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 53c9ed031ac7..1dc3521ab6e9 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -4,7 +4,13 @@ import pytest -from aws_library.ec2._models import AWSTagKey, AWSTagValue, EC2InstanceData, Resources +from aws_library.ec2._models import ( + AWSTagKey, + AWSTagValue, + EC2InstanceBootSpecific, + EC2InstanceData, + Resources, +) from faker import Faker from pydantic import ByteSize, TypeAdapter, ValidationError @@ -256,3 +262,11 @@ def test_ec2_instance_data_hashable(faker: Faker): union_of_sets = first_set_of_ec2s.union(second_set_of_ec2s) assert next(iter(first_set_of_ec2s)) in union_of_sets assert next(iter(second_set_of_ec2s)) in union_of_sets + + +def test_ec2_instance_boot_specific_with_invalid_custome_script(faker: Faker): + valid_model = 
EC2InstanceBootSpecific.model_json_schema()["examples"][0] + invalid_model = {**valid_model, "custom_boot_scripts": ["echo 'missing end quote"]} + + with pytest.raises(ValueError, match="Invalid bash call"): + EC2InstanceBootSpecific(**invalid_model) From cf3915862506c21c0f49189f531f9139edc48ee0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:14:04 +0200 Subject: [PATCH 08/93] ruff --- packages/aws-library/src/aws_library/ec2/_models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 5cdd5ba305b4..9aba3ccf838b 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -244,8 +244,9 @@ def validate_bash_calls(cls, v): temp_file.flush() # NOTE: this will not capture runtime errors, but at least some syntax errors such as invalid quotes sh.bash( - "-n", temp_file.name - ) # pyright: ignore[reportCallIssue] # sh is untyped, but this call is safe for bash syntax checking + "-n", + temp_file.name, # pyright: ignore[reportCallIssue] + ) # sh is untyped, but this call is safe for bash syntax checking except sh.ErrorReturnCode as exc: msg = f"Invalid bash call in custom_boot_scripts: {v}, Error: {exc.stderr}" raise ValueError(msg) from exc From cfc3ec70b2aa4cdf5a22ee773b9ae77da0766084 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:16:26 +0200 Subject: [PATCH 09/93] use ge operator --- .../src/simcore_service_autoscaling/utils/cluster_scaling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py index 13c25dcd2112..5a64de471cd8 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py @@ -109,7 +109,7 @@ def find_selected_instance_type_for_task( selected_instance = filtered_instances[0] # check that the assigned resources and the machine resource fit - if task_required_resources > selected_instance.resources: + if task_required_resources <= selected_instance.resources: raise TaskRequirementsAboveRequiredEC2InstanceTypeError( task=task, instance_type=selected_instance, From 9e3e916e4b992743d9d54bf3edf08ef0f4df9b2d Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:26:11 +0200 Subject: [PATCH 10/93] added model dump flat --- packages/aws-library/src/aws_library/ec2/_models.py | 6 ++++++ packages/aws-library/tests/test_ec2_models.py | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 9aba3ccf838b..e2ad26fbba2e 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -125,6 +125,12 @@ def __hash__(self) -> int: ) return hash((self.cpus, self.ram, generic_items)) + def model_dump_flat(self) -> dict[str, float | int]: + """Like model_dump, but flattens ram to bytes and generic_resources to top level keys""" + base = self.model_dump() + base.update(base.pop("generic_resources")) + return base + @field_validator("cpus", mode="before") @classmethod def _floor_cpus_to_0(cls, v: float) -> float: diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 1dc3521ab6e9..1dca9b5a3a0b 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -209,6 +209,14 @@ def test_resources_sub(a: Resources, b: Resources, result: Resources): assert a == 
result +def test_resources_model_dump_flat(): + r = Resources( + cpus=0.1, ram=ByteSize(1024), generic_resources={"GPU": 2, "SSE": "yes"} + ) + flat = r.model_dump_flat() + assert flat == {"cpus": 0.1, "ram": 1024, "GPU": 2, "SSE": "yes"} + + @pytest.mark.parametrize("ec2_tag_key", ["", "/", " ", ".", "..", "_index"]) def test_aws_tag_key_invalid(ec2_tag_key: str): # for a key it raises From 4dd4191f14a11e5fc0bd2fc1702330373093932e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:32:16 +0200 Subject: [PATCH 11/93] both direction --- .../aws-library/src/aws_library/ec2/_models.py | 14 ++++++++++++-- packages/aws-library/tests/test_ec2_models.py | 7 +++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index e2ad26fbba2e..3a4aaf42ec19 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -125,12 +125,22 @@ def __hash__(self) -> int: ) return hash((self.cpus, self.ram, generic_items)) - def model_dump_flat(self) -> dict[str, float | int]: - """Like model_dump, but flattens ram to bytes and generic_resources to top level keys""" + def as_flat_dict(self) -> dict[str, int | float | str]: + """Like model_dump, but flattens generic_resources to top level keys""" base = self.model_dump() base.update(base.pop("generic_resources")) return base + @classmethod + def from_flat_dict(cls, data: dict[str, int | float | str]) -> "Resources": + """Inverse of as_flat_dict""" + generic_resources = {k: v for k, v in data.items() if k not in {"cpus", "ram"}} + return cls( + cpus=data.get("cpus", 0), + ram=ByteSize(data.get("ram", 0)), + generic_resources=generic_resources, + ) + @field_validator("cpus", mode="before") @classmethod def _floor_cpus_to_0(cls, v: float) -> float: diff --git a/packages/aws-library/tests/test_ec2_models.py 
b/packages/aws-library/tests/test_ec2_models.py index 1dca9b5a3a0b..d1ef498f0c28 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -209,13 +209,16 @@ def test_resources_sub(a: Resources, b: Resources, result: Resources): assert a == result -def test_resources_model_dump_flat(): +def test_resources_flat_dict(): r = Resources( cpus=0.1, ram=ByteSize(1024), generic_resources={"GPU": 2, "SSE": "yes"} ) - flat = r.model_dump_flat() + flat = r.as_flat_dict() assert flat == {"cpus": 0.1, "ram": 1024, "GPU": 2, "SSE": "yes"} + reconstructed = Resources.from_flat_dict(flat) + assert reconstructed == r + @pytest.mark.parametrize("ec2_tag_key", ["", "/", " ", ".", "..", "_index"]) def test_aws_tag_key_invalid(ec2_tag_key: str): From 073050fc22024bad4f2b180ac8473498926a7a07 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:45:22 +0200 Subject: [PATCH 12/93] added ENV variables for nthreads and threads multiplier --- .../simcore_service_autoscaling/core/settings.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py index 0ae53b943954..38f994bcea6e 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/settings.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/settings.py @@ -14,6 +14,7 @@ AnyUrl, Field, NonNegativeInt, + PositiveInt, TypeAdapter, field_validator, model_validator, @@ -241,6 +242,18 @@ class DaskMonitoringSettings(BaseCustomSettings): description="defines the authentication of the clusters created via clusters-keeper (can be None or TLS)", ), ] + DASK_NTHREADS: Annotated[ + NonNegativeInt, + Field( + description="if >0, it overrides the default number of threads per process in the dask-sidecars, (see description in dask-sidecar)", + ), + ] + 
DASK_NTHREADS_MULTIPLIER: Annotated[ + PositiveInt, + Field( + description="if >1, it overrides the default number of threads per process in the dask-sidecars, by multiplying the number of vCPUs with this factor (see description in dask-sidecar)", + ), + ] class ApplicationSettings(BaseApplicationSettings, MixinLoggingSettings): From 74bcf7d19e9a0f3432e72ea4cd466e8f22fabbc4 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:45:42 +0200 Subject: [PATCH 13/93] use ge operator instead of incorrect gt operator --- .../src/simcore_service_autoscaling/utils/cluster_scaling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py index 5a64de471cd8..1cff28a0bb46 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py @@ -109,7 +109,7 @@ def find_selected_instance_type_for_task( selected_instance = filtered_instances[0] # check that the assigned resources and the machine resource fit - if task_required_resources <= selected_instance.resources: + if not (task_required_resources <= selected_instance.resources): raise TaskRequirementsAboveRequiredEC2InstanceTypeError( task=task, instance_type=selected_instance, From 9a7eb37638d93064c5b7bbc1785dbb4d888e1f31 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:46:52 +0200 Subject: [PATCH 14/93] define variables for tests --- services/autoscaling/tests/unit/conftest.py | 4 ++-- .../unit/test_modules_cluster_scaling_computational.py | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index 
57c9b381fc2d..192cc4932dde 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -380,8 +380,8 @@ def enabled_computational_mode( "AUTOSCALING_DASK": "{}", "DASK_MONITORING_URL": faker.url(), "DASK_SCHEDULER_AUTH": "{}", - "DASK_MONITORING_USER_NAME": faker.user_name(), - "DASK_MONITORING_PASSWORD": faker.password(), + "DASK_NTHREADS": f"{faker.pyint(min_value=0, max_value=10)}", + "DASK_NTHREADS_MULTIPLIER": f"{faker.pyint(min_value=1, max_value=4)}", }, ) diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index f83eaac9ea8b..fbfd965cd34a 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -14,7 +14,7 @@ from collections.abc import Awaitable, Callable, Iterator from copy import deepcopy from dataclasses import dataclass -from typing import Any, Final, cast +from typing import Any, cast from unittest import mock import arrow @@ -285,13 +285,11 @@ class _ScaleUpParams: expected_num_instances: int -_RESOURCE_TO_DASK_RESOURCE_MAP: Final[dict[str, str]] = {"CPUS": "CPU", "RAM": "RAM"} - - def _dask_task_resources_from_resources(resources: Resources) -> DaskTaskResources: return { - _RESOURCE_TO_DASK_RESOURCE_MAP[res_key.upper()]: res_value - for res_key, res_value in resources.model_dump().items() + "CPU": resources.cpus, + "RAM": resources.ram, + **dict(resources.generic_resources.items()), } From 2a67d9ec547101e663194d262ea62d47e70b50be Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:48:49 +0200 Subject: [PATCH 15/93] pass nthreads and multiplier also to the autoscaling service --- .../src/simcore_service_clusters_keeper/data/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml index dc44dd9ece75..d3ba68cb76a8 100644 --- a/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml +++ b/services/clusters-keeper/src/simcore_service_clusters_keeper/data/docker-compose.yml @@ -104,6 +104,8 @@ services: AUTOSCALING_RABBITMQ: ${AUTOSCALING_RABBITMQ} DASK_MONITORING_URL: tls://dask-scheduler:8786 DASK_SCHEDULER_AUTH: '{"type":"tls","tls_ca_file":"${DASK_TLS_CA_FILE}","tls_client_cert":"${DASK_TLS_CERT}","tls_client_key":"${DASK_TLS_KEY}"}' + DASK_NTHREADS: ${DASK_NTHREADS} + DASK_NTHREADS_MULTIPLIER: ${DASK_NTHREADS_MULTIPLIER} EC2_INSTANCES_ALLOWED_TYPES: ${WORKERS_EC2_INSTANCES_ALLOWED_TYPES} EC2_INSTANCES_COLD_START_DOCKER_IMAGES_PRE_PULLING: ${WORKERS_EC2_INSTANCES_COLD_START_DOCKER_IMAGES_PRE_PULLING} EC2_INSTANCES_CUSTOM_TAGS: ${WORKERS_EC2_INSTANCES_CUSTOM_TAGS} From 2a3019f84d1f728dabdd34ec58e0055daf36efa8 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 16:58:22 +0200 Subject: [PATCH 16/93] ongoing --- .../cluster_scaling/_provider_computational.py | 18 ++++++++++++------ .../modules/dask.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index a47323e42587..a605c7ea0484 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -1,6 +1,6 @@ import collections import logging -from typing import cast +from typing import Any, cast from aws_library.ec2 import 
EC2InstanceData, EC2Tags, Resources from fastapi import FastAPI @@ -90,8 +90,10 @@ def get_task_required_resources(self, task) -> Resources: assert self # nosec # NOTE: a dask worker can take a task if it has a free thread, regardless of its resources # so we need to be careful when interpreting the resources, adding the thread here will mimic this - - return utils.resources_from_dask_task(task) + task_required_resources = utils.resources_from_dask_task(task) + # TODO: should we add a generic resource for threads? + # task_required_resources.generic_resources[_DASK_WORKER_THREAD_RESOURCE_NAME] = 1 + return task_required_resources async def get_task_defined_instance( self, app: FastAPI, task @@ -138,10 +140,14 @@ async def compute_cluster_used_resources( list_of_used_resources: list[Resources] = await logged_gather( *(self.compute_node_used_resources(app, i) for i in instances) ) - counter = collections.Counter(dict.fromkeys(Resources.model_fields, 0)) + counter = collections.Counter() for result in list_of_used_resources: - counter.update(result.model_dump()) - return Resources.model_validate(dict(counter)) + counter.update(result.as_flat_dict()) + + flat_counter: dict[str, Any] = dict(counter) + flat_counter.setdefault("cpus", 0) + flat_counter.setdefault("ram", 0) + return Resources.from_flat_dict(flat_counter) async def compute_cluster_total_resources( self, app: FastAPI, instances: list[AssociatedInstance] diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 966593295e87..ce52f997bdd5 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -15,6 +15,7 @@ from models_library.clusters import ClusterAuthentication, TLSAuthentication from pydantic import AnyUrl, ByteSize, TypeAdapter +from ...core.settings import DaskMonitoringSettings from ..core.errors
import ( DaskNoWorkersError, DaskSchedulerNotFoundError, @@ -39,6 +40,7 @@ async def _wrap_client_async_routine( _DASK_SCHEDULER_CONNECT_TIMEOUT_S: Final[int] = 5 +_DASK_WORKER_THREAD_RESOURCE_NAME: Final[str] = "threads" @contextlib.asynccontextmanager @@ -326,3 +328,19 @@ async def try_retire_nodes( await _wrap_client_async_routine( client.retire_workers(close_workers=False, remove=False) ) + + +async def add_instance_generic_resources( + settings: DaskMonitoringSettings, instance: EC2InstanceData +) -> None: + instance_threads = round(instance.available_resources.cpus) + if settings.AUTOSCALING_DASK.DASK_NTHREADS > 0: + # this overrides everything + instance_threads = settings.AUTOSCALING_DASK.DASK_NTHREADS + if settings.AUTOSCALING_DASK.DASK_NTHREADS_MULTIPLIER > 1: + instance_threads = ( + instance_threads * settings.AUTOSCALING_DASK.DASK_NTHREADS_MULTIPLIER + ) + instance.available_resources.generic_resources[ + _DASK_WORKER_THREAD_RESOURCE_NAME + ] = instance_threads From 30b4367b9ff604dc88baf497cba2e376e6151461 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:01:57 +0200 Subject: [PATCH 17/93] typo --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index ce52f997bdd5..6ed2bd7741bd 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -15,12 +15,12 @@ from models_library.clusters import ClusterAuthentication, TLSAuthentication from pydantic import AnyUrl, ByteSize, TypeAdapter -from ...core.settings import DaskMonitoringSettings from ..core.errors import ( DaskNoWorkersError, DaskSchedulerNotFoundError, DaskWorkerNotFoundError, ) +from ..core.settings import 
DaskMonitoringSettings from ..models import AssociatedInstance, DaskTask, DaskTaskId from ..utils.utils_ec2 import ( node_host_name_from_ec2_private_dns, From 3c8732141b71e14344c4bb795e42051aec37429e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:24:09 +0200 Subject: [PATCH 18/93] fix counter --- .../simcore_service_autoscaling/utils/utils_docker.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py index a48951986763..e2b9e044b9ef 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py @@ -395,14 +395,16 @@ async def compute_cluster_used_resources( docker_client: AutoscalingDocker, nodes: list[Node] ) -> Resources: """Returns the total amount of resources (reservations) used on each of the given nodes""" - list_of_used_resources = await logged_gather( + list_of_used_resources: list[Resources] = await logged_gather( *(compute_node_used_resources(docker_client, node) for node in nodes) ) - counter = collections.Counter(dict.fromkeys(list(Resources.model_fields), 0)) + flat_counter = collections.Counter() for result in list_of_used_resources: - counter.update(result.model_dump()) + flat_counter.update(result.as_flat_dict()) + flat_counter.setdefault("cpus", 0) + flat_counter.setdefault("ram", 0) - return Resources.model_validate(dict(counter)) + return Resources.from_flat_dict(dict(flat_counter)) _COMMAND_TIMEOUT_S = 10 From 7971dea38d800da74f4de4da2567e1ea23bdabb8 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:24:26 +0200 Subject: [PATCH 19/93] fixed test assert --- .../tests/unit/test_modules_cluster_scaling_dynamic.py | 1 + 1 file changed, 1 
insertion(+) diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py index 8ba17f3f34ff..4b2682805345 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py @@ -437,6 +437,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect / 1e9, "ram": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER * fake_node.description.resources.memory_bytes, + "generic_resources": {}, }, ) From 89819621c1b5917f2145bfb4ee2c8bc4aa583174 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:27:08 +0200 Subject: [PATCH 20/93] fix assert --- .../tests/unit/test_modules_cluster_scaling_dynamic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py index 4b2682805345..bf9e42a1ec67 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py @@ -713,11 +713,9 @@ async def _assert_wait_for_ec2_instances_running() -> list[InstanceTypeDef]: cluster_total_resources={ "cpus": fake_attached_node.description.resources.nano_cp_us / 1e9, "ram": fake_attached_node.description.resources.memory_bytes, + "generic_resources": {}, }, - cluster_used_resources={ - "cpus": float(0), - "ram": 0, - }, + cluster_used_resources={"cpus": float(0), "ram": 0, "generic_resources": {}}, instances_running=scale_up_params.expected_num_instances, ) mock_rabbitmq_post_message.reset_mock() From 8fd2fcea2e359c0b5af7b3ecc6b3cced5f1c021e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 
17:30:30 +0200 Subject: [PATCH 21/93] typo --- packages/aws-library/tests/test_ec2_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index d1ef498f0c28..b83b57e75f57 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -275,7 +275,7 @@ def test_ec2_instance_data_hashable(faker: Faker): assert next(iter(second_set_of_ec2s)) in union_of_sets -def test_ec2_instance_boot_specific_with_invalid_custome_script(faker: Faker): +def test_ec2_instance_boot_specific_with_invalid_custom_script(faker: Faker): valid_model = EC2InstanceBootSpecific.model_json_schema()["examples"][0] invalid_model = {**valid_model, "custom_boot_scripts": ["echo 'missing end quote"]} From f62edf4c13887a00a5a7ec72735133ff3cd95874 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:32:22 +0200 Subject: [PATCH 22/93] wrong types --- .../modules/dask.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 6ed2bd7741bd..105551885aca 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -333,14 +333,12 @@ async def try_retire_nodes( async def add_instance_generic_resources( settings: DaskMonitoringSettings, instance: EC2InstanceData ) -> None: - instance_threads = round(instance.available_resources.cpus) - if settings.AUTOSCALING_DASK.DASK_NTHREADS > 0: + instance_threads = round(instance.resources.cpus) + if settings.DASK_NTHREADS > 0: # this overrides everything - instance_threads = settings.AUTOSCALING_DASK.DASK_NTHREADS - if settings.AUTOSCALING_DASK.DASK_NTHREADS_MULTIPLIER > 1: - 
instance_threads = ( - instance_threads * settings.AUTOSCALING_DASK.DASK_NTHREADS_MULTIPLIER - ) - instance.available_resources.generic_resources[ - _DASK_WORKER_THREAD_RESOURCE_NAME - ] = instance_threads + instance_threads = settings.DASK_NTHREADS + if settings.DASK_NTHREADS_MULTIPLIER > 1: + instance_threads = instance_threads * settings.DASK_NTHREADS_MULTIPLIER + instance.resources.generic_resources[_DASK_WORKER_THREAD_RESOURCE_NAME] = ( + instance_threads + ) From 6911754cf5f6ee5d8a371b4ea5e65652980c53da Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 25 Sep 2025 17:39:21 +0200 Subject: [PATCH 23/93] ongoing --- .../modules/cluster_scaling/_provider_computational.py | 2 +- .../src/simcore_service_autoscaling/utils/utils_docker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index a605c7ea0484..b55c8fc2f02e 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -140,7 +140,7 @@ async def compute_cluster_used_resources( list_of_used_resources: list[Resources] = await logged_gather( *(self.compute_node_used_resources(app, i) for i in instances) ) - counter = collections.Counter() + counter: collections.Counter = collections.Counter() for result in list_of_used_resources: counter.update(result.as_flat_dict()) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py index e2b9e044b9ef..f4feea61cfde 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py +++ 
b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py @@ -398,7 +398,7 @@ async def compute_cluster_used_resources( list_of_used_resources: list[Resources] = await logged_gather( *(compute_node_used_resources(docker_client, node) for node in nodes) ) - flat_counter = collections.Counter() + flat_counter: collections.Counter = collections.Counter() for result in list_of_used_resources: flat_counter.update(result.as_flat_dict()) flat_counter.setdefault("cpus", 0) From 7c9a8b71645ba28df002126d16c5fa8db8676573 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 08:04:29 +0200 Subject: [PATCH 24/93] mypy --- packages/aws-library/src/aws_library/ec2/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 3a4aaf42ec19..5045d34541df 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -136,7 +136,7 @@ def from_flat_dict(cls, data: dict[str, int | float | str]) -> "Resources": """Inverse of as_flat_dict""" generic_resources = {k: v for k, v in data.items() if k not in {"cpus", "ram"}} return cls( - cpus=data.get("cpus", 0), + cpus=float(data.get("cpus", 0)), ram=ByteSize(data.get("ram", 0)), generic_resources=generic_resources, ) From 6dfde1d79061ece28d98b4badf7b395aaef1981b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 08:44:18 +0200 Subject: [PATCH 25/93] added test for getting threads resources --- .../modules/dask.py | 6 +-- .../tests/unit/test_modules_dask.py | 41 +++++++++++++++++++ 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 105551885aca..1eb5a51aa5ed 100644 
--- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -40,7 +40,7 @@ async def _wrap_client_async_routine( _DASK_SCHEDULER_CONNECT_TIMEOUT_S: Final[int] = 5 -_DASK_WORKER_THREAD_RESOURCE_NAME: Final[str] = "threads" +DASK_WORKER_THREAD_RESOURCE_NAME: Final[str] = "threads" @contextlib.asynccontextmanager @@ -330,7 +330,7 @@ async def try_retire_nodes( ) -async def add_instance_generic_resources( +def add_instance_generic_resources( settings: DaskMonitoringSettings, instance: EC2InstanceData ) -> None: instance_threads = round(instance.resources.cpus) @@ -339,6 +339,6 @@ async def add_instance_generic_resources( instance_threads = settings.DASK_NTHREADS if settings.DASK_NTHREADS_MULTIPLIER > 1: instance_threads = instance_threads * settings.DASK_NTHREADS_MULTIPLIER - instance.resources.generic_resources[_DASK_WORKER_THREAD_RESOURCE_NAME] = ( + instance.resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] = ( instance_threads ) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 9c53865cfa30..c4cd3c52794a 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -31,8 +31,11 @@ EC2InstanceData, ) from simcore_service_autoscaling.modules.dask import ( + DASK_WORKER_THREAD_RESOURCE_NAME, + DaskMonitoringSettings, DaskTask, _scheduler_client, + add_instance_generic_resources, get_worker_still_has_results_in_memory, get_worker_used_resources, list_processing_tasks_per_worker, @@ -370,3 +373,41 @@ def _add_fct(x: int, y: int) -> int: ) == Resources.create_as_empty() ) + + +@pytest.mark.parametrize( + "dask_nthreads, dask_nthreads_multiplier, expected_threads_resource", + [(4, 1, 4), (4, 2, 8), (0, 2.0, -1)], +) +def test_add_instance_generic_resources( + fake_ec2_instance_data: Callable[..., EC2InstanceData], + faker: 
Faker, + dask_nthreads: int, + dask_nthreads_multiplier: int, + expected_threads_resource: int, +): + settings = DaskMonitoringSettings( + DASK_MONITORING_URL=faker.url(), + DASK_SCHEDULER_AUTH=NoAuthentication(), + DASK_NTHREADS=dask_nthreads, + DASK_NTHREADS_MULTIPLIER=dask_nthreads_multiplier, + ) + ec2_instance_data = fake_ec2_instance_data() + assert ec2_instance_data.resources.cpus > 0 + assert ec2_instance_data.resources.ram > 0 + assert ec2_instance_data.resources.generic_resources == {} + + add_instance_generic_resources(settings, ec2_instance_data) + assert ec2_instance_data.resources.generic_resources != {} + assert ( + DASK_WORKER_THREAD_RESOURCE_NAME + in ec2_instance_data.resources.generic_resources + ) + if expected_threads_resource < 0: + expected_threads_resource = ( + ec2_instance_data.resources.cpus * dask_nthreads_multiplier + ) + assert ( + ec2_instance_data.resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] + == expected_threads_resource + ) From 03be3af3ab68c8e1f1197c871f67e20af5cb0bde Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 08:51:16 +0200 Subject: [PATCH 26/93] added test --- .../tests/unit/test_modules_dask.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index c4cd3c52794a..a77fffd3fc43 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -38,6 +38,7 @@ add_instance_generic_resources, get_worker_still_has_results_in_memory, get_worker_used_resources, + is_worker_connected, list_processing_tasks_per_worker, list_unrunnable_tasks, ) @@ -380,15 +381,16 @@ def _add_fct(x: int, y: int) -> int: [(4, 1, 4), (4, 2, 8), (0, 2.0, -1)], ) def test_add_instance_generic_resources( + scheduler_url: AnyUrl, + scheduler_authentication: 
ClusterAuthentication, fake_ec2_instance_data: Callable[..., EC2InstanceData], - faker: Faker, dask_nthreads: int, dask_nthreads_multiplier: int, expected_threads_resource: int, ): settings = DaskMonitoringSettings( - DASK_MONITORING_URL=faker.url(), - DASK_SCHEDULER_AUTH=NoAuthentication(), + DASK_MONITORING_URL=scheduler_url, + DASK_SCHEDULER_AUTH=scheduler_authentication, DASK_NTHREADS=dask_nthreads, DASK_NTHREADS_MULTIPLIER=dask_nthreads_multiplier, ) @@ -411,3 +413,17 @@ def test_add_instance_generic_resources( ec2_instance_data.resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] == expected_threads_resource ) + + +async def test_is_worker_connected( + scheduler_url: AnyUrl, + scheduler_authentication: ClusterAuthentication, + fake_ec2_instance_data: Callable[..., EC2InstanceData], +): + ec2_instance_data = fake_ec2_instance_data() + assert ( + await is_worker_connected( + scheduler_url, scheduler_authentication, ec2_instance_data + ) + is False + ) From c3117a7958bc887089da30df73955781d62abfaf Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 10:07:23 +0200 Subject: [PATCH 27/93] improve testing --- .../modules/dask.py | 4 ++ .../tests/unit/test_modules_dask.py | 56 ++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 1eb5a51aa5ed..7d8f8c289c4f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -103,6 +103,10 @@ def _find_by_worker_host( _, details = dask_worker if match := re.match(DASK_NAME_PATTERN, details["name"]): return bool(match.group("private_ip") == node_hostname) + _logger.warning( + "Unexpected worker name format: %s. 
TIP: this should be investigated", + details["name"], + ) return False filtered_workers = dict(filter(_find_by_worker_host, workers.items())) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index a77fffd3fc43..28f6a3047d12 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -39,10 +39,11 @@ get_worker_still_has_results_in_memory, get_worker_used_resources, is_worker_connected, + is_worker_retired, list_processing_tasks_per_worker, list_unrunnable_tasks, ) -from tenacity import retry, stop_after_delay, wait_fixed +from tenacity import AsyncRetrying, retry, stop_after_delay, wait_fixed _authentication_types = [ NoAuthentication(), @@ -406,7 +407,7 @@ def test_add_instance_generic_resources( in ec2_instance_data.resources.generic_resources ) if expected_threads_resource < 0: - expected_threads_resource = ( + expected_threads_resource = int( ec2_instance_data.resources.cpus * dask_nthreads_multiplier ) assert ( @@ -419,6 +420,7 @@ async def test_is_worker_connected( scheduler_url: AnyUrl, scheduler_authentication: ClusterAuthentication, fake_ec2_instance_data: Callable[..., EC2InstanceData], + fake_localhost_ec2_instance_data: EC2InstanceData, ): ec2_instance_data = fake_ec2_instance_data() assert ( @@ -427,3 +429,53 @@ async def test_is_worker_connected( ) is False ) + + assert ( + await is_worker_connected( + scheduler_url, scheduler_authentication, fake_localhost_ec2_instance_data + ) + is True + ) + + +async def test_is_worker_retired( + dask_spec_local_cluster: distributed.SpecCluster, + scheduler_url: AnyUrl, + scheduler_authentication: ClusterAuthentication, + fake_ec2_instance_data: Callable[..., EC2InstanceData], + fake_localhost_ec2_instance_data: EC2InstanceData, +): + ec2_instance_data = fake_ec2_instance_data() + # fake instance is not connected, so it cannot be retired + assert ( + await is_worker_retired( + 
scheduler_url, scheduler_authentication, ec2_instance_data + ) + is False + ) + + # localhost is connected, but not retired + assert ( + await is_worker_retired( + scheduler_url, scheduler_authentication, fake_localhost_ec2_instance_data + ) + is False + ) + + # retire localhost worker + assert isinstance(dask_spec_local_cluster.scheduler, distributed.Scheduler) + await dask_spec_local_cluster.scheduler.retire_workers( + close_workers=True, remove=False + ) + async for attempt in AsyncRetrying( + stop=stop_after_delay(10), wait=wait_fixed(1), reraise=True + ): + with attempt: + assert ( + await is_worker_retired( + scheduler_url, + scheduler_authentication, + fake_localhost_ec2_instance_data, + ) + is True + ) From 37d1b449e040b448f8bbe0a7f70685381f5e1d8e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 10:17:01 +0200 Subject: [PATCH 28/93] improve testing --- services/autoscaling/tests/unit/test_modules_dask.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 28f6a3047d12..09e104c5cfe6 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -42,6 +42,7 @@ is_worker_retired, list_processing_tasks_per_worker, list_unrunnable_tasks, + try_retire_nodes, ) from tenacity import AsyncRetrying, retry, stop_after_delay, wait_fixed @@ -463,10 +464,7 @@ async def test_is_worker_retired( ) # retire localhost worker - assert isinstance(dask_spec_local_cluster.scheduler, distributed.Scheduler) - await dask_spec_local_cluster.scheduler.retire_workers( - close_workers=True, remove=False - ) + await try_retire_nodes(scheduler_url, scheduler_authentication) async for attempt in AsyncRetrying( stop=stop_after_delay(10), wait=wait_fixed(1), reraise=True ): From 0902d083d0d5e2f9931cd5e7d9daceb05b2024f6 Mon Sep 17 
00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:17:28 +0200 Subject: [PATCH 29/93] ruff --- services/autoscaling/tests/unit/test_utils_cluster_scaling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py index 1c325c1f6234..5525cedc9268 100644 --- a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py +++ b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py @@ -296,6 +296,7 @@ def test_sort_drained_nodes( assert app_settings.AUTOSCALING_EC2_INSTANCES machine_buffer_type = get_hot_buffer_type(random_fake_available_instances) _NUM_DRAINED_NODES = 20 + assert app_settings.AUTOSCALING_EC2_INSTANCES _NUM_NODE_WITH_TYPE_BUFFER = ( 3 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER ) From 64d84dc7247bbf5330469cd769f280d8d1a92911 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:17:37 +0200 Subject: [PATCH 30/93] adding test --- .../autoscaling/tests/unit/test_modules_dask.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 09e104c5cfe6..4d3e667ad5df 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -36,6 +36,7 @@ DaskTask, _scheduler_client, add_instance_generic_resources, + compute_cluster_total_resources, get_worker_still_has_results_in_memory, get_worker_used_resources, is_worker_connected, @@ -378,6 +379,19 @@ def _add_fct(x: int, y: int) -> int: ) +async def test_compute_cluster_total_resources( + scheduler_url: AnyUrl, + scheduler_authentication: ClusterAuthentication, +): + # asking for resources of empty cluster returns empty resources + assert ( + await 
compute_cluster_total_resources( + scheduler_url, scheduler_authentication, [] + ) + == Resources.create_as_empty() + ) + + @pytest.mark.parametrize( "dask_nthreads, dask_nthreads_multiplier, expected_threads_resource", [(4, 1, 4), (4, 2, 8), (0, 2.0, -1)], From 067f8ff756e02b12976a08ca424b36856b9e6d1f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:45:19 +0200 Subject: [PATCH 31/93] implemented compute cluster total resources --- .../_provider_computational.py | 4 +++- .../modules/dask.py | 22 +++++++++++------ .../tests/unit/test_modules_dask.py | 24 ++++++++++++++++++- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index b55c8fc2f02e..f73fa60a3c7d 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -155,7 +155,9 @@ async def compute_cluster_total_resources( assert self # nosec try: return await dask.compute_cluster_total_resources( - _scheduler_url(app), _scheduler_auth(app), instances + _scheduler_url(app), + _scheduler_auth(app), + [i.ec2_instance for i in instances], ) except DaskNoWorkersError: return Resources.create_as_empty() diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 7d8f8c289c4f..c07baf992034 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -21,7 +21,7 @@ DaskWorkerNotFoundError, ) from ..core.settings import DaskMonitoringSettings -from ..models import 
AssociatedInstance, DaskTask, DaskTaskId +from ..models import DaskTask, DaskTaskId from ..utils.utils_ec2 import ( node_host_name_from_ec2_private_dns, node_ip_from_ec2_private_dns, @@ -306,23 +306,31 @@ def _list_processing_tasks_on_worker( async def compute_cluster_total_resources( scheduler_url: AnyUrl, authentication: ClusterAuthentication, - instances: list[AssociatedInstance], + instances: list[EC2InstanceData], ) -> Resources: if not instances: return Resources.create_as_empty() async with _scheduler_client(scheduler_url, authentication) as client: - instance_hosts = ( - node_ip_from_ec2_private_dns(i.ec2_instance) for i in instances - ) + instance_host_resources_map = { + node_ip_from_ec2_private_dns(i): i.resources for i in instances + } scheduler_info = client.scheduler_info() if "workers" not in scheduler_info or not scheduler_info["workers"]: raise DaskNoWorkersError(url=scheduler_url) workers: dict[str, Any] = scheduler_info["workers"] + cluster_resources = Resources.create_as_empty() for worker_details in workers.values(): - if worker_details["host"] not in instance_hosts: + if worker_details["host"] not in instance_host_resources_map: continue + worker_ram = worker_details["memory_limit"] + worker_threads = worker_details["nthreads"] + cluster_resources += Resources( + cpus=instance_host_resources_map[worker_details["host"]].cpus, + ram=TypeAdapter(ByteSize).validate_python(worker_ram), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads}, + ) - return Resources.create_as_empty() + return cluster_resources async def try_retire_nodes( diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 4d3e667ad5df..ebaab72ca406 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -380,8 +380,11 @@ def _add_fct(x: int, y: int) -> int: async def test_compute_cluster_total_resources( + 
dask_spec_local_cluster: distributed.SpecCluster, scheduler_url: AnyUrl, scheduler_authentication: ClusterAuthentication, + fake_ec2_instance_data: Callable[..., EC2InstanceData], + fake_localhost_ec2_instance_data: EC2InstanceData, ): # asking for resources of empty cluster returns empty resources assert ( @@ -390,6 +393,26 @@ async def test_compute_cluster_total_resources( ) == Resources.create_as_empty() ) + ec2_instance_data = fake_ec2_instance_data() + assert ec2_instance_data.resources.cpus > 0 + assert ec2_instance_data.resources.ram > 0 + assert ec2_instance_data.resources.generic_resources == {} + assert ( + await compute_cluster_total_resources( + scheduler_url, scheduler_authentication, [ec2_instance_data] + ) + == Resources.create_as_empty() + ), "this instance is not connected and should not be accounted for" + + cluster_total_resources = await compute_cluster_total_resources( + scheduler_url, scheduler_authentication, [fake_localhost_ec2_instance_data] + ) + assert cluster_total_resources.cpus > 0 + assert cluster_total_resources.ram > 0 + assert DASK_WORKER_THREAD_RESOURCE_NAME in cluster_total_resources.generic_resources + assert ( + cluster_total_resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] == 2 + ) @pytest.mark.parametrize( @@ -454,7 +477,6 @@ async def test_is_worker_connected( async def test_is_worker_retired( - dask_spec_local_cluster: distributed.SpecCluster, scheduler_url: AnyUrl, scheduler_authentication: ClusterAuthentication, fake_ec2_instance_data: Callable[..., EC2InstanceData], From 7048af26086b947409d226fadc8feaf112aae815 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:02:01 +0200 Subject: [PATCH 32/93] adjusted compute used resources --- .../modules/dask.py | 20 +++++++++++++++---- .../tests/unit/test_modules_dask.py | 6 +++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git 
a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index c07baf992034..f6a234b2e508 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -295,12 +295,18 @@ def _list_processing_tasks_on_worker( total_resources_used.update(task_resources) _logger.debug("found %s for %s", f"{total_resources_used=}", f"{worker_url=}") - return Resources( + worker_used_resources = Resources( cpus=total_resources_used.get("CPU", 0), ram=TypeAdapter(ByteSize).validate_python( total_resources_used.get("RAM", 0) ), ) + if worker_processing_tasks: + worker_used_resources.generic_resources[ + DASK_WORKER_THREAD_RESOURCE_NAME + ] = len(worker_processing_tasks) + + return worker_used_resources async def compute_cluster_total_resources( @@ -322,11 +328,17 @@ async def compute_cluster_total_resources( for worker_details in workers.values(): if worker_details["host"] not in instance_host_resources_map: continue - worker_ram = worker_details["memory_limit"] + worker_dask_resources = worker_details["resources"] worker_threads = worker_details["nthreads"] cluster_resources += Resources( - cpus=instance_host_resources_map[worker_details["host"]].cpus, - ram=TypeAdapter(ByteSize).validate_python(worker_ram), + cpus=worker_dask_resources.get( + "CPU", instance_host_resources_map[worker_details["host"]].cpus + ), + ram=TypeAdapter(ByteSize).validate_python( + worker_dask_resources.get( + "RAM", instance_host_resources_map[worker_details["host"]].ram + ) + ), generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads}, ) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index ebaab72ca406..1b2b6aac1bb3 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ 
-365,7 +365,11 @@ def _add_fct(x: int, y: int) -> int: await _wait_for_dask_scheduler_to_change_state() assert await get_worker_used_resources( scheduler_url, scheduler_authentication, fake_localhost_ec2_instance_data - ) == Resources(cpus=num_cpus, ram=ByteSize(0)) + ) == Resources( + cpus=num_cpus, + ram=ByteSize(0), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + ) result = await future_queued_task.result(timeout=_DASK_SCHEDULER_REACTION_TIME_S) # type: ignore assert result == 7 From 52ee133ea861fdc626f08727a0a72ec8fff46881 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:11:56 +0200 Subject: [PATCH 33/93] testing --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index f6a234b2e508..431554e2ad67 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -8,7 +8,6 @@ import dask.typing import distributed -import distributed.scheduler from aws_library.ec2 import EC2InstanceData, Resources from dask_task_models_library.resource_constraints import DaskTaskResources from distributed.core import Status From 171e702d80864cec0088bea7fcad45cdcb7caf7b Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 14:56:19 +0200 Subject: [PATCH 34/93] simplify --- .../modules/dask.py | 53 +++++++++---------- .../tests/unit/test_modules_dask.py | 7 ++- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 431554e2ad67..52bb06d497c4 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -185,21 +185,16 @@ def _list_tasks( return [ DaskTask( task_id=_dask_key_to_dask_task_id(task_id), - required_resources=task_resources, + required_resources=task_resources + | {DASK_WORKER_THREAD_RESOURCE_NAME: 1}, ) for task_id, task_resources in list_of_tasks.items() ] -async def list_processing_tasks_per_worker( - scheduler_url: AnyUrl, - authentication: ClusterAuthentication, -) -> dict[DaskWorkerUrl, list[DaskTask]]: - """ - Raises: - DaskSchedulerNotFoundError - """ - +async def _list_cluster_processing_tasks( + client: distributed.Client, +) -> dict[DaskWorkerUrl, list[tuple[dask.typing.Key, DaskTaskResources]]]: def _list_processing_tasks( dask_scheduler: distributed.Scheduler, ) -> dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]]: @@ -211,13 +206,26 @@ def _list_processing_tasks( ) return worker_to_processing_tasks + list_of_tasks: dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]] = ( + await client.run_on_scheduler(_list_processing_tasks) + ) + _logger.debug("found processing tasks: %s", list_of_tasks) + + return list_of_tasks + + +async def list_processing_tasks_per_worker( + scheduler_url: AnyUrl, + authentication: ClusterAuthentication, +) -> dict[DaskWorkerUrl, list[DaskTask]]: + """ + Raises: + DaskSchedulerNotFoundError + """ + async with _scheduler_client(scheduler_url, authentication) as client: - worker_to_tasks: dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]] = ( - await _wrap_client_async_routine( - client.run_on_scheduler(_list_processing_tasks) - ) - ) - _logger.debug("found processing tasks: %s", worker_to_tasks) + worker_to_tasks = await _list_cluster_processing_tasks(client) + tasks_per_worker = defaultdict(list) for worker, tasks in worker_to_tasks.items(): for task_id, required_resources in tasks: @@ -277,17 +285,8 @@ def _list_processing_tasks_on_worker( async 
with _scheduler_client(scheduler_url, authentication) as client: worker_url, _ = _dask_worker_from_ec2_instance(client, ec2_instance) - - _logger.debug("looking for processing tasks for %s", f"{worker_url=}") - - # now get the used resources - worker_processing_tasks: list[tuple[dask.typing.Key, DaskTaskResources]] = ( - await _wrap_client_async_routine( - client.run_on_scheduler( - _list_processing_tasks_on_worker, worker_url=worker_url - ), - ) - ) + worker_to_tasks = await _list_cluster_processing_tasks(client) + worker_processing_tasks = worker_to_tasks.get(worker_url, []) total_resources_used: collections.Counter[str] = collections.Counter() for _, task_resources in worker_processing_tasks: diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 1b2b6aac1bb3..4d721b3f28ad 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -126,7 +126,12 @@ async def test_list_unrunnable_tasks( future = create_dask_task(dask_task_impossible_resources) assert future assert await list_unrunnable_tasks(scheduler_url, scheduler_authentication) == [ - DaskTask(task_id=future.key, required_resources=dask_task_impossible_resources) + DaskTask( + task_id=future.key, + required_resources=( + dask_task_impossible_resources | {DASK_WORKER_THREAD_RESOURCE_NAME: 1} + ), + ) ] # remove that future, will remove the task del future From 7328d7500d16704b461b959e580e2cfc3167e4da Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:10:45 +0200 Subject: [PATCH 35/93] simplify --- .../modules/dask.py | 19 +++++-------------- .../tests/unit/test_modules_dask.py | 5 ++++- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 
52bb06d497c4..92d3c13ee250 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -202,7 +202,11 @@ def _list_processing_tasks( for task_key, task_state in dask_scheduler.tasks.items(): if task_state.processing_on: worker_to_processing_tasks[task_state.processing_on.address].append( - (task_key, task_state.resource_restrictions or {}) + ( + task_key, + (task_state.resource_restrictions or {}) + | {DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + ) ) return worker_to_processing_tasks @@ -270,19 +274,6 @@ async def get_worker_used_resources( DaskNoWorkersError """ - def _list_processing_tasks_on_worker( - dask_scheduler: distributed.Scheduler, *, worker_url: str - ) -> list[tuple[dask.typing.Key, DaskTaskResources]]: - processing_tasks = [] - for task_key, task_state in dask_scheduler.tasks.items(): - if task_state.processing_on and ( - task_state.processing_on.address == worker_url - ): - processing_tasks.append( - (task_key, task_state.resource_restrictions or {}) - ) - return processing_tasks - async with _scheduler_client(scheduler_url, authentication) as client: worker_url, _ = _dask_worker_from_ec2_instance(client, ec2_instance) worker_to_tasks = await _list_cluster_processing_tasks(client) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 4d721b3f28ad..4f6b4ce73366 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -166,7 +166,10 @@ def _add_fct(x: int, y: int) -> int: scheduler_url, scheduler_authentication ) == { next(iter(dask_spec_cluster_client.scheduler_info()["workers"])): [ - DaskTask(task_id=DaskTaskId(future_queued_task.key), required_resources={}) + DaskTask( + task_id=DaskTaskId(future_queued_task.key), + required_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + ) ] } From 
addb6fd058fd243f2625535aa2f57c987f677005 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:10:59 +0200 Subject: [PATCH 36/93] create a typed dict --- .../src/dask_task_models_library/resource_constraints.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 3a81114ef878..27b5bb1cb192 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -1,8 +1,13 @@ -from typing import Any, TypeAlias +from typing import Literal, TypedDict from .constants import DASK_TASK_EC2_RESOURCE_RESTRICTION_KEY -DaskTaskResources: TypeAlias = dict[str, Any] + +class DaskTaskResources(TypedDict): + CPU: float + RAM: int # in bytes + # threads is a constant of 1 (enforced by static type checkers via Literal) + threads: Literal[1] def create_ec2_resource_constraint_key(ec2_instance_type: str) -> str: From 82889c4737bbca77d6c86c6c414642cfb4b9d38c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:29:58 +0200 Subject: [PATCH 37/93] simplify --- .../modules/dask.py | 81 +++++++++++-------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 92d3c13ee250..500eec402fa9 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -4,7 +4,7 @@ import re from collections import defaultdict from collections.abc import AsyncIterator, Coroutine -from typing import Any, Final, TypeAlias +from 
typing import Any, Final, TypeAlias, TypedDict import dask.typing import distributed @@ -119,6 +119,44 @@ def _find_by_worker_host( return next(iter(filtered_workers.items())) +class DaskClusterTasks(TypedDict): + processing: dict[DaskWorkerUrl, list[tuple[dask.typing.Key, DaskTaskResources]]] + unrunnable: dict[dask.typing.Key, DaskTaskResources] + + +async def _list_cluster_known_tasks( + client: distributed.Client, +) -> DaskClusterTasks: + def _list_on_scheduler( + dask_scheduler: distributed.Scheduler, + ) -> DaskClusterTasks: + worker_to_processing_tasks = defaultdict(list) + unrunnable_tasks = {} + for task_key, task_state in dask_scheduler.tasks.items(): + if task_state.processing_on: + worker_to_processing_tasks[task_state.processing_on.address].append( + ( + task_key, + (task_state.resource_restrictions or {}) + | {DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + ) + ) + elif task_state in dask_scheduler.unrunnable: + unrunnable_tasks[task_key] = ( + task_state.resource_restrictions or {} + ) | {DASK_WORKER_THREAD_RESOURCE_NAME: 1} + + return DaskClusterTasks( + processing=dict(worker_to_processing_tasks), + unrunnable=unrunnable_tasks, + ) + + list_of_tasks: DaskClusterTasks = await client.run_on_scheduler(_list_on_scheduler) + _logger.debug("found tasks: %s", list_of_tasks) + + return list_of_tasks + + async def is_worker_connected( scheduler_url: AnyUrl, authentication: ClusterAuthentication, @@ -178,10 +216,9 @@ def _list_tasks( } async with _scheduler_client(scheduler_url, authentication) as client: - list_of_tasks: dict[dask.typing.Key, DaskTaskResources] = ( - await _wrap_client_async_routine(client.run_on_scheduler(_list_tasks)) - ) - _logger.debug("found unrunnable tasks: %s", list_of_tasks) + known_tasks = await _list_cluster_known_tasks(client) + list_of_tasks = known_tasks["unrunnable"] + return [ DaskTask( task_id=_dask_key_to_dask_task_id(task_id), @@ -192,32 +229,6 @@ def _list_tasks( ] -async def _list_cluster_processing_tasks( - client: 
distributed.Client, -) -> dict[DaskWorkerUrl, list[tuple[dask.typing.Key, DaskTaskResources]]]: - def _list_processing_tasks( - dask_scheduler: distributed.Scheduler, - ) -> dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]]: - worker_to_processing_tasks = defaultdict(list) - for task_key, task_state in dask_scheduler.tasks.items(): - if task_state.processing_on: - worker_to_processing_tasks[task_state.processing_on.address].append( - ( - task_key, - (task_state.resource_restrictions or {}) - | {DASK_WORKER_THREAD_RESOURCE_NAME: 1}, - ) - ) - return worker_to_processing_tasks - - list_of_tasks: dict[str, list[tuple[dask.typing.Key, DaskTaskResources]]] = ( - await client.run_on_scheduler(_list_processing_tasks) - ) - _logger.debug("found processing tasks: %s", list_of_tasks) - - return list_of_tasks - - async def list_processing_tasks_per_worker( scheduler_url: AnyUrl, authentication: ClusterAuthentication, @@ -228,10 +239,10 @@ async def list_processing_tasks_per_worker( """ async with _scheduler_client(scheduler_url, authentication) as client: - worker_to_tasks = await _list_cluster_processing_tasks(client) + worker_to_tasks = await _list_cluster_known_tasks(client) tasks_per_worker = defaultdict(list) - for worker, tasks in worker_to_tasks.items(): + for worker, tasks in worker_to_tasks["processing"].items(): for task_id, required_resources in tasks: tasks_per_worker[worker].append( DaskTask( @@ -276,8 +287,8 @@ async def get_worker_used_resources( async with _scheduler_client(scheduler_url, authentication) as client: worker_url, _ = _dask_worker_from_ec2_instance(client, ec2_instance) - worker_to_tasks = await _list_cluster_processing_tasks(client) - worker_processing_tasks = worker_to_tasks.get(worker_url, []) + known_tasks = await _list_cluster_known_tasks(client) + worker_processing_tasks = known_tasks["processing"].get(worker_url, []) total_resources_used: collections.Counter[str] = collections.Counter() for _, task_resources in 
worker_processing_tasks: From 711a7e9d36a30dd4756cb1fb302a91d9e9b6020a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:35:28 +0200 Subject: [PATCH 38/93] moved naming --- .../dask_task_models_library/resource_constraints.py | 6 ++++-- .../src/simcore_service_autoscaling/modules/dask.py | 6 ++++-- services/autoscaling/tests/unit/test_modules_dask.py | 12 ++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 27b5bb1cb192..49f050a05a46 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -1,9 +1,11 @@ -from typing import Literal, TypedDict +from typing import Final, Literal, TypedDict from .constants import DASK_TASK_EC2_RESOURCE_RESTRICTION_KEY +DASK_WORKER_THREAD_RESOURCE_NAME: Final[str] = "threads" -class DaskTaskResources(TypedDict): + +class DaskTaskResources(TypedDict, total=False): CPU: float RAM: int # in bytes # threads is a constant of 1 (enforced by static type checkers via Literal) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 500eec402fa9..9f16ca74e425 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -9,7 +9,10 @@ import dask.typing import distributed from aws_library.ec2 import EC2InstanceData, Resources -from dask_task_models_library.resource_constraints import DaskTaskResources +from dask_task_models_library.resource_constraints import ( + DASK_WORKER_THREAD_RESOURCE_NAME, + DaskTaskResources, +) from distributed.core import 
Status from models_library.clusters import ClusterAuthentication, TLSAuthentication from pydantic import AnyUrl, ByteSize, TypeAdapter @@ -39,7 +42,6 @@ async def _wrap_client_async_routine( _DASK_SCHEDULER_CONNECT_TIMEOUT_S: Final[int] = 5 -DASK_WORKER_THREAD_RESOURCE_NAME: Final[str] = "threads" @contextlib.asynccontextmanager diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 4f6b4ce73366..6cc5b98a1516 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -11,6 +11,9 @@ import pytest from arrow import utcnow from aws_library.ec2 import Resources +from dask_task_models_library.resource_constraints import ( + DASK_WORKER_THREAD_RESOURCE_NAME, +) from faker import Faker from models_library.clusters import ( ClusterAuthentication, @@ -31,7 +34,6 @@ EC2InstanceData, ) from simcore_service_autoscaling.modules.dask import ( - DASK_WORKER_THREAD_RESOURCE_NAME, DaskMonitoringSettings, DaskTask, _scheduler_client, @@ -122,15 +124,13 @@ async def test_list_unrunnable_tasks( # we have nothing running now assert await list_unrunnable_tasks(scheduler_url, scheduler_authentication) == [] # start a task that cannot run - dask_task_impossible_resources = {"XRAM": 213} + dask_task_impossible_resources = DaskTaskResources(XRAM=213, threads=1) future = create_dask_task(dask_task_impossible_resources) assert future assert await list_unrunnable_tasks(scheduler_url, scheduler_authentication) == [ DaskTask( task_id=future.key, - required_resources=( - dask_task_impossible_resources | {DASK_WORKER_THREAD_RESOURCE_NAME: 1} - ), + required_resources=(dask_task_impossible_resources), ) ] # remove that future, will remove the task @@ -168,7 +168,7 @@ def _add_fct(x: int, y: int) -> int: next(iter(dask_spec_cluster_client.scheduler_info()["workers"])): [ DaskTask( task_id=DaskTaskId(future_queued_task.key), - 
required_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + required_resources=DaskTaskResources(threads=1), ) ] } From b236f576ed69d1d3f5389484b145fa71c508daf6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:06:12 +0200 Subject: [PATCH 39/93] more --- .../modules/dask.py | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 9f16ca74e425..78489740507b 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -208,15 +208,6 @@ async def list_unrunnable_tasks( DaskSchedulerNotFoundError """ - def _list_tasks( - dask_scheduler: distributed.Scheduler, - ) -> dict[dask.typing.Key, dict[str, float]]: - # NOTE: task.key can be a byte, str, or a tuple - return { - task.key: task.resource_restrictions or {} - for task in dask_scheduler.unrunnable - } - async with _scheduler_client(scheduler_url, authentication) as client: known_tasks = await _list_cluster_known_tasks(client) list_of_tasks = known_tasks["unrunnable"] @@ -224,8 +215,7 @@ def _list_tasks( return [ DaskTask( task_id=_dask_key_to_dask_task_id(task_id), - required_resources=task_resources - | {DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + required_resources=task_resources, ) for task_id, task_resources in list_of_tasks.items() ] @@ -291,24 +281,23 @@ async def get_worker_used_resources( worker_url, _ = _dask_worker_from_ec2_instance(client, ec2_instance) known_tasks = await _list_cluster_known_tasks(client) worker_processing_tasks = known_tasks["processing"].get(worker_url, []) + if not worker_processing_tasks: + return Resources.create_as_empty() total_resources_used: collections.Counter[str] = collections.Counter() for _, task_resources in worker_processing_tasks: 
total_resources_used.update(task_resources) _logger.debug("found %s for %s", f"{total_resources_used=}", f"{worker_url=}") - worker_used_resources = Resources( + return Resources( cpus=total_resources_used.get("CPU", 0), ram=TypeAdapter(ByteSize).validate_python( total_resources_used.get("RAM", 0) ), + generic_resources={ + k: v for k, v in total_resources_used.items() if k not in {"CPU", "RAM"} + }, ) - if worker_processing_tasks: - worker_used_resources.generic_resources[ - DASK_WORKER_THREAD_RESOURCE_NAME - ] = len(worker_processing_tasks) - - return worker_used_resources async def compute_cluster_total_resources( From d9682ff755c8e0d075afb20a1c7ebcf7dc923761 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:51:00 +0200 Subject: [PATCH 40/93] mypy --- .../src/simcore_service_autoscaling/modules/dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 78489740507b..fc6adfb6014a 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -149,8 +149,8 @@ def _list_on_scheduler( ) | {DASK_WORKER_THREAD_RESOURCE_NAME: 1} return DaskClusterTasks( - processing=dict(worker_to_processing_tasks), - unrunnable=unrunnable_tasks, + processing=worker_to_processing_tasks, # type: ignore[typeddict-item] + unrunnable=unrunnable_tasks, # type: ignore[typeddict-item] ) list_of_tasks: DaskClusterTasks = await client.run_on_scheduler(_list_on_scheduler) From fb5f001549e2de61b57ea83aba1152306a177d3f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:53:39 +0200 Subject: [PATCH 41/93] mypy --- .../modules/cluster_scaling/_utils_computational.py | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 4fb76ee5e129..6a351a4d1c9e 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -3,8 +3,10 @@ from aws_library.ec2 import Resources from dask_task_models_library.resource_constraints import ( + DASK_WORKER_THREAD_RESOURCE_NAME, get_ec2_instance_type_from_resources, ) +from pydantic import ByteSize from ...models import DaskTask @@ -17,7 +19,8 @@ def resources_from_dask_task(task: DaskTask) -> Resources: return Resources( cpus=task.required_resources.get("CPU", _DEFAULT_MAX_CPU), - ram=task.required_resources.get("RAM", _DEFAULT_MAX_RAM), + ram=ByteSize(task.required_resources.get("RAM", _DEFAULT_MAX_RAM)), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, ) From adfdb9c3a01d7f9ac049bc1de5315c6243c153a5 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 26 Sep 2025 17:13:27 +0200 Subject: [PATCH 42/93] fix test --- .../cluster_scaling/_utils_computational.py | 29 +++++++++++++++---- ...les_cluster_scaling_utils_computational.py | 23 ++++++++++++--- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 6a351a4d1c9e..3584c7afd290 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -3,7 +3,7 @@ from aws_library.ec2 import Resources 
from dask_task_models_library.resource_constraints import ( - DASK_WORKER_THREAD_RESOURCE_NAME, + DaskTaskResources, get_ec2_instance_type_from_resources, ) from pydantic import ByteSize @@ -15,13 +15,32 @@ _DEFAULT_MAX_CPU: Final[float] = 1 _DEFAULT_MAX_RAM: Final[int] = 1024 +_DASK_TO_RESOURCE_NAME_MAPPING: Final[dict[str, str]] = { + "CPU": "cpus", + "RAM": "ram", +} +_DEFAULT_DASK_RESOURCES: Final[DaskTaskResources] = DaskTaskResources( + CPU=_DEFAULT_MAX_CPU, RAM=ByteSize(_DEFAULT_MAX_RAM), threads=1 +) + def resources_from_dask_task(task: DaskTask) -> Resources: - return Resources( - cpus=task.required_resources.get("CPU", _DEFAULT_MAX_CPU), - ram=ByteSize(task.required_resources.get("RAM", _DEFAULT_MAX_RAM)), - generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + task_resources = ( + _DEFAULT_DASK_RESOURCES | task.required_resources + ) # merge with defaults + + return Resources.from_flat_dict( + {_DASK_TO_RESOURCE_NAME_MAPPING.get(k, k): v for k, v in task_resources.items()} ) + # ({ + # "cpus": task.required_resources.get("CPU", _DEFAULT_MAX_CPU), + # "ram": task.required_resources.get("RAM", _DEFAULT_MAX_RAM), + # } + # ) + # return Resources( + # cpus=task.required_resources.get("CPU", _DEFAULT_MAX_CPU), + # ram=ByteSize(task.required_resources.get("RAM", _DEFAULT_MAX_RAM)), + # ) def get_task_instance_restriction(task: DaskTask) -> str | None: diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_utils_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_utils_computational.py index e412487f4ea6..e051766dae31 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_utils_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_utils_computational.py @@ -6,6 +6,9 @@ import pytest from aws_library.ec2 import Resources +from dask_task_models_library.resource_constraints import ( + DASK_WORKER_THREAD_RESOURCE_NAME, +) from pydantic import ByteSize, TypeAdapter 
from simcore_service_autoscaling.models import DaskTask, DaskTaskResources from simcore_service_autoscaling.modules.cluster_scaling._utils_computational import ( @@ -23,13 +26,16 @@ Resources( cpus=_DEFAULT_MAX_CPU, ram=TypeAdapter(ByteSize).validate_python(_DEFAULT_MAX_RAM), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, ), id="missing resources returns defaults", ), pytest.param( DaskTask(task_id="fake", required_resources={"CPU": 2.5}), Resources( - cpus=2.5, ram=TypeAdapter(ByteSize).validate_python(_DEFAULT_MAX_RAM) + cpus=2.5, + ram=TypeAdapter(ByteSize).validate_python(_DEFAULT_MAX_RAM), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, ), id="only cpus defined", ), @@ -38,16 +44,25 @@ task_id="fake", required_resources={"CPU": 2.5, "RAM": 2 * 1024 * 1024 * 1024}, ), - Resources(cpus=2.5, ram=TypeAdapter(ByteSize).validate_python("2GiB")), + Resources( + cpus=2.5, + ram=TypeAdapter(ByteSize).validate_python("2GiB"), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, + ), id="cpu and ram defined", ), pytest.param( DaskTask( task_id="fake", - required_resources={"CPU": 2.5, "ram": 2 * 1024 * 1024 * 1024}, + required_resources={"CPU": 2.5, "xram": 2 * 1024 * 1024 * 1024}, # type: ignore ), Resources( - cpus=2.5, ram=TypeAdapter(ByteSize).validate_python(_DEFAULT_MAX_RAM) + cpus=2.5, + ram=TypeAdapter(ByteSize).validate_python(_DEFAULT_MAX_RAM), + generic_resources={ + DASK_WORKER_THREAD_RESOURCE_NAME: 1, + "xram": 2 * 1024 * 1024 * 1024, + }, ), id="invalid naming", ), From a402559bd3cc3e2b3aab7a0308370bfb917fc930 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:08:56 +0200 Subject: [PATCH 43/93] mypy --- .../cluster_scaling/_utils_computational.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py 
b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 3584c7afd290..01fcff71523f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -1,5 +1,5 @@ import logging -from typing import Final +from typing import Final, cast from aws_library.ec2 import Resources from dask_task_models_library.resource_constraints import ( @@ -30,17 +30,11 @@ def resources_from_dask_task(task: DaskTask) -> Resources: ) # merge with defaults return Resources.from_flat_dict( - {_DASK_TO_RESOURCE_NAME_MAPPING.get(k, k): v for k, v in task_resources.items()} + { + _DASK_TO_RESOURCE_NAME_MAPPING.get(k, k): cast(int | float | str, v) + for k, v in task_resources.items() + } ) - # ({ - # "cpus": task.required_resources.get("CPU", _DEFAULT_MAX_CPU), - # "ram": task.required_resources.get("RAM", _DEFAULT_MAX_RAM), - # } - # ) - # return Resources( - # cpus=task.required_resources.get("CPU", _DEFAULT_MAX_CPU), - # ram=ByteSize(task.required_resources.get("RAM", _DEFAULT_MAX_RAM)), - # ) def get_task_instance_restriction(task: DaskTask) -> str | None: From 97a195b64ae505fb9f00e9b8e76ee4de729ca756 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:09:03 +0200 Subject: [PATCH 44/93] improve docs --- packages/aws-library/src/aws_library/ec2/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 5045d34541df..20960b5b6862 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -59,7 +59,7 @@ def __ge__(self, other: "Resources") -> bool: a = self.generic_resources.get(k) b = other.generic_resources.get( k, a - ) # NOTE: 
get from other, default to a so that non-existing keys are considered equal + ) # NOTE: get from other, default to "a" resources so that non-existing keys can be compared as equal if isinstance(a, int | float) and isinstance(b, int | float): if not (a >= b): return False From 89c547cc760d9258add9ac64f1796809c48c6fc4 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:11:18 +0200 Subject: [PATCH 45/93] remove todo --- .../modules/cluster_scaling/_provider_computational.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index f73fa60a3c7d..7d12ae19e6e7 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -90,10 +90,7 @@ def get_task_required_resources(self, task) -> Resources: assert self # nosec # NOTE: a dask worker can take a task if it has a free thread, regardless of its resources # so we need to be careful when interpreting the resources, adding the thread here will mimick this - task_required_resources = utils.resources_from_dask_task(task) - # TODO: should we add a generic resource for threads? 
- # task_required_resources.generic_resources[_DASK_WORKER_THREAD_RESOURCE_NAME] = 1 - return task_required_resources + return utils.resources_from_dask_task(task) async def get_task_defined_instance( self, app: FastAPI, task From 6179fe79df5a3bc54e288bb9a1d111dad7a8d563 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:46:29 +0200 Subject: [PATCH 46/93] re-added gt operator --- .../src/aws_library/ec2/_models.py | 9 +++ packages/aws-library/tests/test_ec2_models.py | 79 +++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 20960b5b6862..4ca976618747 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -69,6 +69,15 @@ def __ge__(self, other: "Resources") -> bool: return False return True + def __gt__(self, other: "Resources") -> bool: + """operator for > comparison + if self has greater resources than other, returns True + Note that generic_resources are compared only if they are numeric + Non-numeric generic resources must be equal in both or only defined in self + to be considered greater + """ + return self >= other and self != other + def __add__(self, other: "Resources") -> "Resources": """operator for adding two Resources Note that only numeric generic resources are added diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index b83b57e75f57..00767b220f7f 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -96,6 +96,85 @@ def test_resources_ge_operator( assert (a >= b) is a_greater_or_equal_than_b +@pytest.mark.parametrize( + "a,b,a_greater_than_b", + [ + ( + Resources(cpus=0.2, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(0)), + True, + ), + ( + Resources(cpus=0.1, 
ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(0)), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=0.1, ram=ByteSize(0)), + True, + ), + ( + Resources(cpus=0.05, ram=ByteSize(1)), + Resources(cpus=0.1, ram=ByteSize(0)), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(0)), + Resources(cpus=0.1, ram=ByteSize(1)), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1)), + False, # ram is not enough + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1)), + True, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "2"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1)), + True, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + False, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1)), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + False, + ), + ], +) +def test_resources_gt_operator(a: Resources, b: Resources, a_greater_than_b: bool): + assert (a > b) is a_greater_than_b + + @pytest.mark.parametrize( "a,b,result", [ From 03a26142bcffe33d949e727809d599efe9e7741f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 
Oct 2025 18:06:00 +0200 Subject: [PATCH 47/93] use Required --- .../dask_task_models_library/resource_constraints.py | 11 +++++++---- .../tests/test_resource_constraints.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 49f050a05a46..55c700541a73 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -1,4 +1,4 @@ -from typing import Final, Literal, TypedDict +from typing import Final, Literal, Required, TypedDict from .constants import DASK_TASK_EC2_RESOURCE_RESTRICTION_KEY @@ -6,10 +6,13 @@ class DaskTaskResources(TypedDict, total=False): - CPU: float - RAM: int # in bytes + CPU: Required[float] + RAM: Required[int] # in bytes # threads is a constant of 1 (enforced by static type checkers via Literal) - threads: Literal[1] + # NOTE: a dask worker can take a task if it has a free thread, + # regardless of its resources so we need to be careful when interpreting + # the resources, adding the thread here will mimick this + threads: Required[Literal[1]] def create_ec2_resource_constraint_key(ec2_instance_type: str) -> str: diff --git a/packages/dask-task-models-library/tests/test_resource_constraints.py b/packages/dask-task-models-library/tests/test_resource_constraints.py index 9a2c1e59e26b..121d2b740d23 100644 --- a/packages/dask-task-models-library/tests/test_resource_constraints.py +++ b/packages/dask-task-models-library/tests/test_resource_constraints.py @@ -1,11 +1,23 @@ from dask_task_models_library.constants import DASK_TASK_EC2_RESOURCE_RESTRICTION_KEY from dask_task_models_library.resource_constraints import ( + DaskTaskResources, create_ec2_resource_constraint_key, get_ec2_instance_type_from_resources, ) 
from faker import Faker +def test_dask_task_resource(faker: Faker): + task_resources = DaskTaskResources( + CPU=faker.pyfloat(min_value=0.1, max_value=100), + RAM=faker.pyint(min_value=1024, max_value=1024**3), + threads=1, + ) + assert task_resources["threads"] == 1 + assert task_resources["CPU"] > 0 + assert task_resources["RAM"] >= 1024 + + def test_create_ec2_resource_constraint_key(faker: Faker): faker_instance_type = faker.pystr() assert ( From 8e0f9055727de86163f0f930fc66122c23e3ba93 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:06:53 +0200 Subject: [PATCH 48/93] improve docs --- .../modules/cluster_scaling/_utils_computational.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 01fcff71523f..8ec895348108 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -27,7 +27,7 @@ def resources_from_dask_task(task: DaskTask) -> Resources: task_resources = ( _DEFAULT_DASK_RESOURCES | task.required_resources - ) # merge with defaults + ) # merge with defaults to ensure there is always some minimal resource defined return Resources.from_flat_dict( { From 74d8ca81b18581810397c70d3fb9c310175d1cc0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:07:10 +0200 Subject: [PATCH 49/93] moved docs --- .../modules/cluster_scaling/_provider_computational.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py 
b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 7d12ae19e6e7..243674344a39 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -88,8 +88,6 @@ async def list_unrunnable_tasks(self, app: FastAPI) -> list[DaskTask]: def get_task_required_resources(self, task) -> Resources: assert self # nosec - # NOTE: a dask worker can take a task if it has a free thread, regardless of its resources - # so we need to be careful when interpreting the resources, adding the thread here will mimick this return utils.resources_from_dask_task(task) async def get_task_defined_instance( From 1158e99423292fa1c88e8fe1b5f7b74cc9e70a68 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:16:05 +0200 Subject: [PATCH 50/93] added mapping --- .../src/aws_library/ec2/_models.py | 21 ++++++++++++++----- packages/aws-library/tests/test_ec2_models.py | 7 +++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 4ca976618747..27e8024e5a42 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -141,12 +141,23 @@ def as_flat_dict(self) -> dict[str, int | float | str]: return base @classmethod - def from_flat_dict(cls, data: dict[str, int | float | str]) -> "Resources": - """Inverse of as_flat_dict""" - generic_resources = {k: v for k, v in data.items() if k not in {"cpus", "ram"}} + def from_flat_dict( + cls, + data: dict[str, int | float | str], + *, + mapping: dict[str, str] | None = None, + ) -> "Resources": + """Inverse of as_flat_dict with optional key mapping""" + mapped_data = data + if mapping: + mapped_data 
= {mapping.get(k, k): v for k, v in data.items()} + generic_resources = { + k: v for k, v in mapped_data.items() if k not in {"cpus", "ram"} + } + return cls( - cpus=float(data.get("cpus", 0)), - ram=ByteSize(data.get("ram", 0)), + cpus=float(mapped_data.get("cpus", 0)), + ram=ByteSize(mapped_data.get("ram", 0)), generic_resources=generic_resources, ) diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 00767b220f7f..131b24da87f8 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -298,6 +298,13 @@ def test_resources_flat_dict(): reconstructed = Resources.from_flat_dict(flat) assert reconstructed == r + # test with mapping + flat_with_oter_names = {"CPU": 0.1, "RAM": 1024, "GPU": 2, "SSE": "yes"} + reconstructed2 = Resources.from_flat_dict( + flat_with_oter_names, mapping={"CPU": "cpus", "RAM": "ram"} + ) + assert reconstructed2 == r + @pytest.mark.parametrize("ec2_tag_key", ["", "/", " ", ".", "..", "_index"]) def test_aws_tag_key_invalid(ec2_tag_key: str): From 49b4bb9ded8af3afb9ae5233443084286edff17e Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:17:40 +0200 Subject: [PATCH 51/93] added mapping --- .../modules/cluster_scaling/_utils_computational.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 8ec895348108..80e846398096 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -1,5 +1,5 @@ import logging -from typing import Final, cast +from typing import Final from aws_library.ec2 
import Resources from dask_task_models_library.resource_constraints import ( @@ -30,10 +30,7 @@ def resources_from_dask_task(task: DaskTask) -> Resources: ) # merge with defaults to ensure there is always some minimal resource defined return Resources.from_flat_dict( - { - _DASK_TO_RESOURCE_NAME_MAPPING.get(k, k): cast(int | float | str, v) - for k, v in task_resources.items() - } + task_resources.items(), mapping=_DASK_TO_RESOURCE_NAME_MAPPING ) From 83db960fb7a2d07aa6e15d45025b1361d661c593 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:19:58 +0200 Subject: [PATCH 52/93] improve error --- .../src/simcore_service_autoscaling/modules/dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index fc6adfb6014a..e759a0c08027 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -104,8 +104,8 @@ def _find_by_worker_host( _, details = dask_worker if match := re.match(DASK_NAME_PATTERN, details["name"]): return bool(match.group("private_ip") == node_hostname) - _logger.warning( - "Unexpected worker name format: %s. TIP: this should be investigated", + _logger.error( + "Unexpected worker name format: %s. 
TIP: this should be investigated as this is unexpected", details["name"], ) return False From 4d2281f1fc5abaf25051650ff78948cfee36afe6 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:24:27 +0200 Subject: [PATCH 53/93] make private --- .../src/simcore_service_autoscaling/modules/dask.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index e759a0c08027..d2ef18333574 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -121,17 +121,18 @@ def _find_by_worker_host( return next(iter(filtered_workers.items())) -class DaskClusterTasks(TypedDict): +class _DaskClusterTasks(TypedDict): processing: dict[DaskWorkerUrl, list[tuple[dask.typing.Key, DaskTaskResources]]] unrunnable: dict[dask.typing.Key, DaskTaskResources] async def _list_cluster_known_tasks( client: distributed.Client, -) -> DaskClusterTasks: +) -> _DaskClusterTasks: def _list_on_scheduler( dask_scheduler: distributed.Scheduler, - ) -> DaskClusterTasks: + ) -> _DaskClusterTasks: + worker_to_processing_tasks = defaultdict(list) unrunnable_tasks = {} for task_key, task_state in dask_scheduler.tasks.items(): @@ -148,12 +149,12 @@ def _list_on_scheduler( task_state.resource_restrictions or {} ) | {DASK_WORKER_THREAD_RESOURCE_NAME: 1} - return DaskClusterTasks( + return _DaskClusterTasks( processing=worker_to_processing_tasks, # type: ignore[typeddict-item] unrunnable=unrunnable_tasks, # type: ignore[typeddict-item] ) - list_of_tasks: DaskClusterTasks = await client.run_on_scheduler(_list_on_scheduler) + list_of_tasks: _DaskClusterTasks = await client.run_on_scheduler(_list_on_scheduler) _logger.debug("found tasks: %s", list_of_tasks) return list_of_tasks From 
7418819b9bbd3177fef3f2649f0eb93e3b86f0fb Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:30:28 +0200 Subject: [PATCH 54/93] simplify --- .../simcore_service_autoscaling/modules/dask.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index d2ef18333574..4f7495c0162d 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -28,6 +28,9 @@ node_host_name_from_ec2_private_dns, node_ip_from_ec2_private_dns, ) +from .cluster_scaling._utils_computational import ( + resources_from_dask_task, +) _logger = logging.getLogger(__name__) @@ -285,20 +288,12 @@ async def get_worker_used_resources( if not worker_processing_tasks: return Resources.create_as_empty() - total_resources_used: collections.Counter[str] = collections.Counter() + total_resources_used: collections.Counter = collections.Counter() for _, task_resources in worker_processing_tasks: total_resources_used.update(task_resources) _logger.debug("found %s for %s", f"{total_resources_used=}", f"{worker_url=}") - return Resources( - cpus=total_resources_used.get("CPU", 0), - ram=TypeAdapter(ByteSize).validate_python( - total_resources_used.get("RAM", 0) - ), - generic_resources={ - k: v for k, v in total_resources_used.items() if k not in {"CPU", "RAM"} - }, - ) + return resources_from_dask_task(total_resources_used) async def compute_cluster_total_resources( From 3f930bd9b691857ddb1d666ee766d303616dee39 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:33:36 +0200 Subject: [PATCH 55/93] simplify --- .../modules/cluster_scaling/_utils_computational.py | 4 ++-- .../src/simcore_service_autoscaling/modules/dask.py | 6 ++++-- 2 
files changed, 6 insertions(+), 4 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 80e846398096..0e6c8dbed549 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -15,7 +15,7 @@ _DEFAULT_MAX_CPU: Final[float] = 1 _DEFAULT_MAX_RAM: Final[int] = 1024 -_DASK_TO_RESOURCE_NAME_MAPPING: Final[dict[str, str]] = { +DASK_TO_RESOURCE_NAME_MAPPING: Final[dict[str, str]] = { "CPU": "cpus", "RAM": "ram", } @@ -30,7 +30,7 @@ def resources_from_dask_task(task: DaskTask) -> Resources: ) # merge with defaults to ensure there is always some minimal resource defined return Resources.from_flat_dict( - task_resources.items(), mapping=_DASK_TO_RESOURCE_NAME_MAPPING + task_resources.items(), mapping=DASK_TO_RESOURCE_NAME_MAPPING ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 4f7495c0162d..5570fccee15f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -29,7 +29,7 @@ node_ip_from_ec2_private_dns, ) from .cluster_scaling._utils_computational import ( - resources_from_dask_task, + DASK_TO_RESOURCE_NAME_MAPPING, ) _logger = logging.getLogger(__name__) @@ -293,7 +293,9 @@ async def get_worker_used_resources( total_resources_used.update(task_resources) _logger.debug("found %s for %s", f"{total_resources_used=}", f"{worker_url=}") - return resources_from_dask_task(total_resources_used) + return Resources.from_flat_dict( + dict(total_resources_used), mapping=DASK_TO_RESOURCE_NAME_MAPPING + ) async def 
compute_cluster_total_resources( From ec8cbaf614e4665b8e111bb1797d1dd4a3f4f3e5 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:46:54 +0200 Subject: [PATCH 56/93] fix computation --- .../modules/dask.py | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 5570fccee15f..1d921bbb3ba7 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -15,7 +15,7 @@ ) from distributed.core import Status from models_library.clusters import ClusterAuthentication, TLSAuthentication -from pydantic import AnyUrl, ByteSize, TypeAdapter +from pydantic import AnyUrl from ..core.errors import ( DaskNoWorkersError, @@ -306,7 +306,7 @@ async def compute_cluster_total_resources( if not instances: return Resources.create_as_empty() async with _scheduler_client(scheduler_url, authentication) as client: - instance_host_resources_map = { + ec2_instance_resources_map = { node_ip_from_ec2_private_dns(i): i.resources for i in instances } scheduler_info = client.scheduler_info() @@ -315,20 +315,17 @@ async def compute_cluster_total_resources( workers: dict[str, Any] = scheduler_info["workers"] cluster_resources = Resources.create_as_empty() for worker_details in workers.values(): - if worker_details["host"] not in instance_host_resources_map: + if worker_details["host"] not in ec2_instance_resources_map: continue + # get dask information about resources worker_dask_resources = worker_details["resources"] worker_threads = worker_details["nthreads"] - cluster_resources += Resources( - cpus=worker_dask_resources.get( - "CPU", instance_host_resources_map[worker_details["host"]].cpus - ), - ram=TypeAdapter(ByteSize).validate_python( - worker_dask_resources.get( 
- "RAM", instance_host_resources_map[worker_details["host"]].ram - ) - ), - generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads}, + worker_dask_resources = { + **worker_dask_resources, + DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads, + } + cluster_resources += Resources.from_flat_dict( + worker_dask_resources.items(), mapping=DASK_TO_RESOURCE_NAME_MAPPING ) return cluster_resources From f40f5dbae9bade6476917a21be3e3a6571fb0451 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:47:37 +0200 Subject: [PATCH 57/93] type --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 1d921bbb3ba7..c105057254c2 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -320,7 +320,7 @@ async def compute_cluster_total_resources( # get dask information about resources worker_dask_resources = worker_details["resources"] worker_threads = worker_details["nthreads"] - worker_dask_resources = { + worker_dask_resources: dict[str, int | float | str] = { **worker_dask_resources, DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads, } From fd3a58d8f9df4df6ba28032e31b908cf09ba5fec Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:48:21 +0200 Subject: [PATCH 58/93] type --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index c105057254c2..dd6dffde0aeb 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -325,7 +325,7 @@ async def compute_cluster_total_resources( DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads, } cluster_resources += Resources.from_flat_dict( - worker_dask_resources.items(), mapping=DASK_TO_RESOURCE_NAME_MAPPING + worker_dask_resources, mapping=DASK_TO_RESOURCE_NAME_MAPPING ) return cluster_resources From b114585b8bad11557a34594931809410fcb7fbf2 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:50:30 +0200 Subject: [PATCH 59/93] no need to call items --- .../modules/cluster_scaling/_utils_computational.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 0e6c8dbed549..4b32fbbbede1 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -30,7 +30,7 @@ def resources_from_dask_task(task: DaskTask) -> Resources: ) # merge with defaults to ensure there is always some minimal resource defined return Resources.from_flat_dict( - task_resources.items(), mapping=DASK_TO_RESOURCE_NAME_MAPPING + task_resources, mapping=DASK_TO_RESOURCE_NAME_MAPPING ) From f852935c1a0a9ad1f1153974d13ee8539d3266b9 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 08:51:43 +0200 Subject: [PATCH 60/93] revert --- .../src/simcore_service_autoscaling/utils/cluster_scaling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py 
b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py index 1cff28a0bb46..13c25dcd2112 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py @@ -109,7 +109,7 @@ def find_selected_instance_type_for_task( selected_instance = filtered_instances[0] # check that the assigned resources and the machine resource fit - if not (task_required_resources <= selected_instance.resources): + if task_required_resources > selected_instance.resources: raise TaskRequirementsAboveRequiredEC2InstanceTypeError( task=task, instance_type=selected_instance, From b02ec9732487e8345b3f1b8b954cc976a98ddee7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:18:29 +0200 Subject: [PATCH 61/93] refactor --- .../aws-library/src/aws_library/ec2/__init__.py | 2 ++ packages/aws-library/src/aws_library/ec2/_models.py | 10 +++++----- .../modules/cluster_scaling/_utils_computational.py | 7 ++++--- .../src/simcore_service_autoscaling/modules/dask.py | 13 ++++++------- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/__init__.py b/packages/aws-library/src/aws_library/ec2/__init__.py index 0acff01ff0d6..127a6dd076db 100644 --- a/packages/aws-library/src/aws_library/ec2/__init__.py +++ b/packages/aws-library/src/aws_library/ec2/__init__.py @@ -17,6 +17,7 @@ EC2InstanceData, EC2InstanceType, EC2Tags, + GenericResourceValueType, Resources, ) @@ -36,6 +37,7 @@ "EC2NotConnectedError", "EC2RuntimeError", "EC2Tags", + "GenericResourceValueType", "Resources", "SimcoreEC2API", ) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 27e8024e5a42..6d4ff3a7cdca 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ 
-22,14 +22,14 @@ from pydantic.config import JsonDict from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType -GenericResourceValue: TypeAlias = StrictInt | StrictFloat | str +GenericResourceValueType: TypeAlias = StrictInt | StrictFloat | str class Resources(BaseModel, frozen=True): cpus: NonNegativeFloat ram: ByteSize generic_resources: Annotated[ - dict[str, GenericResourceValue], + dict[str, GenericResourceValueType], Field( default_factory=dict, description=( @@ -83,7 +83,7 @@ def __add__(self, other: "Resources") -> "Resources": Note that only numeric generic resources are added Non-numeric generic resources are ignored """ - merged: dict[str, GenericResourceValue] = {} + merged: dict[str, GenericResourceValueType] = {} keys = set(self.generic_resources) | set(other.generic_resources) for k in keys: a = self.generic_resources.get(k) @@ -107,7 +107,7 @@ def __sub__(self, other: "Resources") -> "Resources": Note that only numeric generic resources are subtracted Non-numeric generic resources are ignored """ - merged: dict[str, GenericResourceValue] = {} + merged: dict[str, GenericResourceValueType] = {} keys = set(self.generic_resources) | set(other.generic_resources) for k in keys: a = self.generic_resources.get(k) @@ -129,7 +129,7 @@ def __sub__(self, other: "Resources") -> "Resources": def __hash__(self) -> int: """Deterministic hash including cpus, ram (in bytes) and generic_resources.""" # sort generic_resources items to ensure order-independent hashing - generic_items: tuple[tuple[str, GenericResourceValue], ...] = tuple( + generic_items: tuple[tuple[str, GenericResourceValueType], ...] 
= tuple( sorted(self.generic_resources.items()) ) return hash((self.cpus, self.ram, generic_items)) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index 4b32fbbbede1..f5ed682f6669 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -1,7 +1,7 @@ import logging -from typing import Final +from typing import Final, cast -from aws_library.ec2 import Resources +from aws_library.ec2 import GenericResourceValueType, Resources from dask_task_models_library.resource_constraints import ( DaskTaskResources, get_ec2_instance_type_from_resources, @@ -30,7 +30,8 @@ def resources_from_dask_task(task: DaskTask) -> Resources: ) # merge with defaults to ensure there is always some minimal resource defined return Resources.from_flat_dict( - task_resources, mapping=DASK_TO_RESOURCE_NAME_MAPPING + cast(dict[str, GenericResourceValueType], task_resources), + mapping=DASK_TO_RESOURCE_NAME_MAPPING, ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index dd6dffde0aeb..f7eb0193de0f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -135,7 +135,6 @@ async def _list_cluster_known_tasks( def _list_on_scheduler( dask_scheduler: distributed.Scheduler, ) -> _DaskClusterTasks: - worker_to_processing_tasks = defaultdict(list) unrunnable_tasks = {} for task_key, task_state in dask_scheduler.tasks.items(): @@ -319,13 +318,13 @@ async def compute_cluster_total_resources( continue # get dask information about resources worker_dask_resources = 
worker_details["resources"] - worker_threads = worker_details["nthreads"] - worker_dask_resources: dict[str, int | float | str] = { - **worker_dask_resources, - DASK_WORKER_THREAD_RESOURCE_NAME: worker_threads, - } + worker_dask_nthreads = worker_details["nthreads"] cluster_resources += Resources.from_flat_dict( - worker_dask_resources, mapping=DASK_TO_RESOURCE_NAME_MAPPING + { + **worker_dask_resources, + DASK_WORKER_THREAD_RESOURCE_NAME: worker_dask_nthreads, + }, + mapping=DASK_TO_RESOURCE_NAME_MAPPING, ) return cluster_resources From 44eb03d18563c2ef7f211935acc5164d3286b268 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:18:38 +0200 Subject: [PATCH 62/93] better assert --- .../src/dask_task_models_library/container_tasks/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/utils.py b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/utils.py index d97b0c896c36..97cfb440f45b 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/utils.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/container_tasks/utils.py @@ -34,7 +34,7 @@ def parse_dask_job_id( job_id: str, ) -> tuple[ServiceKey, ServiceVersion, UserID, ProjectID, NodeID]: parts = job_id.split(":") - assert len(parts) == _JOB_ID_PARTS # nosec + assert len(parts) == _JOB_ID_PARTS, f"unexpected job id {parts=}" # nosec return ( parts[0], parts[1], From 8916cc6d2d1975f594fdf9e228783938e609a529 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 14:18:59 +0200 Subject: [PATCH 63/93] improving test --- services/autoscaling/tests/unit/conftest.py | 52 +++++++++++++++++++ ...t_modules_cluster_scaling_computational.py | 12 +++-- .../tests/unit/test_modules_dask.py | 4 +- .../tests/unit/test_utils_rabbitmq.py | 
25 --------- 4 files changed, 62 insertions(+), 31 deletions(-) diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py index 192cc4932dde..3c77b01be372 100644 --- a/services/autoscaling/tests/unit/conftest.py +++ b/services/autoscaling/tests/unit/conftest.py @@ -30,6 +30,7 @@ Resources, ) from common_library.json_serialization import json_dumps +from dask_task_models_library.container_tasks.utils import generate_dask_job_id from deepdiff import DeepDiff from faker import Faker from fakeredis.aioredis import FakeRedis @@ -52,7 +53,11 @@ Service, TaskSpec, ) +from models_library.projects import ProjectID +from models_library.projects_nodes_io import NodeID from models_library.services_metadata_runtime import SimcoreContainerLabels +from models_library.services_types import ServiceKey, ServiceVersion +from models_library.users import UserID from pydantic import ByteSize, NonNegativeInt, PositiveInt, TypeAdapter from pytest_mock import MockType from pytest_mock.plugin import MockerFixture @@ -857,9 +862,55 @@ def _creator(**cluter_overrides) -> Cluster: return _creator +@pytest.fixture +def service_version() -> ServiceVersion: + return "1.0.234" + + +@pytest.fixture +def service_key() -> ServiceKey: + return "simcore/services/dynamic/test" + + +@pytest.fixture +def node_id(faker: Faker) -> NodeID: + return faker.uuid4(cast_to=None) + + +@pytest.fixture +def project_id(faker: Faker) -> ProjectID: + return faker.uuid4(cast_to=None) + + +@pytest.fixture +def user_id(faker: Faker) -> UserID: + return faker.pyint(min_value=1) + + +@pytest.fixture +def fake_dask_job_id( + service_key: ServiceKey, + service_version: ServiceVersion, + user_id: UserID, + project_id: ProjectID, + faker: Faker, +) -> Callable[[], str]: + def _() -> str: + return generate_dask_job_id( + service_key=service_key, + service_version=service_version, + user_id=user_id, + project_id=project_id, + node_id=faker.uuid4(cast_to=None), + ) + + return _ + + 
@pytest.fixture async def create_dask_task( dask_spec_cluster_client: distributed.Client, + fake_dask_job_id: Callable[[], str], ) -> Callable[..., distributed.Future]: def _remote_pytest_fct(x: int, y: int) -> int: return x + y @@ -874,6 +925,7 @@ def _creator( 43, resources=required_resources, pure=False, + key=fake_dask_job_id(), **overrides, ) assert future diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index fbfd965cd34a..0ec7755c3c61 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -126,10 +126,14 @@ def _assert_rabbit_autoscaling_message_sent( instances_running=0, ) expected_message = default_message.model_copy(update=message_update_kwargs) - mock_rabbitmq_post_message.assert_called_once_with( - app, - expected_message, - ) + # in this mock we get all kind of messages, we just want to assert one of them is the expected one and there is only one + autoscaling_status_messages = [ + call_args.args[1] + for call_args in mock_rabbitmq_post_message.call_args_list + if isinstance(call_args.args[1], RabbitAutoscalingStatusMessage) + ] + assert len(autoscaling_status_messages) == 1, "too many messages sent" + assert autoscaling_status_messages[0] == expected_message @pytest.fixture diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 6cc5b98a1516..4ed547d9f4d2 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -124,7 +124,7 @@ async def test_list_unrunnable_tasks( # we have nothing running now assert await list_unrunnable_tasks(scheduler_url, scheduler_authentication) == [] # start a task that cannot run - dask_task_impossible_resources = DaskTaskResources(XRAM=213, 
threads=1) + dask_task_impossible_resources = DaskTaskResources(XRAM=213, threads=1) # type: ignore future = create_dask_task(dask_task_impossible_resources) assert future assert await list_unrunnable_tasks(scheduler_url, scheduler_authentication) == [ @@ -168,7 +168,7 @@ def _add_fct(x: int, y: int) -> int: next(iter(dask_spec_cluster_client.scheduler_info()["workers"])): [ DaskTask( task_id=DaskTaskId(future_queued_task.key), - required_resources=DaskTaskResources(threads=1), + required_resources=DaskTaskResources(threads=1), # type: ignore ) ] } diff --git a/services/autoscaling/tests/unit/test_utils_rabbitmq.py b/services/autoscaling/tests/unit/test_utils_rabbitmq.py index 006155b1e0fa..8741949e76a7 100644 --- a/services/autoscaling/tests/unit/test_utils_rabbitmq.py +++ b/services/autoscaling/tests/unit/test_utils_rabbitmq.py @@ -122,31 +122,6 @@ async def _(labels: dict[DockerLabelKey, str]) -> list[Task]: return _ -@pytest.fixture -def service_version() -> ServiceVersion: - return "1.0.0" - - -@pytest.fixture -def service_key() -> ServiceKey: - return "simcore/services/dynamic/test" - - -@pytest.fixture -def node_id(faker: Faker) -> NodeID: - return faker.uuid4(cast_to=None) - - -@pytest.fixture -def project_id(faker: Faker) -> ProjectID: - return faker.uuid4(cast_to=None) - - -@pytest.fixture -def user_id(faker: Faker) -> UserID: - return faker.pyint(min_value=1) - - @pytest.fixture def dask_task( service_key: ServiceKey, From 255a7d26cfd00a226a44a55911ad70ced8e2bccc Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:36:47 +0200 Subject: [PATCH 64/93] add generic instances based on the provider --- .../modules/cluster_scaling/_auto_scaling_core.py | 3 +++ .../modules/cluster_scaling/_provider_computational.py | 9 +++++++++ .../modules/cluster_scaling/_provider_protocol.py | 4 ++++ .../src/simcore_service_autoscaling/modules/dask.py | 8 ++++++++ 
.../unit/test_modules_cluster_scaling_computational.py | 2 +- 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index 5b74cb412fad..967b85281d18 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -112,6 +112,9 @@ async def _analyze_current_cluster( state_names=["stopped"], ) + for instance in itertools.chain(existing_ec2_instances, warm_buffer_ec2_instances): + auto_scaling_mode.add_instance_generic_resources(app, instance) + attached_ec2s, pending_ec2s = associate_ec2_instances_with_nodes( docker_nodes, existing_ec2_instances ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 243674344a39..70cb9eedad47 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -182,3 +182,12 @@ async def is_instance_retired( async def try_retire_nodes(self, app: FastAPI) -> None: assert self # nosec await dask.try_retire_nodes(_scheduler_url(app), _scheduler_auth(app)) + + def add_instance_generic_resources( + self, app: FastAPI, instance: EC2InstanceData + ) -> None: + assert self # nosec + assert app # nosec + app_settings = get_application_settings(app) + assert app_settings.AUTOSCALING_DASK # nosec + dask.add_instance_generic_resources(app_settings.AUTOSCALING_DASK, instance) diff --git 
a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py index 355394b9f1d3..71355d21bcf3 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py @@ -47,3 +47,7 @@ async def is_instance_retired( ) -> bool: ... async def try_retire_nodes(self, app: FastAPI) -> None: ... + + def add_instance_generic_resources( + self, app: FastAPI, instance: EC2InstanceData + ) -> None: ... diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index f7eb0193de0f..b7290d129919 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -12,6 +12,7 @@ from dask_task_models_library.resource_constraints import ( DASK_WORKER_THREAD_RESOURCE_NAME, DaskTaskResources, + create_ec2_resource_constraint_key, ) from distributed.core import Status from models_library.clusters import ClusterAuthentication, TLSAuthentication @@ -339,6 +340,9 @@ async def try_retire_nodes( ) +_LARGE_RESOURCE: Final[int] = 99999 + + def add_instance_generic_resources( settings: DaskMonitoringSettings, instance: EC2InstanceData ) -> None: @@ -351,3 +355,7 @@ def add_instance_generic_resources( instance.resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] = ( instance_threads ) + + instance.resources.generic_resources[ + create_ec2_resource_constraint_key(instance.type) + ] = _LARGE_RESOURCE diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index 0ec7755c3c61..6cf0562976e5 100644 --- 
a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -638,7 +638,7 @@ async def test_cluster_scaling_up_and_down( # noqa: PLR0915 ) mock_docker_tag_node.reset_mock() mock_docker_set_node_availability.assert_not_called() - mock_rabbitmq_post_message.assert_called_once() + assert mock_rabbitmq_post_message.call_count == 3 mock_rabbitmq_post_message.reset_mock() # now we have 1 monitored node that needs to be mocked From 225491909141ca46af2cbf76d17158a9fe598fae Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 16:39:41 +0200 Subject: [PATCH 65/93] sonar --- packages/aws-library/src/aws_library/ec2/_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 6d4ff3a7cdca..c8bfb91d099d 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -61,7 +61,7 @@ def __ge__(self, other: "Resources") -> bool: k, a ) # NOTE: get from other, default to "a" resources so that non-existing keys can be compared as equal if isinstance(a, int | float) and isinstance(b, int | float): - if not (a >= b): + if a < b: return False elif a != b: assert isinstance(a, str | None) # nosec From b9d7428460e5de0f1b0970b1edda13075f0c9eb7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:15:11 +0200 Subject: [PATCH 66/93] fix? 
--- .../src/aws_library/ec2/_models.py | 42 +++++++++++-------- packages/aws-library/tests/test_ec2_models.py | 12 +++--- ...t_modules_cluster_scaling_computational.py | 1 - 3 files changed, 30 insertions(+), 25 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index c8bfb91d099d..982d73dfa839 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -46,37 +46,43 @@ def create_as_empty(cls) -> "Resources": def __ge__(self, other: "Resources") -> bool: """operator for >= comparison if self has greater or equal resources than other, returns True + This will return True only if any of the resources in self is greater or equal to other + Note that generic_resources are compared only if they are numeric Non-numeric generic resources must be equal in both or only defined in self to be considered greater or equal """ + if self == other: + return True + return self > other + + def __gt__(self, other: "Resources") -> bool: + """operator for > comparison + if self has any resources gretaer than other, returns True (even if different resource types are smaller) - if not (self.cpus >= other.cpus and self.ram >= other.ram): - return False + Note that generic_resources are compared only if they are numeric + Non-numeric generic resources must be equal in both or only defined in self + to be considered greater + """ + if (self.cpus > other.cpus) or (self.ram > other.ram): + return True keys = set(self.generic_resources) | set(other.generic_resources) for k in keys: a = self.generic_resources.get(k) - b = other.generic_resources.get( - k, a - ) # NOTE: get from other, default to "a" resources so that non-existing keys can be compared as equal + b = other.generic_resources.get(k) + if a is None: + continue + if b is None: + return True if isinstance(a, int | float) and isinstance(b, int | float): - if a < b: - return False + if a > 
b: + return True elif a != b: assert isinstance(a, str | None) # nosec assert isinstance(b, int | float | str | None) # nosec - return False - return True - - def __gt__(self, other: "Resources") -> bool: - """operator for > comparison - if self has greater resources than other, returns True - Note that generic_resources are compared only if they are numeric - Non-numeric generic resources must be equal in both or only defined in self - to be considered greater - """ - return self >= other and self != other + return True + return False def __add__(self, other: "Resources") -> "Resources": """operator for adding two Resources diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 131b24da87f8..0a77b88c38aa 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -36,7 +36,7 @@ ( Resources(cpus=0.05, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(0)), - False, + True, # ram is larger ), ( Resources(cpus=0.1, ram=ByteSize(0)), @@ -46,7 +46,7 @@ ( Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), Resources(cpus=0.1, ram=ByteSize(1)), - False, # ram is not enough + True, # GPU is larger ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), @@ -71,7 +71,7 @@ ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "2"}), Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), - False, + True, # string resrouces are not comparable so "2" is considered larger ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), @@ -117,7 +117,7 @@ def test_resources_ge_operator( ( Resources(cpus=0.05, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(0)), - False, + True, ), ( Resources(cpus=0.1, ram=ByteSize(0)), @@ -127,7 +127,7 @@ def test_resources_ge_operator( ( Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), Resources(cpus=0.1, ram=ByteSize(1)), - False, # ram is not 
enough + True, # ram is not enough ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), @@ -152,7 +152,7 @@ def test_resources_ge_operator( ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "2"}), Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), - False, + True, # string resources are not comparable, so a > b ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index 6cf0562976e5..5f51a4f34c19 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -934,7 +934,6 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso [InstanceTypeType | None, Resources], DaskTaskResources ], ec2_client: EC2Client, - faker: Faker, caplog: pytest.LogCaptureFixture, ): # we have nothing running now From 824dfa7b9e16f025d8c13a1320b27b755da6ffc7 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:35:37 +0200 Subject: [PATCH 67/93] improve error --- .../src/simcore_service_autoscaling/core/errors.py | 6 ++++-- .../simcore_service_autoscaling/utils/cluster_scaling.py | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/errors.py b/services/autoscaling/src/simcore_service_autoscaling/core/errors.py index e4294631224a..0277acf38936 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/errors.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/errors.py @@ -18,7 +18,7 @@ class TaskRequiresUnauthorizedEC2InstanceTypeError(AutoscalingRuntimeError): class TaskRequirementsAboveRequiredEC2InstanceTypeError(AutoscalingRuntimeError): 
msg_template: str = ( - "Task {task} requires {instance_type} but requires {resources}. " + "Task {task} requires {instance_type} but requires {resources}. {resources_diff} are missing! " "TIP: Ensure task resources requirements fit required instance type available resources." ) @@ -43,4 +43,6 @@ class DaskNoWorkersError(AutoscalingRuntimeError): class DaskWorkerNotFoundError(AutoscalingRuntimeError): - msg_template: str = "Dask worker running on {worker_host} is not registered to scheduler in {url}, it is not found!" + msg_template: str = ( + "Dask worker running on {worker_host} is not registered to scheduler in {url}, it is not found!" + ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py index 13c25dcd2112..93e86c99eaec 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py @@ -114,6 +114,7 @@ def find_selected_instance_type_for_task( task=task, instance_type=selected_instance, resources=task_required_resources, + resources_diff=task_required_resources - selected_instance.resources, ) return selected_instance From 6020b89dd61f144a5060cf8c860907bb27b0612f Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Fri, 17 Oct 2025 17:36:59 +0200 Subject: [PATCH 68/93] add resource info on instance types as well --- .../cluster_scaling/_auto_scaling_core.py | 11 ++++++++-- .../_provider_computational.py | 12 +++++++++++ .../cluster_scaling/_provider_protocol.py | 5 +++++ .../modules/dask.py | 20 +++++++++++++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index 
967b85281d18..912233cb58a3 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -346,7 +346,9 @@ async def _try_attach_pending_ec2s( ) -async def _sorted_allowed_instance_types(app: FastAPI) -> list[EC2InstanceType]: +async def _sorted_allowed_instance_types( + app: FastAPI, auto_scaling_mode: AutoscalingProvider +) -> list[EC2InstanceType]: app_settings: ApplicationSettings = app.state.settings assert app_settings.AUTOSCALING_EC2_INSTANCES # nosec ec2_client = get_ec2_client(app) @@ -370,6 +372,8 @@ def _as_selection(instance_type: EC2InstanceType) -> int: return allowed_instance_type_names.index(f"{instance_type.name}") allowed_instance_types.sort(key=_as_selection) + for instance_type in allowed_instance_types: + auto_scaling_mode.add_instance_type_generic_resource(app, instance_type) return allowed_instance_types @@ -1578,7 +1582,10 @@ async def auto_scale_cluster( the additional load. 
""" # current state - allowed_instance_types = await _sorted_allowed_instance_types(app) + allowed_instance_types = await _sorted_allowed_instance_types( + app, auto_scaling_mode + ) + cluster = await _analyze_current_cluster( app, auto_scaling_mode, allowed_instance_types ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 70cb9eedad47..585d503ab4ce 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -3,6 +3,7 @@ from typing import Any, cast from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources +from aws_library.ec2._models import EC2InstanceType from fastapi import FastAPI from models_library.clusters import ClusterAuthentication from models_library.docker import DockerLabelKey @@ -191,3 +192,14 @@ def add_instance_generic_resources( app_settings = get_application_settings(app) assert app_settings.AUTOSCALING_DASK # nosec dask.add_instance_generic_resources(app_settings.AUTOSCALING_DASK, instance) + + def add_instance_type_generic_resource( + self, app: FastAPI, instance_type: EC2InstanceType + ) -> None: + assert self # nosec + assert app # nosec + app_settings = get_application_settings(app) + assert app_settings.AUTOSCALING_DASK # nosec + dask.add_instance_type_generic_resource( + app_settings.AUTOSCALING_DASK, instance_type + ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py index 71355d21bcf3..e161893f71bf 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py +++ 
b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py @@ -1,6 +1,7 @@ from typing import Protocol from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources +from aws_library.ec2._models import EC2InstanceType from fastapi import FastAPI from models_library.docker import DockerLabelKey from models_library.generated_models.docker_rest_api import Node as DockerNode @@ -51,3 +52,7 @@ async def try_retire_nodes(self, app: FastAPI) -> None: ... def add_instance_generic_resources( self, app: FastAPI, instance: EC2InstanceData ) -> None: ... + + def add_instance_type_generic_resource( + self, app: FastAPI, instance_type: EC2InstanceType + ) -> None: ... diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index b7290d129919..5d28f31223cc 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -9,6 +9,7 @@ import dask.typing import distributed from aws_library.ec2 import EC2InstanceData, Resources +from aws_library.ec2._models import EC2InstanceType from dask_task_models_library.resource_constraints import ( DASK_WORKER_THREAD_RESOURCE_NAME, DaskTaskResources, @@ -359,3 +360,22 @@ def add_instance_generic_resources( instance.resources.generic_resources[ create_ec2_resource_constraint_key(instance.type) ] = _LARGE_RESOURCE + + +def add_instance_type_generic_resource( + settings: DaskMonitoringSettings, instance_type: EC2InstanceType +) -> None: + instance_threads = round(instance_type.resources.cpus) + if settings.DASK_NTHREADS > 0: + # this overrides everything + instance_threads = settings.DASK_NTHREADS + if settings.DASK_NTHREADS_MULTIPLIER > 1: + instance_threads = instance_threads * settings.DASK_NTHREADS_MULTIPLIER + + instance_type.resources.generic_resources[DASK_WORKER_THREAD_RESOURCE_NAME] = ( + instance_threads 
+ ) + + instance_type.resources.generic_resources[ + create_ec2_resource_constraint_key(instance_type.name) + ] = _LARGE_RESOURCE From 1c0869f04c31e9d9473c8961dc8911c2caad0f18 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Sun, 19 Oct 2025 22:40:44 +0200 Subject: [PATCH 69/93] mypy --- .../cluster_scaling/_auto_scaling_core.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index 912233cb58a3..6c34fdaa2f57 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -94,26 +94,32 @@ async def _analyze_current_cluster( docker_nodes: list[Node] = await auto_scaling_mode.get_monitored_nodes(app) # get the EC2 instances we have - existing_ec2_instances = await get_ec2_client(app).get_instances( + existing_ec2_instances: list[EC2InstanceData] = await get_ec2_client( + app + ).get_instances( key_names=[app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME], tags=auto_scaling_mode.get_ec2_tags(app), state_names=["pending", "running"], ) - terminated_ec2_instances = await get_ec2_client(app).get_instances( + terminated_ec2_instances: list[EC2InstanceData] = await get_ec2_client( + app + ).get_instances( key_names=[app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME], tags=auto_scaling_mode.get_ec2_tags(app), state_names=["terminated"], ) - warm_buffer_ec2_instances = await get_ec2_client(app).get_instances( + warm_buffer_ec2_instances: list[EC2InstanceData] = await get_ec2_client( + app + ).get_instances( key_names=[app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME], 
tags=get_deactivated_warm_buffer_ec2_tags(auto_scaling_mode.get_ec2_tags(app)), state_names=["stopped"], ) - for instance in itertools.chain(existing_ec2_instances, warm_buffer_ec2_instances): - auto_scaling_mode.add_instance_generic_resources(app, instance) + for i in itertools.chain(existing_ec2_instances, warm_buffer_ec2_instances): + auto_scaling_mode.add_instance_generic_resources(app, i) attached_ec2s, pending_ec2s = associate_ec2_instances_with_nodes( docker_nodes, existing_ec2_instances From e55ec441b4b68714afe06cc793b35812d6d25087 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Sun, 19 Oct 2025 22:49:51 +0200 Subject: [PATCH 70/93] need to be fixed --- packages/aws-library/src/aws_library/ec2/_models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 982d73dfa839..faf1380d09c4 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -43,6 +43,7 @@ class Resources(BaseModel, frozen=True): def create_as_empty(cls) -> "Resources": return cls(cpus=0, ram=ByteSize(0)) + # TODO: this is not ok. everything shall be compared! 
def __ge__(self, other: "Resources") -> bool: """operator for >= comparison if self has greater or equal resources than other, returns True @@ -58,7 +59,7 @@ def __ge__(self, other: "Resources") -> bool: def __gt__(self, other: "Resources") -> bool: """operator for > comparison - if self has any resources gretaer than other, returns True (even if different resource types are smaller) + if self has any resources greater than other, returns True (even if different resource types are smaller) Note that generic_resources are compared only if they are numeric Non-numeric generic resources must be equal in both or only defined in self From d2f241a67291d9e92f87226a2b0eef2996728de2 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 08:55:07 +0200 Subject: [PATCH 71/93] done --- packages/aws-library/src/aws_library/ec2/_models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index faf1380d09c4..803bceb7ef88 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -43,11 +43,10 @@ class Resources(BaseModel, frozen=True): def create_as_empty(cls) -> "Resources": return cls(cpus=0, ram=ByteSize(0)) - # TODO: this is not ok. everything shall be compared! 
def __ge__(self, other: "Resources") -> bool: """operator for >= comparison if self has greater or equal resources than other, returns True - This will return True only if any of the resources in self is greater or equal to other + This will return True only if all of the resources in self are greater or equal to other Note that generic_resources are compared only if they are numeric Non-numeric generic resources must be equal in both or only defined in self @@ -59,10 +58,11 @@ def __ge__(self, other: "Resources") -> bool: def __gt__(self, other: "Resources") -> bool: """operator for > comparison - if self has any resources greater than other, returns True (even if different resource types are smaller) + if self has resources greater than other, returns True + This will return True only if all of the resources in self are greater than other Note that generic_resources are compared only if they are numeric - Non-numeric generic resources must be equal in both or only defined in self + Non-numeric generic resources must only be defined in self to be considered greater """ if (self.cpus > other.cpus) or (self.ram > other.ram): From 31aa4c299a3a62a7051a4beb67f34201cc2396d3 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:29:11 +0200 Subject: [PATCH 72/93] fixed tests --- .../src/aws_library/ec2/_models.py | 26 +++++++++++-------- packages/aws-library/tests/test_ec2_models.py | 20 ++++++++++---- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 803bceb7ef88..09f49bb6aae4 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -65,25 +65,29 @@ def __gt__(self, other: "Resources") -> bool: Non-numeric generic resources must only be defined in self to be considered greater """ - if (self.cpus > other.cpus) or 
(self.ram > other.ram): - return True + if (self.cpus < other.cpus) or (self.ram < other.ram): + return False keys = set(self.generic_resources) | set(other.generic_resources) for k in keys: a = self.generic_resources.get(k) b = other.generic_resources.get(k) if a is None: - continue + return False if b is None: - return True + # a is greater as b is not defined + continue if isinstance(a, int | float) and isinstance(b, int | float): - if a > b: - return True - elif a != b: - assert isinstance(a, str | None) # nosec - assert isinstance(b, int | float | str | None) # nosec - return True - return False + if a < b: + return False + else: + # remaining options is a is str and b is str or mixed types + assert isinstance(a, str) # nosec + assert isinstance(b, int | float | str) # nosec + + # here we have either everything greater or equal or non-comparable strings + + return self != other def __add__(self, other: "Resources") -> "Resources": """operator for adding two Resources diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 0a77b88c38aa..0b1fa016fa79 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -36,7 +36,7 @@ ( Resources(cpus=0.05, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(0)), - True, # ram is larger + False, # CPU is smaller ), ( Resources(cpus=0.1, ram=ByteSize(0)), @@ -46,7 +46,7 @@ ( Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), Resources(cpus=0.1, ram=ByteSize(1)), - True, # GPU is larger + False, # RAM is smaller ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), @@ -71,7 +71,7 @@ ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": "2"}), Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), - True, # string resrouces are not comparable so "2" is considered larger + True, # string resources are not comparable so "2" is considered larger ), ( 
Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), @@ -117,7 +117,7 @@ def test_resources_ge_operator( ( Resources(cpus=0.05, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(0)), - True, + False, # CPU is smaller ), ( Resources(cpus=0.1, ram=ByteSize(0)), @@ -127,7 +127,7 @@ def test_resources_ge_operator( ( Resources(cpus=0.1, ram=ByteSize(0), generic_resources={"GPU": 1}), Resources(cpus=0.1, ram=ByteSize(1)), - True, # ram is not enough + False, # ram is not enough ), ( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), @@ -144,6 +144,11 @@ def test_resources_ge_operator( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), False, ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 1}), + True, + ), ( Resources(cpus=0.1, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"GPU": 2}), @@ -169,6 +174,11 @@ def test_resources_ge_operator( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), False, ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "no"}), + True, + ), ], ) def test_resources_gt_operator(a: Resources, b: Resources, a_greater_than_b: bool): From ac714225a6c683036305b8195ecbc982b6e170fe Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:55:03 +0200 Subject: [PATCH 73/93] added missing calls --- .../cluster_scaling/_provider_dynamic.py | 17 +++++++++++++++++ .../utils/cluster_scaling.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py index e6dbca840e37..ac28d9e775f3 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py @@ -1,4 +1,5 @@ from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources +from aws_library.ec2._models import EC2InstanceType from fastapi import FastAPI from models_library.docker import DockerLabelKey from models_library.generated_models.docker_rest_api import Node, Task @@ -104,3 +105,19 @@ async def try_retire_nodes(self, app: FastAPI) -> None: assert self # nosec assert app # nosec # nothing to do here + + def add_instance_generic_resources( + self, app: FastAPI, instance: EC2InstanceData + ) -> None: + assert self # nosec + assert app # nosec + assert instance # nosec + # nothing to do at the moment + + def add_instance_type_generic_resource( + self, app: FastAPI, instance_type: EC2InstanceType + ) -> None: + assert self # nosec + assert app # nosec + assert instance_type # nosec + # nothing to do at the moment diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py index 93e86c99eaec..cc2c1ad3ee0c 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py +++ b/services/autoscaling/src/simcore_service_autoscaling/utils/cluster_scaling.py @@ -109,7 +109,7 @@ def find_selected_instance_type_for_task( selected_instance = filtered_instances[0] # check that the assigned resources and the machine resource fit - if task_required_resources > selected_instance.resources: + if not (task_required_resources <= selected_instance.resources): raise TaskRequirementsAboveRequiredEC2InstanceTypeError( task=task, instance_type=selected_instance, From 28bdfee62dac673a762e5654fb759d832ab08a46 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:56:48 +0200 Subject: 
[PATCH 74/93] @copilot review --- services/autoscaling/tests/unit/test_utils_cluster_scaling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py index 5525cedc9268..1c325c1f6234 100644 --- a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py +++ b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py @@ -296,7 +296,6 @@ def test_sort_drained_nodes( assert app_settings.AUTOSCALING_EC2_INSTANCES machine_buffer_type = get_hot_buffer_type(random_fake_available_instances) _NUM_DRAINED_NODES = 20 - assert app_settings.AUTOSCALING_EC2_INSTANCES _NUM_NODE_WITH_TYPE_BUFFER = ( 3 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER ) From 04e68cbf38745f266c1f14d13f8218b9ef5d63b9 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 13:58:12 +0200 Subject: [PATCH 75/93] Update services/autoscaling/src/simcore_service_autoscaling/core/errors.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../autoscaling/src/simcore_service_autoscaling/core/errors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/core/errors.py b/services/autoscaling/src/simcore_service_autoscaling/core/errors.py index 0277acf38936..d1020d382f76 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/core/errors.py +++ b/services/autoscaling/src/simcore_service_autoscaling/core/errors.py @@ -18,7 +18,7 @@ class TaskRequiresUnauthorizedEC2InstanceTypeError(AutoscalingRuntimeError): class TaskRequirementsAboveRequiredEC2InstanceTypeError(AutoscalingRuntimeError): msg_template: str = ( - "Task {task} requires {instance_type} but requires {resources}. {resources_diff} are missing! " + "Task {task} specifies instance type {instance_type} but requests {resources}. {resources_diff} are missing! 
" "TIP: Ensure task resources requirements fit required instance type available resources." ) From d396dab1738522f82fd35b1201501c03992a1af1 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 14:05:26 +0200 Subject: [PATCH 76/93] Update services/autoscaling/tests/unit/test_modules_dask.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- services/autoscaling/tests/unit/test_modules_dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/tests/unit/test_modules_dask.py b/services/autoscaling/tests/unit/test_modules_dask.py index 4ed547d9f4d2..d99c0f2f0869 100644 --- a/services/autoscaling/tests/unit/test_modules_dask.py +++ b/services/autoscaling/tests/unit/test_modules_dask.py @@ -429,7 +429,7 @@ async def test_compute_cluster_total_resources( @pytest.mark.parametrize( "dask_nthreads, dask_nthreads_multiplier, expected_threads_resource", - [(4, 1, 4), (4, 2, 8), (0, 2.0, -1)], + [(4, 1, 4), (4, 2, 8), (0, 2, -1)], ) def test_add_instance_generic_resources( scheduler_url: AnyUrl, From 2b521095423cbe3b61e73296701e349dceb7fcca Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 14:11:24 +0200 Subject: [PATCH 77/93] ensure thread is at least 1 --- .../src/simcore_service_autoscaling/modules/dask.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 5d28f31223cc..4f72410a9f8a 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -347,7 +347,7 @@ async def try_retire_nodes( def add_instance_generic_resources( settings: DaskMonitoringSettings, instance: EC2InstanceData ) -> None: - instance_threads = round(instance.resources.cpus) + 
instance_threads = min(1, round(instance.resources.cpus)) if settings.DASK_NTHREADS > 0: # this overrides everything instance_threads = settings.DASK_NTHREADS @@ -365,7 +365,7 @@ def add_instance_generic_resources( def add_instance_type_generic_resource( settings: DaskMonitoringSettings, instance_type: EC2InstanceType ) -> None: - instance_threads = round(instance_type.resources.cpus) + instance_threads = min(1, round(instance_type.resources.cpus)) if settings.DASK_NTHREADS > 0: # this overrides everything instance_threads = settings.DASK_NTHREADS From 56c4326f4b7697acb5c438cc693d20518cbc213a Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 15:52:46 +0200 Subject: [PATCH 78/93] improve coverage --- .../tests/unit/test_utils_cluster_scaling.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py index 1c325c1f6234..5a15d63a6ed3 100644 --- a/services/autoscaling/tests/unit/test_utils_cluster_scaling.py +++ b/services/autoscaling/tests/unit/test_utils_cluster_scaling.py @@ -75,6 +75,24 @@ async def test_associate_ec2_instances_with_nodes_with_no_correspondence( assert len(non_associated_instances) == len(ec2_instances) +async def test_associate_ec2_instances_with_nodes_with_invalid_dns( + fake_ec2_instance_data: Callable[..., EC2InstanceData], + node: Callable[..., DockerNode], +): + nodes = [node() for _ in range(10)] + ec2_instances = [ + fake_ec2_instance_data(aws_private_dns="invalid-dns-name") for _ in range(10) + ] + + ( + associated_instances, + non_associated_instances, + ) = associate_ec2_instances_with_nodes(nodes, ec2_instances) + + assert not associated_instances + assert non_associated_instances + + async def test_associate_ec2_instances_with_corresponding_nodes( fake_ec2_instance_data: Callable[..., EC2InstanceData], node: Callable[..., DockerNode], From 
e3b998e711b13b1191d0662bb1e878aa52ff7208 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:02:19 +0200 Subject: [PATCH 79/93] added test and a fix --- .../modules/instrumentation/_core.py | 2 +- .../unit/test_modules_instrumentation_core.py | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 services/autoscaling/tests/unit/test_modules_instrumentation_core.py diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py index 9de65aac078f..af84e97bc01b 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/instrumentation/_core.py @@ -35,7 +35,7 @@ async def on_shutdown() -> None: ... def get_instrumentation(app: FastAPI) -> AutoscalingInstrumentation: - if not app.state.instrumentation: + if not hasattr(app.state, "instrumentation"): raise ConfigurationError( msg="Instrumentation not setup. Please check the configuration." 
) diff --git a/services/autoscaling/tests/unit/test_modules_instrumentation_core.py b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py new file mode 100644 index 000000000000..b3a843d8adbe --- /dev/null +++ b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py @@ -0,0 +1,30 @@ +import pytest +from fastapi import FastAPI +from pytest_simcore.helpers.typing_env import EnvVarsDict +from simcore_service_autoscaling.core.errors import ConfigurationError +from simcore_service_autoscaling.modules.instrumentation._core import ( + get_instrumentation, + has_instrumentation, +) + + +@pytest.fixture +def disabled_instrumentation( + app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setenv("AUTOSCALING_PROMETHEUS_INSTRUMENTATION_ENABLED", "false") + + +async def test_disabled_instrumentation( + disabled_rabbitmq: None, + disabled_ec2: None, + disabled_ssm: None, + disabled_instrumentation: None, + mocked_redis_server: None, + initialized_app: FastAPI, +): + # instrumentation disabled by default + assert not has_instrumentation(initialized_app) + + with pytest.raises(ConfigurationError): + get_instrumentation(initialized_app) From 3bb8fad16a83afe1f13b89acb0b52236e41597fd Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 16:24:21 +0200 Subject: [PATCH 80/93] fix code --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 4f72410a9f8a..546a7ad9917f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -347,7 +347,7 @@ async def try_retire_nodes( def add_instance_generic_resources( settings: DaskMonitoringSettings, instance: 
EC2InstanceData ) -> None: - instance_threads = min(1, round(instance.resources.cpus)) + instance_threads = max(1, round(instance.resources.cpus)) if settings.DASK_NTHREADS > 0: # this overrides everything instance_threads = settings.DASK_NTHREADS From a6c322b2abec66dece6bfb91c177a4893ee7ed16 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 17:15:16 +0200 Subject: [PATCH 81/93] adjust ram cpu --- .../resource_constraints.py | 21 ++++++++++++++ .../cluster_scaling/_auto_scaling_core.py | 2 +- .../_provider_computational.py | 15 +++++++++- .../cluster_scaling/_provider_dynamic.py | 28 +++++++++++++++++-- .../cluster_scaling/_provider_protocol.py | 2 +- .../db/repositories/comp_tasks/_utils.py | 16 ++++++----- 6 files changed, 72 insertions(+), 12 deletions(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 55c700541a73..68716c9d5b67 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -26,3 +26,24 @@ def get_ec2_instance_type_from_resources( if resource_name.startswith(DASK_TASK_EC2_RESOURCE_RESTRICTION_KEY): return resource_name.split(":")[-1] return None + + +_RAM_SAFE_MARGIN_RATIO: Final[float] = ( + 0.1 # NOTE: machines always have less available RAM than advertised +) +_CPUS_SAFE_MARGIN: Final[float] = 0.1 + + +def estimate_dask_worker_resources_from_ec2_instance( + cpus: float, ram: int +) -> tuple[float, float]: + """Estimates the resources available to a dask worker running in an EC2 instance, + taking into account safe margins for CPU and RAM. + + Returns: + tuple: Estimated resources for the dask worker (cpus, ram). 
+ """ + worker_cpus = min(0.1, cpus - _CPUS_SAFE_MARGIN) # ensure at least 0.1 CPU + worker_ram = int(ram * (1 - _RAM_SAFE_MARGIN_RATIO)) # apply safe margin + + return (worker_cpus, worker_ram) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index 6c34fdaa2f57..37c7dcd6102d 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -379,7 +379,7 @@ def _as_selection(instance_type: EC2InstanceType) -> int: allowed_instance_types.sort(key=_as_selection) for instance_type in allowed_instance_types: - auto_scaling_mode.add_instance_type_generic_resource(app, instance_type) + auto_scaling_mode.adjust_instance_type_resources(app, instance_type) return allowed_instance_types diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 585d503ab4ce..9701bc19fe79 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -1,9 +1,13 @@ import collections +import dataclasses import logging from typing import Any, cast from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources from aws_library.ec2._models import EC2InstanceType +from dask_task_models_library.resource_constraints import ( + estimate_dask_worker_resources_from_ec2_instance, +) from fastapi import FastAPI from models_library.clusters import ClusterAuthentication from models_library.docker import DockerLabelKey @@ -193,13 +197,22 @@ def 
add_instance_generic_resources( assert app_settings.AUTOSCALING_DASK # nosec dask.add_instance_generic_resources(app_settings.AUTOSCALING_DASK, instance) - def add_instance_type_generic_resource( + def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType ) -> None: assert self # nosec assert app # nosec app_settings = get_application_settings(app) assert app_settings.AUTOSCALING_DASK # nosec + adjusted_cpus, adjusted_ram = estimate_dask_worker_resources_from_ec2_instance( + instance_type.resources.cpus, instance_type.resources.ram + ) + dataclasses.replace( + instance_type, + resources=instance_type.resources.model_copy( + update={"cpus": adjusted_cpus, "ram": ByteSize(adjusted_ram)} + ), + ) dask.add_instance_type_generic_resource( app_settings.AUTOSCALING_DASK, instance_type ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py index ac28d9e775f3..e6b40c22b14a 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py @@ -1,8 +1,12 @@ +import dataclasses +from typing import Final + from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources from aws_library.ec2._models import EC2InstanceType from fastapi import FastAPI from models_library.docker import DockerLabelKey from models_library.generated_models.docker_rest_api import Node, Task +from pydantic import ByteSize, TypeAdapter from types_aiobotocore_ec2.literals import InstanceTypeType from ...core.settings import get_application_settings @@ -10,6 +14,15 @@ from ...utils import utils_docker, utils_ec2 from ..docker import get_docker_client +_MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO: Final[float] = ( + 0.1 # NOTE: machines always have less available RAM than advertised +) 
+_SIDECARS_OPS_SAFE_RAM_MARGIN: Final[ByteSize] = TypeAdapter(ByteSize).validate_python( + "1GiB" +) +_CPUS_SAFE_MARGIN: Final[float] = 1.4 +_MIN_NUM_CPUS: Final[float] = 0.5 + class DynamicAutoscalingProvider: async def get_monitored_nodes(self, app: FastAPI) -> list[Node]: @@ -114,10 +127,21 @@ def add_instance_generic_resources( assert instance # nosec # nothing to do at the moment - def add_instance_type_generic_resource( + def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType ) -> None: assert self # nosec assert app # nosec - assert instance_type # nosec # nothing to do at the moment + adjusted_cpus = float(instance_type.resources.cpus) - _CPUS_SAFE_MARGIN + adjusted_ram = int( + instance_type.resources.ram + - _MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * instance_type.resources.ram + - _SIDECARS_OPS_SAFE_RAM_MARGIN + ) + dataclasses.replace( + instance_type, + resources=instance_type.resources.model_copy( + update={"cpus": adjusted_cpus, "ram": ByteSize(adjusted_ram)} + ), + ) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py index e161893f71bf..62003854fef3 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py @@ -53,6 +53,6 @@ def add_instance_generic_resources( self, app: FastAPI, instance: EC2InstanceData ) -> None: ... - def add_instance_type_generic_resource( + def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType ) -> None: ... 
diff --git a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_utils.py b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_utils.py index 10103909a631..3446cb2f1497 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_utils.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/db/repositories/comp_tasks/_utils.py @@ -5,6 +5,9 @@ import arrow from dask_task_models_library.container_tasks.protocol import ContainerEnvsDict +from dask_task_models_library.resource_constraints import ( + estimate_dask_worker_resources_from_ec2_instance, +) from models_library.api_schemas_catalog.services import ServiceGet from models_library.api_schemas_clusters_keeper.ec2_instances import EC2InstanceTypeGet from models_library.api_schemas_directorv2.services import ( @@ -292,15 +295,14 @@ def _by_type_name(ec2: EC2InstanceTypeGet) -> bool: image_resources: ImageResources = node_resources[ DEFAULT_SINGLE_SERVICE_NAME ] - image_resources.resources["CPU"].set_value( - float(selected_ec2_instance_type.cpus) - _CPUS_SAFE_MARGIN - ) - image_resources.resources["RAM"].set_value( - int( - selected_ec2_instance_type.ram - - _RAM_SAFE_MARGIN_RATIO * selected_ec2_instance_type.ram + adjusted_cpus, adjusted_ram = ( + estimate_dask_worker_resources_from_ec2_instance( + float(selected_ec2_instance_type.cpus), + selected_ec2_instance_type.ram, ) ) + image_resources.resources["CPU"].set_value(adjusted_cpus) + image_resources.resources["RAM"].set_value(adjusted_ram) await project_nodes_repo.update( connection, From 2e713951ba6bdc38396bc339ffb03f745d33827a Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 17:49:41 +0200 Subject: [PATCH 82/93] Update packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- 
.../src/dask_task_models_library/resource_constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 68716c9d5b67..715c037f107b 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -43,7 +43,7 @@ def estimate_dask_worker_resources_from_ec2_instance( Returns: tuple: Estimated resources for the dask worker (cpus, ram). """ - worker_cpus = min(0.1, cpus - _CPUS_SAFE_MARGIN) # ensure at least 0.1 CPU + worker_cpus = max(0.1, cpus - _CPUS_SAFE_MARGIN) # ensure at least 0.1 CPU worker_ram = int(ram * (1 - _RAM_SAFE_MARGIN_RATIO)) # apply safe margin return (worker_cpus, worker_ram) From 4cc9bb3a324d10afd309540daeca8ae8e9e04cf5 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 17:49:56 +0200 Subject: [PATCH 83/93] Update services/autoscaling/src/simcore_service_autoscaling/modules/dask.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py index 546a7ad9917f..cf16afc2b5fc 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py @@ -365,7 +365,7 @@ def add_instance_generic_resources( def add_instance_type_generic_resource( settings: DaskMonitoringSettings, instance_type: EC2InstanceType ) -> None: - instance_threads = min(1, round(instance_type.resources.cpus)) + instance_threads = max(1, 
round(instance_type.resources.cpus)) if settings.DASK_NTHREADS > 0: # this overrides everything instance_threads = settings.DASK_NTHREADS From ed8fcd0ededaf5cca614f634fd653aa691a17519 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Mon, 20 Oct 2025 17:55:24 +0200 Subject: [PATCH 84/93] fix return value --- .../modules/cluster_scaling/_auto_scaling_core.py | 6 +++--- .../modules/cluster_scaling/_provider_computational.py | 7 ++++--- .../modules/cluster_scaling/_provider_dynamic.py | 4 ++-- .../modules/cluster_scaling/_provider_protocol.py | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py index 37c7dcd6102d..535df02d3cf2 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_auto_scaling_core.py @@ -377,10 +377,10 @@ def _as_selection(instance_type: EC2InstanceType) -> int: # NOTE: will raise ValueError if allowed_instance_types not in allowed_instance_type_names return allowed_instance_type_names.index(f"{instance_type.name}") - allowed_instance_types.sort(key=_as_selection) - for instance_type in allowed_instance_types: + return [ auto_scaling_mode.adjust_instance_type_resources(app, instance_type) - return allowed_instance_types + for instance_type in sorted(allowed_instance_types, key=_as_selection) + ] async def _activate_and_notify( diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py index 9701bc19fe79..d580868627f2 100644 --- 
a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_computational.py @@ -199,7 +199,7 @@ def add_instance_generic_resources( def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType - ) -> None: + ) -> EC2InstanceType: assert self # nosec assert app # nosec app_settings = get_application_settings(app) @@ -207,12 +207,13 @@ def adjust_instance_type_resources( adjusted_cpus, adjusted_ram = estimate_dask_worker_resources_from_ec2_instance( instance_type.resources.cpus, instance_type.resources.ram ) - dataclasses.replace( + replaced_instance_type = dataclasses.replace( instance_type, resources=instance_type.resources.model_copy( update={"cpus": adjusted_cpus, "ram": ByteSize(adjusted_ram)} ), ) dask.add_instance_type_generic_resource( - app_settings.AUTOSCALING_DASK, instance_type + app_settings.AUTOSCALING_DASK, replaced_instance_type ) + return replaced_instance_type diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py index e6b40c22b14a..7aba033ba1fb 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py @@ -129,7 +129,7 @@ def add_instance_generic_resources( def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType - ) -> None: + ) -> EC2InstanceType: assert self # nosec assert app # nosec # nothing to do at the moment @@ -139,7 +139,7 @@ def adjust_instance_type_resources( - _MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * instance_type.resources.ram - _SIDECARS_OPS_SAFE_RAM_MARGIN ) - dataclasses.replace( + return dataclasses.replace( instance_type, 
resources=instance_type.resources.model_copy( update={"cpus": adjusted_cpus, "ram": ByteSize(adjusted_ram)} diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py index 62003854fef3..d2f711229c4f 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_protocol.py @@ -55,4 +55,4 @@ def add_instance_generic_resources( def adjust_instance_type_resources( self, app: FastAPI, instance_type: EC2InstanceType - ) -> None: ... + ) -> EC2InstanceType: ... From e23a7897c83056666cc15e03daa9fbe06f54e773 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:20:04 +0200 Subject: [PATCH 85/93] created a base function to compute resources --- .../src/servicelib/docker_utils.py | 30 +++++++++++++++++++ .../cluster_scaling/_provider_dynamic.py | 24 +++++---------- .../projects/_projects_service.py | 29 +++++++++--------- 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/packages/service-library/src/servicelib/docker_utils.py b/packages/service-library/src/servicelib/docker_utils.py index a919cb9487d7..374c05595beb 100644 --- a/packages/service-library/src/servicelib/docker_utils.py +++ b/packages/service-library/src/servicelib/docker_utils.py @@ -326,3 +326,33 @@ async def _pull_image_with_retry() -> None: ) await _pull_image_with_retry() + + +_CPUS_SAFE_MARGIN: Final[float] = ( + 1.4 # accounts for machine overhead (ops + sidecar itself) +) +_MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO: Final[float] = ( + 0.1 # NOTE: machines always have less available RAM than advertised +) +_SIDECARS_OPS_SAFE_RAM_MARGIN: Final[ByteSize] = TypeAdapter(ByteSize).validate_python( + "1GiB" +) +DYNAMIC_SIDECAR_MIN_CPUS: Final[float] = 
0.5 + + +def estimate_dynamic_sidecar_resources_from_ec2_instance( + cpus: float, ram: int +) -> tuple[float, int]: + """Estimates the resources available to a dynamic-sidecar running in an EC2 instance, + taking into account safe margins for CPU and RAM, as the EC2 full resources are not completely visible + + Returns: + tuple: Estimated resources for the dynamic-sidecar (cpus, ram). + """ + # dynamic-sidecar usually needs less CPU + sidecar_cpus = max(DYNAMIC_SIDECAR_MIN_CPUS, cpus - _CPUS_SAFE_MARGIN) + sidecar_ram = int( + ram - _MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * ram - _SIDECARS_OPS_SAFE_RAM_MARGIN + ) + + return (sidecar_cpus, sidecar_ram) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py index 7aba033ba1fb..d7499dc92e1a 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_provider_dynamic.py @@ -1,12 +1,12 @@ import dataclasses -from typing import Final from aws_library.ec2 import EC2InstanceData, EC2Tags, Resources from aws_library.ec2._models import EC2InstanceType from fastapi import FastAPI from models_library.docker import DockerLabelKey from models_library.generated_models.docker_rest_api import Node, Task -from pydantic import ByteSize, TypeAdapter +from pydantic import ByteSize +from servicelib.docker_utils import estimate_dynamic_sidecar_resources_from_ec2_instance from types_aiobotocore_ec2.literals import InstanceTypeType from ...core.settings import get_application_settings @@ -14,15 +14,6 @@ from ...utils import utils_docker, utils_ec2 from ..docker import get_docker_client -_MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO: Final[float] = ( - 0.1 # NOTE: machines always have less available RAM than advertised -) -_SIDECARS_OPS_SAFE_RAM_MARGIN: 
Final[ByteSize] = TypeAdapter(ByteSize).validate_python( - "1GiB" -) -_CPUS_SAFE_MARGIN: Final[float] = 1.4 -_MIN_NUM_CPUS: Final[float] = 0.5 - class DynamicAutoscalingProvider: async def get_monitored_nodes(self, app: FastAPI) -> list[Node]: @@ -132,13 +123,12 @@ def adjust_instance_type_resources( ) -> EC2InstanceType: assert self # nosec assert app # nosec - # nothing to do at the moment - adjusted_cpus = float(instance_type.resources.cpus) - _CPUS_SAFE_MARGIN - adjusted_ram = int( - instance_type.resources.ram - - _MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * instance_type.resources.ram - - _SIDECARS_OPS_SAFE_RAM_MARGIN + adjusted_cpus, adjusted_ram = ( + estimate_dynamic_sidecar_resources_from_ec2_instance( + instance_type.resources.cpus, instance_type.resources.ram + ) ) + return dataclasses.replace( instance_type, resources=instance_type.resources.model_copy( diff --git a/services/web/server/src/simcore_service_webserver/projects/_projects_service.py b/services/web/server/src/simcore_service_webserver/projects/_projects_service.py index 9bd620d3d23b..4d244d2c1f35 100644 --- a/services/web/server/src/simcore_service_webserver/projects/_projects_service.py +++ b/services/web/server/src/simcore_service_webserver/projects/_projects_service.py @@ -88,6 +88,10 @@ X_FORWARDED_PROTO, X_SIMCORE_USER_AGENT, ) +from servicelib.docker_utils import ( + DYNAMIC_SIDECAR_MIN_CPUS, + estimate_dynamic_sidecar_resources_from_ec2_instance, +) from servicelib.logging_utils import log_context from servicelib.rabbitmq import RemoteMethodNotRegisteredError, RPCServerError from servicelib.rabbitmq.rpc_interfaces.catalog import services as catalog_rpc @@ -652,12 +656,12 @@ def _by_type_name(ec2: EC2InstanceTypeGet) -> bool: app, user_id, project_id, node_id, service_key, service_version ) scalable_service_name = DEFAULT_SINGLE_SERVICE_NAME - new_cpus_value = float(selected_ec2_instance_type.cpus) - _CPUS_SAFE_MARGIN - new_ram_value = int( - selected_ec2_instance_type.ram - - 
_MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * selected_ec2_instance_type.ram - - _SIDECARS_OPS_SAFE_RAM_MARGIN + new_cpus_value, new_ram_value = ( + estimate_dynamic_sidecar_resources_from_ec2_instance( + selected_ec2_instance_type.cpus, selected_ec2_instance_type.ram + ) ) + if DEFAULT_SINGLE_SERVICE_NAME not in node_resources: # NOTE: we go for the largest sub-service and scale it up/down scalable_service_name, hungry_service_resources = max( @@ -680,17 +684,14 @@ def _by_type_name(ec2: EC2InstanceTypeGet) -> bool: } ) new_cpus_value = max( - float(selected_ec2_instance_type.cpus) - - _CPUS_SAFE_MARGIN - - other_services_resources["CPU"], - _MIN_NUM_CPUS, + new_cpus_value - other_services_resources["CPU"], + DYNAMIC_SIDECAR_MIN_CPUS, ) - new_ram_value = int( - selected_ec2_instance_type.ram - - _MACHINE_TOTAL_RAM_SAFE_MARGIN_RATIO * selected_ec2_instance_type.ram - - other_services_resources["RAM"] - - _SIDECARS_OPS_SAFE_RAM_MARGIN + + new_ram_value = max( + int(new_ram_value - other_services_resources["RAM"]), 128 * 1024 * 1024 ) + # scale the service node_resources[scalable_service_name].resources["CPU"].set_value(new_cpus_value) node_resources[scalable_service_name].resources["RAM"].set_value(new_ram_value) From 525954cdf015b87bfbb9dd9eee713bed374339d0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:57:33 +0200 Subject: [PATCH 86/93] fixed tests --- .../unit/test_modules_cluster_scaling_dynamic.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py index bf9e42a1ec67..b16ac41233f0 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_dynamic.py @@ -978,7 +978,7 @@ async def _assert_wait_for_ec2_instances_terminated() -> None: 
_ScaleUpParams( imposed_instance_type=None, service_resources=Resources( - cpus=4, ram=TypeAdapter(ByteSize).validate_python("128Gib") + cpus=4, ram=TypeAdapter(ByteSize).validate_python("114Gib") ), num_services=1, expected_instance_type="r5n.4xlarge", @@ -990,7 +990,7 @@ async def _assert_wait_for_ec2_instances_terminated() -> None: _ScaleUpParams( imposed_instance_type="t2.xlarge", service_resources=Resources( - cpus=4, ram=TypeAdapter(ByteSize).validate_python("4Gib") + cpus=2.6, ram=TypeAdapter(ByteSize).validate_python("4Gib") ), num_services=1, expected_instance_type="t2.xlarge", @@ -1002,7 +1002,7 @@ async def _assert_wait_for_ec2_instances_terminated() -> None: _ScaleUpParams( imposed_instance_type="r5n.8xlarge", service_resources=Resources( - cpus=4, ram=TypeAdapter(ByteSize).validate_python("128Gib") + cpus=4, ram=TypeAdapter(ByteSize).validate_python("114Gib") ), num_services=1, expected_instance_type="r5n.8xlarge", @@ -1165,7 +1165,7 @@ async def test_cluster_scaling_up_and_down_against_aws( ), num_services=10, expected_instance_type="r5n.4xlarge", # 1 GPU, 16 CPUs, 128GiB - expected_num_instances=4, + expected_num_instances=5, ), id="sim4life-light", ), @@ -1254,7 +1254,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( _ScaleUpParams( imposed_instance_type="g4dn.2xlarge", # 1 GPU, 8 CPUs, 32GiB service_resources=Resources( - cpus=8, ram=TypeAdapter(ByteSize).validate_python("15Gib") + cpus=6.6, ram=TypeAdapter(ByteSize).validate_python("15Gib") ), num_services=12, expected_instance_type="g4dn.2xlarge", # 1 GPU, 8 CPUs, 32GiB @@ -1263,7 +1263,7 @@ async def test_cluster_scaling_up_starts_multiple_instances( _ScaleUpParams( imposed_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB service_resources=Resources( - cpus=32, ram=TypeAdapter(ByteSize).validate_python("20480MB") + cpus=30.6, ram=TypeAdapter(ByteSize).validate_python("20480MB") ), num_services=7, expected_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB @@ -1556,7 +1556,7 @@ async 
def test_cluster_adapts_machines_on_the_fly( # noqa: PLR0915 _ScaleUpParams( imposed_instance_type=None, service_resources=Resources( - cpus=4, ram=TypeAdapter(ByteSize).validate_python("128Gib") + cpus=4, ram=TypeAdapter(ByteSize).validate_python("114Gib") ), num_services=1, expected_instance_type="r5n.4xlarge", From fc095151cbf99d17af9a32c97e432739ccad13a0 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:58:50 +0200 Subject: [PATCH 87/93] pylint --- .../autoscaling/tests/unit/test_modules_instrumentation_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/autoscaling/tests/unit/test_modules_instrumentation_core.py b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py index b3a843d8adbe..8b1eff250e9a 100644 --- a/services/autoscaling/tests/unit/test_modules_instrumentation_core.py +++ b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py @@ -12,6 +12,7 @@ def disabled_instrumentation( app_environment: EnvVarsDict, monkeypatch: pytest.MonkeyPatch ) -> None: + assert app_environment monkeypatch.setenv("AUTOSCALING_PROMETHEUS_INSTRUMENTATION_ENABLED", "false") From a7d975be232fd9f65c0bcd1d1f6547869d31290c Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 17:59:02 +0200 Subject: [PATCH 88/93] linter --- .../tests/unit/test_modules_instrumentation_core.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/services/autoscaling/tests/unit/test_modules_instrumentation_core.py b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py index 8b1eff250e9a..ffc8d87bcb9d 100644 --- a/services/autoscaling/tests/unit/test_modules_instrumentation_core.py +++ b/services/autoscaling/tests/unit/test_modules_instrumentation_core.py @@ -1,3 +1,11 @@ +# pylint: disable=no-value-for-parameter +# pylint: disable=redefined-outer-name +# pylint: disable=too-many-arguments +# pylint: 
disable=too-many-positional-arguments +# pylint: disable=too-many-statements +# pylint: disable=unused-argument +# pylint: disable=unused-variable + import pytest from fastapi import FastAPI from pytest_simcore.helpers.typing_env import EnvVarsDict From c998a25f7a76e3d9d56033b8f2cf8f8cc01d7043 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 18:29:26 +0200 Subject: [PATCH 89/93] fixed tests --- ...t_modules_cluster_scaling_computational.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py index 5f51a4f34c19..bba8531e0032 100644 --- a/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py +++ b/services/autoscaling/tests/unit/test_modules_cluster_scaling_computational.py @@ -22,7 +22,9 @@ import pytest from aws_library.ec2 import Resources from dask_task_models_library.resource_constraints import ( + DASK_WORKER_THREAD_RESOURCE_NAME, create_ec2_resource_constraint_key, + estimate_dask_worker_resources_from_ec2_instance, ) from faker import Faker from fastapi import FastAPI @@ -259,16 +261,25 @@ async def _create_task_with_resources( instance_types = await ec2_client.describe_instance_types( InstanceTypes=[dask_task_imposed_ec2_type] ) + assert instance_types assert "InstanceTypes" in instance_types assert instance_types["InstanceTypes"] assert "MemoryInfo" in instance_types["InstanceTypes"][0] assert "SizeInMiB" in instance_types["InstanceTypes"][0]["MemoryInfo"] + ec2_ram = TypeAdapter(ByteSize).validate_python( + f"{instance_types['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']}MiB", + ) + assert "VCpuInfo" in instance_types["InstanceTypes"][0] + assert "DefaultVCpus" in instance_types["InstanceTypes"][0]["VCpuInfo"] + ec2_cpus = instance_types["InstanceTypes"][0]["VCpuInfo"]["DefaultVCpus"] 
+ required_cpus, required_ram = estimate_dask_worker_resources_from_ec2_instance( + ec2_cpus, ec2_ram + ) task_resources = Resources( - cpus=1, - ram=TypeAdapter(ByteSize).validate_python( - f"{instance_types['InstanceTypes'][0]['MemoryInfo']['SizeInMiB']}MiB", - ), + cpus=required_cpus, + ram=ByteSize(required_ram), + generic_resources={DASK_WORKER_THREAD_RESOURCE_NAME: 1}, ) assert task_resources @@ -443,7 +454,7 @@ async def test_cluster_scaling_with_task_with_too_much_resources_starts_nothing( _ScaleUpParams( imposed_instance_type=None, task_resources=Resources( - cpus=1, ram=TypeAdapter(ByteSize).validate_python("128Gib") + cpus=1, ram=TypeAdapter(ByteSize).validate_python("115Gib") ), num_tasks=1, expected_instance_type="r5n.4xlarge", @@ -465,7 +476,7 @@ async def test_cluster_scaling_with_task_with_too_much_resources_starts_nothing( _ScaleUpParams( imposed_instance_type="r5n.8xlarge", task_resources=Resources( - cpus=1, ram=TypeAdapter(ByteSize).validate_python("116Gib") + cpus=1, ram=TypeAdapter(ByteSize).validate_python("115Gib") ), num_tasks=1, expected_instance_type="r5n.8xlarge", @@ -1281,7 +1292,7 @@ async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_star _ScaleUpParams( imposed_instance_type=None, task_resources=Resources( - cpus=1, ram=TypeAdapter(ByteSize).validate_python("128Gib") + cpus=1, ram=TypeAdapter(ByteSize).validate_python("115Gib") ), num_tasks=1, expected_instance_type="r5n.4xlarge", @@ -1456,7 +1467,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( _ScaleUpParams( imposed_instance_type="g4dn.2xlarge", # 1 GPU, 8 CPUs, 32GiB task_resources=Resources( - cpus=8, ram=TypeAdapter(ByteSize).validate_python("15Gib") + cpus=7.9, ram=TypeAdapter(ByteSize).validate_python("15Gib") ), num_tasks=12, expected_instance_type="g4dn.2xlarge", # 1 GPU, 8 CPUs, 32GiB @@ -1465,7 +1476,7 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted( _ScaleUpParams( 
imposed_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB task_resources=Resources( - cpus=32, ram=TypeAdapter(ByteSize).validate_python("20480MB") + cpus=31.9, ram=TypeAdapter(ByteSize).validate_python("20480MB") ), num_tasks=7, expected_instance_type="g4dn.8xlarge", # 32CPUs, 128GiB From 6371b25502a76809b6c511a86cbc68d7fde43c11 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 18:29:33 +0200 Subject: [PATCH 90/93] fixed types --- .../src/dask_task_models_library/resource_constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py index 715c037f107b..7770ba74050a 100644 --- a/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py +++ b/packages/dask-task-models-library/src/dask_task_models_library/resource_constraints.py @@ -36,7 +36,7 @@ def get_ec2_instance_type_from_resources( def estimate_dask_worker_resources_from_ec2_instance( cpus: float, ram: int -) -> tuple[float, float]: +) -> tuple[float, int]: """Estimates the resources available to a dask worker running in an EC2 instance, taking into account safe margins for CPU and RAM. 
From c773ac1cd8fa97da42a398995adda312c355b638 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:36:23 +0200 Subject: [PATCH 91/93] Update services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../modules/cluster_scaling/_utils_computational.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py index f5ed682f6669..1b5225966809 100644 --- a/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py +++ b/services/autoscaling/src/simcore_service_autoscaling/modules/cluster_scaling/_utils_computational.py @@ -27,7 +27,7 @@ def resources_from_dask_task(task: DaskTask) -> Resources: task_resources = ( _DEFAULT_DASK_RESOURCES | task.required_resources - ) # merge with defaults to ensure there is always some minimal resource defined + ) # merge defaults with task resources (task resources override defaults) return Resources.from_flat_dict( cast(dict[str, GenericResourceValueType], task_resources), From 29d4211419cc72f64bef989cea35e30887b24347 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Tue, 21 Oct 2025 21:36:51 +0200 Subject: [PATCH 92/93] Update packages/aws-library/src/aws_library/ec2/_models.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/aws-library/src/aws_library/ec2/_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index 09f49bb6aae4..d5ff855ad355 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ 
b/packages/aws-library/src/aws_library/ec2/_models.py @@ -292,8 +292,8 @@ def validate_bash_calls(cls, v): # NOTE: this will not capture runtime errors, but at least some syntax errors such as invalid quotes sh.bash( "-n", - temp_file.name, # pyright: ignore[reportCallIssue] - ) # sh is untyped, but this call is safe for bash syntax checking + temp_file.name, # pyright: ignore[reportCallIssue] - sh is untyped but safe for bash syntax checking + ) except sh.ErrorReturnCode as exc: msg = f"Invalid bash call in custom_boot_scripts: {v}, Error: {exc.stderr}" raise ValueError(msg) from exc From f2bcb526e12eb9d8966ff96e4dd8a6eff46016a4 Mon Sep 17 00:00:00 2001 From: sanderegg <35365065+sanderegg@users.noreply.github.com> Date: Wed, 22 Oct 2025 18:43:17 +0200 Subject: [PATCH 93/93] @pcrespov review: add some more string comparisons --- .../aws-library/src/aws_library/ec2/_models.py | 10 ++++++++++ packages/aws-library/tests/test_ec2_models.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/packages/aws-library/src/aws_library/ec2/_models.py b/packages/aws-library/src/aws_library/ec2/_models.py index d5ff855ad355..3e5a2b00691d 100644 --- a/packages/aws-library/src/aws_library/ec2/_models.py +++ b/packages/aws-library/src/aws_library/ec2/_models.py @@ -1,3 +1,4 @@ +import contextlib import datetime import re import tempfile @@ -17,6 +18,8 @@ StrictFloat, StrictInt, StringConstraints, + TypeAdapter, + ValidationError, field_validator, ) from pydantic.config import JsonDict @@ -82,8 +85,15 @@ def __gt__(self, other: "Resources") -> bool: return False else: # remaining options is a is str and b is str or mixed types + # NOTE: we cannot compare strings unless they are equal or some kind of boolean (e.g. 
"true", "false", "yes", "no", "1", "0") assert isinstance(a, str) # nosec assert isinstance(b, int | float | str) # nosec + # let's try to get a boolean out of the values to compare them + with contextlib.suppress(ValidationError): + a_as_boolean = TypeAdapter(bool).validate_python(a) + b_as_boolean = TypeAdapter(bool).validate_python(b) + if not a_as_boolean and b_as_boolean: + return False # here we have either everything greater or equal or non-comparable strings diff --git a/packages/aws-library/tests/test_ec2_models.py b/packages/aws-library/tests/test_ec2_models.py index 0b1fa016fa79..22f03a0bd102 100644 --- a/packages/aws-library/tests/test_ec2_models.py +++ b/packages/aws-library/tests/test_ec2_models.py @@ -83,6 +83,16 @@ Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), True, ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "no"}), + True, + ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "no"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + False, + ), ( Resources(cpus=0.1, ram=ByteSize(1)), Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), @@ -179,6 +189,11 @@ def test_resources_ge_operator( Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "no"}), True, ), + ( + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "no"}), + Resources(cpus=0.1, ram=ByteSize(1), generic_resources={"SSE": "yes"}), + False, + ), ], ) def test_resources_gt_operator(a: Resources, b: Resources, a_greater_than_b: bool):