elastic · jan-elastic · Oct 21, 2024 · Oct 18, 2024
diff --git a/.../plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java b/.../plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java
@@ -177,7 +177,7 @@ public boolean isEmpty() {
         return anomalyDetectionTasks.isEmpty()
             && snapshotUpgradeTasks.isEmpty()
             && dataframeAnalyticsTasks.isEmpty()
-            && modelAssignments.isEmpty();
+            && modelAssignments.values().stream().allMatch(assignment -> assignment.totalTargetAllocations() == 0);
     }
 
     public List<String> findPartiallyAllocatedModels() {

diff --git a/...rc/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/...rc/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
@@ -29,6 +29,12 @@
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderContext;
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderResult;
 import org.elasticsearch.xpack.core.ml.MachineLearningField;
+import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentState;
+import org.elasticsearch.xpack.core.ml.inference.assignment.Priority;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
 import org.elasticsearch.xpack.core.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
@@ -262,6 +268,69 @@ public void testScale_GivenUndeterminedMemory_ShouldReturnNullCapacity() {
         assertThat(result.requiredCapacity(), is(nullValue()));
     }
 
+    public void testScale_GivenModelWithZeroAllocations() { // the single assignment below must not block scaling memory to 0
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster(); // NOTE(review): presumably puts the decider into its on-master state - confirm
+
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata( // one assignment, one node
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of( // exactly one entry, keyed by the same string that is passed to TaskParams below
+                            "model-with-zero-allocations",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams( // positional args; meanings below are hedged
+                                    "model-with-zero-allocations",
+                                    "model-with-zero-allocations-deployment",
+                                    400, // presumably modelBytes - confirm against the TaskParams constructor
+                                    0, // presumably numberOfAllocations, the zero this test is named for - confirm
+                                    2, // presumably threadsPerAllocation - confirm
+                                    100, // presumably queueCapacity - confirm
+                                    null, // presumably cacheSize (unset) - confirm
+                                    Priority.NORMAL,
+                                    0L, // presumably perDeploymentMemoryBytes - confirm
+                                    0L // presumably perAllocationMemoryBytes - confirm
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4) // presumably (enabled, min, max allocations) - confirm
+                            ).setAssignmentState(AssignmentState.STARTED).build() // the assignment is explicitly marked STARTED
+                        )
+                    )
+                )
+                .build() // next line adds the only node: "ml-node", 4 GB machine memory, 8 allocated processors
+        ).nodes(DiscoveryNodes.builder().add(buildNode("ml-node", ByteSizeValue.ofGb(4), 8)).build()).build();
+
+        AutoscalingDeciderResult result = service.scale( // first decision, taken before any time offset is applied
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity( // NOTE(review): presumably (total, node) current capacity, 4 GB memory each - confirm
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        // The first call must not request a scale down yet: its summary reports the down scale delay as not satisfied
+        assertThat(result.reason().summary(), containsString("down scale delay has not been satisfied"));
+
+        // Offset the time supplier by one hour, then ask again with the same cluster state and capacity
+        timeSupplier.setOffset(TimeValue.timeValueHours(1));
+
+        result = service.scale( // second decision: inputs identical to the first call, only the time offset differs
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        assertThat(result.reason().summary(), equalTo("Requesting scale down as tier and/or node size could be smaller"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), equalTo(0L)); // required total memory is 0 bytes
+        assertThat(result.requiredCapacity().node().memory().getBytes(), equalTo(0L)); // required per-node memory is 0 bytes
+    }
+
     private DiscoveryNode buildNode(String id, ByteSizeValue machineMemory, int allocatedProcessors) {
         return DiscoveryNodeUtils.create(
             id,