@@ -29,6 +29,12 @@
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderContext;
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderResult;
 import org.elasticsearch.xpack.core.ml.MachineLearningField;
+import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentState;
+import org.elasticsearch.xpack.core.ml.inference.assignment.Priority;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
 import org.elasticsearch.xpack.core.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
@@ -262,6 +268,69 @@ public void testScale_GivenUndeterminedMemory_ShouldReturnNullCapacity() {
         assertThat(result.requiredCapacity(), is(nullValue()));
     }
 
+    public void testScale_GivenModelWithZeroAllocations() {
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster();
+
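+        // Cluster state: a single 4 GB ML node and one STARTED deployment that has been scaled down to zero allocations.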
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata(
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of(
+                            "model-with-zero-allocations",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams(
+                                    "model-with-zero-allocations",
+                                    "model-with-zero-allocations-deployment",
+                                    400, // model size in bytes
+                                    0, // number of allocations - the case under test
+                                    2, // threads per allocation
+                                    100, // queue capacity
+                                    null, // cache size
+                                    Priority.NORMAL,
+                                    0L, // per-deployment memory bytes
+                                    0L // per-allocation memory bytes
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4) // adaptive allocations enabled, min 0, max 4
+                            ).setAssignmentState(AssignmentState.STARTED).build()
+                        )
+                    )
+                )
+                .build()
+        ).nodes(DiscoveryNodes.builder().add(buildNode("ml-node", ByteSizeValue.ofGb(4), 8)).build()).build();
+
+        AutoscalingDeciderResult result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        // First call doesn't downscale as delay has not been satisfied
+        assertThat(result.reason().summary(), containsString("down scale delay has not been satisfied"));
+
+        // Let's move time forward 1 hour
+        timeSupplier.setOffset(TimeValue.timeValueHours(1));
+
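+        // With the delay satisfied, the decider should request a scale down; the zero-allocation deployment needs no memory, so the required capacity is zero.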
+        result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        assertThat(result.reason().summary(), equalTo("Requesting scale down as tier and/or node size could be smaller"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), equalTo(0L));
+        assertThat(result.requiredCapacity().node().memory().getBytes(), equalTo(0L));
+    }
+
     private DiscoveryNode buildNode(String id, ByteSizeValue machineMemory, int allocatedProcessors) {
         return DiscoveryNodeUtils.create(
             id,
|