Fix scale up for model allocations (#115189)

jan-elastic · jan-elastic · commit 8d6248d54422 · 2024-10-21T13:12:58.000+02:00
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java
@@ -177,7 +177,7 @@ public boolean isEmpty() {
         return anomalyDetectionTasks.isEmpty()
             && snapshotUpgradeTasks.isEmpty()
             && dataframeAnalyticsTasks.isEmpty()
-            && modelAssignments.isEmpty();
+            && modelAssignments.values().stream().allMatch(assignment -> assignment.getTaskParams().getNumberOfAllocations() == 0);
     }
 
     public List<String> findPartiallyAllocatedModels() {
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
@@ -48,6 +48,7 @@
 import static org.elasticsearch.xpack.ml.utils.NativeMemoryCalculator.STATIC_JVM_UPPER_THRESHOLD;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.is;
 import static org.hamcrest.Matchers.nullValue;
 import static org.mockito.ArgumentMatchers.any;
@@ -262,6 +263,116 @@ public void testScale_GivenUndeterminedMemory_ShouldReturnNullCapacity() {
         assertThat(result.requiredCapacity(), is(nullValue()));
     }
 
+    public void testScale_GivenModelWithZeroAllocations() {
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster();
+        // Setup: one STARTED assignment built with AdaptiveAllocationsSettings(true, 0, 4), plus a single node (4GB, 8 processors).
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata(
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of(
+                            "model-with-zero-allocations",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams(
+                                    "model-with-zero-allocations",
+                                    "model-with-zero-allocations-deployment",
+                                    400,
+                                    0, // presumably numberOfAllocations (per the test name); confirm against the TaskParams constructor
+                                    2,
+                                    100,
+                                    null,
+                                    Priority.NORMAL,
+                                    0L,
+                                    0L
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4)
+                            ).setAssignmentState(AssignmentState.STARTED).build()
+                        )
+                    )
+                )
+                .build()
+        ).nodes(DiscoveryNodes.builder().add(buildNode("ml-node", ByteSizeValue.ofGb(4), 8)).build()).build();
+
+        AutoscalingDeciderResult result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        // The first call must not scale down yet: the decider reports that the down-scale delay has not been satisfied.
+        assertThat(result.reason().summary(), containsString("down scale delay has not been satisfied"));
+
+        // Advance the test clock by one hour; the second call below is then expected to request a scale down.
+        timeSupplier.setOffset(TimeValue.timeValueHours(1));
+
+        result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        assertThat(result.reason().summary(), equalTo("Requesting scale down as tier and/or node size could be smaller"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), equalTo(0L));
+        assertThat(result.requiredCapacity().node().memory().getBytes(), equalTo(0L));
+    }
+
+    public void testScale_GivenTrainedModelAllocationAndNoMlNode() {
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster();
+        // Setup: one STARTING assignment in the metadata; no nodes are added to this cluster state.
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata(
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of(
+                            "model",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams(
+                                    "model",
+                                    "model-deployment",
+                                    400,
+                                    1,
+                                    2,
+                                    100,
+                                    null,
+                                    Priority.NORMAL,
+                                    0L,
+                                    0L
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4)
+                            ).setAssignmentState(AssignmentState.STARTING).build()
+                        )
+                    )
+                )
+                .build()
+        ).build();
+
+        AutoscalingDeciderResult result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(AutoscalingCapacity.AutoscalingResources.ZERO, AutoscalingCapacity.AutoscalingResources.ZERO)
+            )
+        );
+        // Starting from ZERO current capacity, a scale up is expected with more than TEST_JOB_SIZE memory and 2 processors.
+        assertThat(result.reason().summary(), containsString("requesting scale up"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), greaterThan(TEST_JOB_SIZE));
+        assertThat(result.requiredCapacity().total().processors().count(), equalTo(2.0));
+        assertThat(result.requiredCapacity().node().memory().getBytes(), greaterThan(TEST_JOB_SIZE));
+        assertThat(result.requiredCapacity().node().processors().count(), equalTo(2.0));
+    }
+
     private DiscoveryNode buildNode(String id, ByteSizeValue machineMemory, int allocatedProcessors) {
         return DiscoveryNodeUtils.create(
             id,

Original file line number	Diff line number	Diff line change
`@@ -177,7 +177,7 @@ public boolean isEmpty() {`
`177`	`177`	`return anomalyDetectionTasks.isEmpty()`
`178`	`178`	`&& snapshotUpgradeTasks.isEmpty()`
`179`	`179`	`&& dataframeAnalyticsTasks.isEmpty()`
`180`		`- && modelAssignments.isEmpty();`
	`180`	`+ && modelAssignments.values().stream().allMatch(assignment -> assignment.getTaskParams().getNumberOfAllocations() == 0);`
`181`	`181`	`}`
`182`	`182`
`183`	`183`	`public List<String> findPartiallyAllocatedModels() {`