ML autoscaling: treat model assignments with zero target allocations as empty

MlAutoscalingContext.isEmpty() used to require that no model assignments
existed at all. With this change it also returns true when every remaining
assignment reports totalTargetAllocations() == 0.

The new test testScale_GivenModelWithZeroAllocations builds a cluster state
holding one STARTED assignment for "model-with-zero-allocations" (adaptive
allocations settings constructed as (true, 0, 4)) and one node built with
4 GB of machine memory. It asserts that the first scale() call reports that
the down scale delay has not been satisfied, and that after the time supplier
is moved forward by one hour scale() requests a scale down whose required
total and per-node memory are both 0 bytes.

diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java
index cca59f27d5c76..f266dda6e3e5d 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingContext.java
@@ -177,7 +177,7 @@ public boolean isEmpty() {
         return anomalyDetectionTasks.isEmpty()
             && snapshotUpgradeTasks.isEmpty()
             && dataframeAnalyticsTasks.isEmpty()
-            && modelAssignments.isEmpty();
+            && modelAssignments.values().stream().allMatch(assignment -> assignment.totalTargetAllocations() == 0);
     }
 
     public List findPartiallyAllocatedModels() {
diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
index 632730bc7f141..a1db31c474f31 100644
--- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
+++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/autoscaling/MlAutoscalingDeciderServiceTests.java
@@ -29,6 +29,12 @@
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderContext;
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderResult;
 import org.elasticsearch.xpack.core.ml.MachineLearningField;
+import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentState;
+import org.elasticsearch.xpack.core.ml.inference.assignment.Priority;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
 import org.elasticsearch.xpack.core.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
@@ -262,6 +268,69 @@ public void testScale_GivenUndeterminedMemory_ShouldReturnNullCapacity() {
         assertThat(result.requiredCapacity(), is(nullValue()));
     }
 
+    public void testScale_GivenModelWithZeroAllocations() {
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster();
+
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata(
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of(
+                            "model-with-zero-allocations",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams(
+                                    "model-with-zero-allocations",
+                                    "model-with-zero-allocations-deployment",
+                                    400,
+                                    0,
+                                    2,
+                                    100,
+                                    null,
+                                    Priority.NORMAL,
+                                    0L,
+                                    0L
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4)
+                            ).setAssignmentState(AssignmentState.STARTED).build()
+                        )
+                    )
+                )
+                .build()
+        ).nodes(DiscoveryNodes.builder().add(buildNode("ml-node", ByteSizeValue.ofGb(4), 8)).build()).build();
+
+        AutoscalingDeciderResult result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        // First call doesn't downscale as delay has not been satisfied
+        assertThat(result.reason().summary(), containsString("down scale delay has not been satisfied"));
+
+        // Let's move time forward 1 hour
+        timeSupplier.setOffset(TimeValue.timeValueHours(1));
+
+        result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        assertThat(result.reason().summary(), equalTo("Requesting scale down as tier and/or node size could be smaller"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), equalTo(0L));
+        assertThat(result.requiredCapacity().node().memory().getBytes(), equalTo(0L));
+    }
+
     private DiscoveryNode buildNode(String id, ByteSizeValue machineMemory, int allocatedProcessors) {
         return DiscoveryNodeUtils.create(
             id,