@@ -29,6 +29,12 @@
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderContext;
 import org.elasticsearch.xpack.autoscaling.capacity.AutoscalingDeciderResult;
 import org.elasticsearch.xpack.core.ml.MachineLearningField;
+import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
+import org.elasticsearch.xpack.core.ml.inference.assignment.AssignmentState;
+import org.elasticsearch.xpack.core.ml.inference.assignment.Priority;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignment;
+import org.elasticsearch.xpack.core.ml.inference.assignment.TrainedModelAssignmentMetadata;
 import org.elasticsearch.xpack.core.ml.job.config.JobState;
 import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.job.NodeLoad;
@@ -262,6 +268,69 @@ public void testScale_GivenUndeterminedMemory_ShouldReturnNullCapacity() {
         assertThat(result.requiredCapacity(), is(nullValue()));
     }
 
+    public void testScale_GivenModelWithZeroAllocations() {
+        MlAutoscalingDeciderService service = buildService();
+        service.onMaster();
+
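+        // Cluster state: a single 4 GB ML node and one STARTED deployment that has been scaled down to zero allocations.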
+        ClusterState clusterState = new ClusterState.Builder(new ClusterName("cluster")).metadata(
+            Metadata.builder()
+                .putCustom(
+                    TrainedModelAssignmentMetadata.NAME,
+                    new TrainedModelAssignmentMetadata(
+                        Map.of(
+                            "model-with-zero-allocations",
+                            TrainedModelAssignment.Builder.empty(
+                                new StartTrainedModelDeploymentAction.TaskParams(
+                                    "model-with-zero-allocations",
+                                    "model-with-zero-allocations-deployment",
+                                    400, // model size in bytes
+                                    0, // number of allocations - the case under test
+                                    2, // threads per allocation
+                                    100, // queue capacity
+                                    null, // cache size
+                                    Priority.NORMAL,
+                                    0L, // per-deployment memory bytes
+                                    0L // per-allocation memory bytes
+                                ),
+                                new AdaptiveAllocationsSettings(true, 0, 4) // adaptive allocations enabled, min 0, max 4
+                            ).setAssignmentState(AssignmentState.STARTED).build()
+                        )
+                    )
+                )
+                .build()
+        ).nodes(DiscoveryNodes.builder().add(buildNode("ml-node", ByteSizeValue.ofGb(4), 8)).build()).build();
+
+        AutoscalingDeciderResult result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        // First call doesn't downscale as delay has not been satisfied
+        assertThat(result.reason().summary(), containsString("down scale delay has not been satisfied"));
+
+        // Let's move time forward 1 hour
+        timeSupplier.setOffset(TimeValue.timeValueHours(1));
+
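+        // With the delay satisfied, the decider should request a scale down; the zero-allocation deployment needs no memory, so the required capacity is zero.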
+        result = service.scale(
+            Settings.EMPTY,
+            new DeciderContext(
+                clusterState,
+                new AutoscalingCapacity(
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null),
+                    new AutoscalingCapacity.AutoscalingResources(null, ByteSizeValue.ofGb(4), null)
+                )
+            )
+        );
+        assertThat(result.reason().summary(), equalTo("Requesting scale down as tier and/or node size could be smaller"));
+        assertThat(result.requiredCapacity().total().memory().getBytes(), equalTo(0L));
+        assertThat(result.requiredCapacity().node().memory().getBytes(), equalTo(0L));
+    }
+
     private DiscoveryNode buildNode(String id, ByteSizeValue machineMemory, int allocatedProcessors) {
         return DiscoveryNodeUtils.create(
             id,
|