From 817c1c08f0b361109156dee0c7eae30fc88dda66 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:03:43 +0200 Subject: [PATCH 1/8] Test before model assignment. --- .../TrainedModelAssignmentRebalancer.java | 24 +++++-- .../assignment/planning/AssignmentPlan.java | 4 +- .../planning/ZoneAwareAssignmentPlanner.java | 4 +- ...TrainedModelAssignmentRebalancerTests.java | 68 +++++++++++++++++++ 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index f523b4b086f35..91efb23e039c4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -138,7 +138,9 @@ static void copyAssignments(AssignmentPlan source, AssignmentPlan.Builder dest, Map sourceNodeAssignments = source.assignments(deployment).orElse(Map.of()); for (Map.Entry sourceAssignment : sourceNodeAssignments.entrySet()) { AssignmentPlan.Node node = originalNodeById.get(sourceAssignment.getKey().id()); - dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); + if(dest.canAssign(deployment, node, sourceAssignment.getValue())) { + dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); + } } } } @@ -318,7 +320,12 @@ private Map, List> createNodesByZoneMap() { } private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory(); + // load.getFreeMemoryExcludingPerNodeOverhead() = maxMemory - assignedJobMemoryExcludingPerNodeOverhead - 30MB native executable code overhead + // assignedJobMemoryExcludingPerNodeOverhead = assignedAnomalyDetectorMemory + assignedDataFrameAnalyticsMemory + assignedNativeInferenceMemory + // load.getAssignedNativeInferenceMemory() = assignedNativeInferenceMemory + // TODO: (valeriy) assignedNativeInferenceMemory is double counted in the current calculation. + return load.getFreeMemoryExcludingPerNodeOverhead()/* - load.getAssignedNativeInferenceMemory()*/; + } private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) { @@ -405,8 +412,17 @@ private Optional explainAssignment( if (Strings.isNullOrEmpty(load.getError()) == false) { return Optional.of(load.getError()); } - - if (deployment.memoryBytes() > assignmentPlan.getRemainingNodeMemory(node.getId())) { + // TODO (valeriy): this test should be actually true, but it is false, because we use the "naked" deployment footprint + // Get existing allocations for this node to avoid double counting + int existingAllocationsOnNode = assignmentPlan.assignments(deployment) + .flatMap(assignments -> assignments.entrySet().stream() + .filter(entry -> entry.getKey().id().equals(node.getId())) + .findFirst() + .map(Map.Entry::getValue)) + .orElse(0); + int notYetAssignedAllocations = deployment.allocations() - assignmentPlan.totalAllocations(deployment); +// if (deployment.estimateMemoryUsageBytes(deployment.allocations() - existingAllocationsOnNode) > assignmentPlan.getRemainingNodeMemory(node.getId())) { + if (deployment.estimateAdditionalMemoryUsageBytes(existingAllocationsOnNode, existingAllocationsOnNode + notYetAssignedAllocations) > assignmentPlan.getRemainingNodeMemory(node.getId())) { // If any ML processes are running on a node we require some space to load the shared libraries. // So if none are currently running then this per-node overhead must be added to the requirement. // From node load we know if we had any jobs or models assigned before the rebalance. diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index a90a8cb9d5262..84f5e9d6a0646 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -107,7 +107,7 @@ public long estimateMemoryUsageBytes(int allocations) { ); } - long estimateAdditionalMemoryUsageBytes(int allocationsOld, int allocationsNew) { + public long estimateAdditionalMemoryUsageBytes(int allocationsOld, int allocationsNew) { return StartTrainedModelDeploymentAction.estimateMemoryUsageBytes( modelId, memoryBytes, @@ -417,7 +417,7 @@ int getRemainingAllocations(Deployment m) { return remainingModelAllocations.get(m); } - boolean canAssign(Deployment deployment, Node node, int allocations) { + public boolean canAssign(Deployment deployment, Node node, int allocations) { long requiredMemory = getDeploymentMemoryRequirement(deployment, node, allocations); return canAssign(deployment, node, allocations, requiredMemory); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 64cd40fdc537d..8b26dfe8e747a 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -211,7 +211,9 @@ private AssignmentPlan swapOriginalDeploymentsInPlan( Map nodeAssignments = plan.assignments(planDeployment).orElse(Map.of()); for (Map.Entry assignment : nodeAssignments.entrySet()) { Node originalNode = originalNodeById.get(assignment.getKey().id()); - finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); + if (finalPlanBuilder.canAssign(originalDeployment, originalNode, assignment.getValue())) { + finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); + } } } return finalPlanBuilder.build(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index b873493100798..2b79ac51764c3 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -1262,4 +1262,72 @@ private static DiscoveryNode buildNode(String name, long nativeMemory, int alloc ) .build(); } + + public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead() { + // Create a node with just enough memory to fit the model plus native executable overhead + long modelMemory = ByteSizeValue.ofMb(200).getBytes(); + long memoryOverhead = ByteSizeValue.ofMb(240).getBytes(); + long JVMOverhead = ByteSizeValue.ofMb(50).getBytes(); + long nodeMemory = memoryOverhead + modelMemory*2 + JVMOverhead; + + DiscoveryNode node = buildNode("node-1", nodeMemory, 4); + + String deploymentId = "model-with-overhead-test"; + StartTrainedModelDeploymentAction.TaskParams taskParams = normalPriorityParams( + deploymentId, + deploymentId, + modelMemory, + 1, + 1 + ); + + TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build(); + Map nodeLoads = new HashMap<>(); + + // This node has no jobs or models yet, so the overhead should be accounted for + nodeLoads.put(node, NodeLoad.builder("node-1") + .setMaxMemory(nodeMemory) + .build()); + + TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer( + currentMetadata, + nodeLoads, + Map.of(List.of(), List.of(node)), + Optional.of(new CreateTrainedModelAssignmentAction.Request(taskParams, null)), + 1 + ).rebalance().build(); + + // Verify the deployment was successful + TrainedModelAssignment assignment = result.getDeploymentAssignment(deploymentId); + assertThat(assignment, is(notNullValue())); + assertThat(assignment.getAssignmentState(), equalTo(AssignmentState.STARTING)); + assertThat(assignment.getNodeRoutingTable(), is(aMapWithSize(1))); + assertThat(assignment.getNodeRoutingTable(), hasKey("node-1")); + assertThat(assignment.getReason().isPresent(), is(false)); + + // Now try with a node that has slightly less memory - this should fail + long insufficientNodeMemory = nodeMemory - ByteSizeValue.ofMb(21).getBytes(); + DiscoveryNode insufficientNode = buildNode("node-2", insufficientNodeMemory, 4); + + Map insufficientNodeLoads = Map.of( + insufficientNode, NodeLoad.builder("node-2") + .setMaxMemory(insufficientNodeMemory) + .build() + ); + + TrainedModelAssignmentMetadata insufficientResult = new TrainedModelAssignmentRebalancer( + TrainedModelAssignmentMetadata.Builder.empty().build(), + insufficientNodeLoads, + Map.of(List.of(), List.of(insufficientNode)), + Optional.of(new CreateTrainedModelAssignmentAction.Request(taskParams, null)), + 1) + .rebalance().build(); + + TrainedModelAssignment insufficientAssignment = insufficientResult.getDeploymentAssignment(deploymentId); + assertThat(insufficientAssignment, is(notNullValue())); + assertThat(insufficientAssignment.getAssignmentState(), equalTo(AssignmentState.STARTING)); + assertThat(insufficientAssignment.getNodeRoutingTable(), is(anEmptyMap())); + assertThat(insufficientAssignment.getReason().isPresent(), is(true)); + assertThat(insufficientAssignment.getReason().get(), containsString("insufficient available memory")); + } } From c8411cb745f9ba2fb92120877e7708f900742602 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:05:03 +0200 Subject: [PATCH 2/8] formatting --- .../TrainedModelAssignmentRebalancer.java | 27 +++++++++++-------- ...TrainedModelAssignmentRebalancerTests.java | 23 +++++----------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index 91efb23e039c4..18f2e32244fd7 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -138,7 +138,7 @@ static void copyAssignments(AssignmentPlan source, AssignmentPlan.Builder dest, Map sourceNodeAssignments = source.assignments(deployment).orElse(Map.of()); for (Map.Entry sourceAssignment : sourceNodeAssignments.entrySet()) { AssignmentPlan.Node node = originalNodeById.get(sourceAssignment.getKey().id()); - if(dest.canAssign(deployment, node, sourceAssignment.getValue())) { + if (dest.canAssign(deployment, node, sourceAssignment.getValue())) { dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); } } @@ -320,8 +320,10 @@ private Map, List> createNodesByZoneMap() { } private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - // load.getFreeMemoryExcludingPerNodeOverhead() = maxMemory - assignedJobMemoryExcludingPerNodeOverhead - 30MB native executable code overhead - // assignedJobMemoryExcludingPerNodeOverhead = assignedAnomalyDetectorMemory + assignedDataFrameAnalyticsMemory + assignedNativeInferenceMemory + // load.getFreeMemoryExcludingPerNodeOverhead() = maxMemory - assignedJobMemoryExcludingPerNodeOverhead - 30MB native executable + // code overhead + // assignedJobMemoryExcludingPerNodeOverhead = assignedAnomalyDetectorMemory + assignedDataFrameAnalyticsMemory + + // assignedNativeInferenceMemory // load.getAssignedNativeInferenceMemory() = assignedNativeInferenceMemory // TODO: (valeriy) assignedNativeInferenceMemory is double counted in the current calculation. return load.getFreeMemoryExcludingPerNodeOverhead()/* - load.getAssignedNativeInferenceMemory()*/; @@ -412,17 +414,20 @@ private Optional explainAssignment( if (Strings.isNullOrEmpty(load.getError()) == false) { return Optional.of(load.getError()); } - // TODO (valeriy): this test should be actually true, but it is false, because we use the "naked" deployment footprint - // Get existing allocations for this node to avoid double counting int existingAllocationsOnNode = assignmentPlan.assignments(deployment) - .flatMap(assignments -> assignments.entrySet().stream() - .filter(entry -> entry.getKey().id().equals(node.getId())) - .findFirst() - .map(Map.Entry::getValue)) + .flatMap( + assignments -> assignments.entrySet() + .stream() + .filter(entry -> entry.getKey().id().equals(node.getId())) + .findFirst() + .map(Map.Entry::getValue) + ) .orElse(0); int notYetAssignedAllocations = deployment.allocations() - assignmentPlan.totalAllocations(deployment); -// if (deployment.estimateMemoryUsageBytes(deployment.allocations() - existingAllocationsOnNode) > assignmentPlan.getRemainingNodeMemory(node.getId())) { - if (deployment.estimateAdditionalMemoryUsageBytes(existingAllocationsOnNode, existingAllocationsOnNode + notYetAssignedAllocations) > assignmentPlan.getRemainingNodeMemory(node.getId())) { + if (deployment.estimateAdditionalMemoryUsageBytes( + existingAllocationsOnNode, + existingAllocationsOnNode + notYetAssignedAllocations + ) > assignmentPlan.getRemainingNodeMemory(node.getId())) { // If any ML processes are running on a node we require some space to load the shared libraries. // So if none are currently running then this per-node overhead must be added to the requirement. // From node load we know if we had any jobs or models assigned before the rebalance. diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index 2b79ac51764c3..40d30b71ffbd6 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -1268,26 +1268,18 @@ public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeE long modelMemory = ByteSizeValue.ofMb(200).getBytes(); long memoryOverhead = ByteSizeValue.ofMb(240).getBytes(); long JVMOverhead = ByteSizeValue.ofMb(50).getBytes(); - long nodeMemory = memoryOverhead + modelMemory*2 + JVMOverhead; + long nodeMemory = memoryOverhead + modelMemory * 2 + JVMOverhead; DiscoveryNode node = buildNode("node-1", nodeMemory, 4); String deploymentId = "model-with-overhead-test"; - StartTrainedModelDeploymentAction.TaskParams taskParams = normalPriorityParams( - deploymentId, - deploymentId, - modelMemory, - 1, - 1 - ); + StartTrainedModelDeploymentAction.TaskParams taskParams = normalPriorityParams(deploymentId, deploymentId, modelMemory, 1, 1); TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build(); Map nodeLoads = new HashMap<>(); // This node has no jobs or models yet, so the overhead should be accounted for - nodeLoads.put(node, NodeLoad.builder("node-1") - .setMaxMemory(nodeMemory) - .build()); + nodeLoads.put(node, NodeLoad.builder("node-1").setMaxMemory(nodeMemory).build()); TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer( currentMetadata, @@ -1310,9 +1302,8 @@ public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeE DiscoveryNode insufficientNode = buildNode("node-2", insufficientNodeMemory, 4); Map insufficientNodeLoads = Map.of( - insufficientNode, NodeLoad.builder("node-2") - .setMaxMemory(insufficientNodeMemory) - .build() + insufficientNode, + NodeLoad.builder("node-2").setMaxMemory(insufficientNodeMemory).build() ); TrainedModelAssignmentMetadata insufficientResult = new TrainedModelAssignmentRebalancer( @@ -1320,8 +1311,8 @@ public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeE insufficientNodeLoads, Map.of(List.of(), List.of(insufficientNode)), Optional.of(new CreateTrainedModelAssignmentAction.Request(taskParams, null)), - 1) - .rebalance().build(); + 1 + ).rebalance().build(); TrainedModelAssignment insufficientAssignment = insufficientResult.getDeploymentAssignment(deploymentId); assertThat(insufficientAssignment, is(notNullValue())); From 454a3fbd88c0837bfca3a293cc6aabf7141c3d50 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:16:14 +0200 Subject: [PATCH 3/8] Update docs/changelog/133916.yaml --- docs/changelog/133916.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133916.yaml diff --git a/docs/changelog/133916.yaml b/docs/changelog/133916.yaml new file mode 100644 index 0000000000000..d35c0d044c7fa --- /dev/null +++ b/docs/changelog/133916.yaml @@ -0,0 +1,5 @@ +pr: 133916 +summary: Fix model assignment error handling and assignment explanation generation +area: Machine Learning +type: bug +issues: [] From 08be4c4f39d2dc6cd6ca77174c52ef8420b09b84 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 10:21:34 +0200 Subject: [PATCH 4/8] remove double-counting of inference memory --- .../TrainedModelAssignmentRebalancer.java | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index 18f2e32244fd7..e209eb38d3b77 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -300,9 +300,7 @@ private Map, List> createNodesByZoneMap() { nodes.add( new AssignmentPlan.Node( discoveryNode.getId(), - // We subtract native inference memory as the planner expects available memory for - // native inference including current assignments. - getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load), + load.getFreeMemoryExcludingPerNodeOverhead(), MlProcessors.get(discoveryNode, allocatedProcessorsScale).roundUp() ) ); @@ -319,17 +317,6 @@ private Map, List> createNodesByZoneMap() { })); } - private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - // load.getFreeMemoryExcludingPerNodeOverhead() = maxMemory - assignedJobMemoryExcludingPerNodeOverhead - 30MB native executable - // code overhead - // assignedJobMemoryExcludingPerNodeOverhead = assignedAnomalyDetectorMemory + assignedDataFrameAnalyticsMemory + - // assignedNativeInferenceMemory - // load.getAssignedNativeInferenceMemory() = assignedNativeInferenceMemory - // TODO: (valeriy) assignedNativeInferenceMemory is double counted in the current calculation. - return load.getFreeMemoryExcludingPerNodeOverhead()/* - load.getAssignedNativeInferenceMemory()*/; - - } - private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) { TrainedModelAssignmentMetadata.Builder builder = TrainedModelAssignmentMetadata.Builder.empty(); for (AssignmentPlan.Deployment deployment : assignmentPlan.deployments()) { From 432e66e46682e57cb081bb5a33b9c97c4394d23b Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Wed, 3 Sep 2025 15:27:57 +0200 Subject: [PATCH 5/8] remove duplicate test --- ...TrainedModelAssignmentRebalancerTests.java | 61 +------------------ 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index 727ab9e356c71..cfef7939ba236 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -1198,7 +1198,7 @@ public void testCopyAssignments() { assertThat(deployment2Assignments.get().get(node2), equalTo(1)); } - public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead() { + public void testRebalance_GivenDeploymentWithMemoryRequirements_ExplainMissingAllocations() { // Create a node with just enough memory to fit the model plus native executable overhead long modelMemory = ByteSizeValue.ofMb(200).getBytes(); long memoryOverhead = ByteSizeValue.ofMb(240).getBytes(); @@ -1320,63 +1320,4 @@ private static DiscoveryNode buildNode(String name, long nativeMemory, int alloc ) .build(); } - - public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead() { - // Create a node with just enough memory to fit the model plus native executable overhead - long modelMemory = ByteSizeValue.ofMb(200).getBytes(); - long memoryOverhead = ByteSizeValue.ofMb(240).getBytes(); - long JVMOverhead = ByteSizeValue.ofMb(50).getBytes(); - long nodeMemory = memoryOverhead + modelMemory * 2 + JVMOverhead; - - DiscoveryNode node = buildNode("node-1", nodeMemory, 4); - - String deploymentId = "model-with-overhead-test"; - StartTrainedModelDeploymentAction.TaskParams taskParams = normalPriorityParams(deploymentId, deploymentId, modelMemory, 1, 1); - - TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata.Builder.empty().build(); - Map nodeLoads = new HashMap<>(); - - // This node has no jobs or models yet, so the overhead should be accounted for - nodeLoads.put(node, NodeLoad.builder("node-1").setMaxMemory(nodeMemory).build()); - - TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer( - currentMetadata, - nodeLoads, - Map.of(List.of(), List.of(node)), - Optional.of(new CreateTrainedModelAssignmentAction.Request(taskParams, null)), - 1 - ).rebalance().build(); - - // Verify the deployment was successful - TrainedModelAssignment assignment = result.getDeploymentAssignment(deploymentId); - assertThat(assignment, is(notNullValue())); - assertThat(assignment.getAssignmentState(), equalTo(AssignmentState.STARTING)); - assertThat(assignment.getNodeRoutingTable(), is(aMapWithSize(1))); - assertThat(assignment.getNodeRoutingTable(), hasKey("node-1")); - assertThat(assignment.getReason().isPresent(), is(false)); - - // Now try with a node that has slightly less memory - this should fail - long insufficientNodeMemory = nodeMemory - ByteSizeValue.ofMb(21).getBytes(); - DiscoveryNode insufficientNode = buildNode("node-2", insufficientNodeMemory, 4); - - Map insufficientNodeLoads = Map.of( - insufficientNode, - NodeLoad.builder("node-2").setMaxMemory(insufficientNodeMemory).build() - ); - - TrainedModelAssignmentMetadata insufficientResult = new TrainedModelAssignmentRebalancer( - TrainedModelAssignmentMetadata.Builder.empty().build(), - insufficientNodeLoads, - Map.of(List.of(), List.of(insufficientNode)), - Optional.of(new CreateTrainedModelAssignmentAction.Request(taskParams, null)), - 1 - ).rebalance().build(); - - TrainedModelAssignment insufficientAssignment = insufficientResult.getDeploymentAssignment(deploymentId); - assertThat(insufficientAssignment, is(notNullValue())); - assertThat(insufficientAssignment.getAssignmentState(), equalTo(AssignmentState.STARTING)); - assertThat(insufficientAssignment.getNodeRoutingTable(), is(anEmptyMap())); - assertThat(insufficientAssignment.getReason().isPresent(), is(true)); - assertThat(insufficientAssignment.getReason().get(), containsString("insufficient available memory")); - } } From c72ba428a6b4d2734117d1b6dc83cffdde6b3a54 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 8 Sep 2025 10:49:28 +0200 Subject: [PATCH 6/8] move canAssign into assignModelToNode --- .../TrainedModelAssignmentRebalancer.java | 4 +--- .../planning/AbstractPreserveAllocations.java | 3 +-- .../assignment/planning/AssignmentPlan.java | 4 ++-- .../planning/RandomizedAssignmentRounding.java | 6 +----- .../planning/ZoneAwareAssignmentPlanner.java | 14 +++++++++++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index c904b342d6623..b44a1f17facef 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -138,9 +138,7 @@ static void copyAssignments(AssignmentPlan source, AssignmentPlan.Builder dest, Map sourceNodeAssignments = source.assignments(deployment).orElse(Map.of()); for (Map.Entry sourceAssignment : sourceNodeAssignments.entrySet()) { AssignmentPlan.Node node = originalNodeById.get(sourceAssignment.getKey().id()); - if (dest.canAssign(deployment, node, sourceAssignment.getValue())) { - dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); - } + dest.assignModelToNode(deployment, node, sourceAssignment.getValue()); } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java index 8a0bbe2ecdd5e..a451f301668bd 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java @@ -103,8 +103,7 @@ AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) { 0 ); - long requiredMemory = mergedPlanBuilder.getDeploymentMemoryRequirement(deploymentNewAllocations, n, newAllocations); - if (newAllocations > 0 && mergedPlanBuilder.canAssign(deploymentNewAllocations, n, newAllocations, requiredMemory)) { + if (newAllocations > 0) { mergedPlanBuilder.assignModelToNode(deploymentNewAllocations, n, newAllocations); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index fd9f64f21af62..9d743f8a202b4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -417,7 +417,7 @@ int getRemainingAllocations(Deployment m) { return remainingModelAllocations.get(m); } - public boolean canAssign(Deployment deployment, Node node, int allocations) { + boolean canAssign(Deployment deployment, Node node, int allocations) { long requiredMemory = getDeploymentMemoryRequirement(deployment, node, allocations); return canAssign(deployment, node, allocations, requiredMemory); } @@ -441,7 +441,7 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio } public Builder assignModelToNode(Deployment deployment, Node node, int allocations, long requiredMemory) { - if (allocations <= 0) { + if (allocations <= 0 || canAssign(deployment, node, allocations, requiredMemory) == false) { return this; } if (requiredMemory > remainingNodeMemory.get(node)) { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java index 8bdc99998a0c2..e28af9a0b6108 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java @@ -310,11 +310,7 @@ private void unassignOversizedModels(Node n) { private AssignmentPlan toPlan() { AssignmentPlan.Builder builder = AssignmentPlan.builder(nodes, deployments); for (Map.Entry, Integer> assignment : tryAssigningRemainingCores().entrySet()) { - // TODO (#101612) The model should be assigned to the node only when it is possible. This means, that canAssign should be - // integrated into the assignModelToNode. - if (builder.canAssign(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue())) { - builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); - } + builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); } return builder.build(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 8b26dfe8e747a..1ccf8840c55be 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -91,6 +91,8 @@ private AssignmentPlan computePlan(boolean tryAssigningPreviouslyAssignedModels) remainingZones, tryAssigningPreviouslyAssignedModels ); + + // Update remaining allocations to account for allocations satisfied in this zone plan.deployments() .forEach( d -> deploymentIdToRemainingAllocations.computeIfPresent( @@ -211,14 +213,20 @@ private AssignmentPlan swapOriginalDeploymentsInPlan( Map nodeAssignments = plan.assignments(planDeployment).orElse(Map.of()); for (Map.Entry assignment : nodeAssignments.entrySet()) { Node originalNode = originalNodeById.get(assignment.getKey().id()); - if (finalPlanBuilder.canAssign(originalDeployment, originalNode, assignment.getValue())) { - finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); - } + finalPlanBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); } } return finalPlanBuilder.build(); } + /** + * The mergeAllocationsByNodeIdByDeploymentId method is responsible for consolidating allocation data + * from multiple AssignmentPlan objects into a single structure. This structure maps deployment IDs + * to their respective node allocations, allowing the system to track how resources are distributed + * across nodes for each deployment. + * @param plans List of AssignmentPlan objects to merge allocations from + * @return + */ private Map> mergeAllocationsByNodeIdByDeploymentId(List plans) { Map> allocationsByNodeIdByDeploymentId = new HashMap<>(); deployments.forEach(d -> allocationsByNodeIdByDeploymentId.put(d.deploymentId(), new HashMap<>())); From b1c00f2dffde42f280adbd7a0bbe65d241f9b93a Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 8 Sep 2025 15:31:59 +0200 Subject: [PATCH 7/8] fix unit tests --- .../assignment/planning/AssignmentPlan.java | 29 +++++++++++++------ .../planning/AssignmentPlanTests.java | 10 +++---- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index 9d743f8a202b4..082a2606d27bd 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -444,6 +444,25 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio if (allocations <= 0 || canAssign(deployment, node, allocations, requiredMemory) == false) { return this; } + + validateAssignment(deployment, node, allocations); + + assignments.get(deployment).compute(node, (n, assignedAllocations) -> assignedAllocations + allocations); + accountMemory(deployment, node, requiredMemory); + + if (deployment.priority == Priority.NORMAL) { + remainingNodeCores.compute(node, (n, remCores) -> remCores - allocations * deployment.threadsPerAllocation()); + } + remainingModelAllocations.compute(deployment, (m, remModelThreads) -> remModelThreads - allocations); + return this; + } + + void validateAssignment(Deployment deployment, Node node, int allocations) { + long requiredMemory = getDeploymentMemoryRequirement(deployment, node, allocations); + validateAssignment(deployment, node, allocations, requiredMemory); + } + + private void validateAssignment(Deployment deployment, Node node, int allocations, long requiredMemory) { if (requiredMemory > remainingNodeMemory.get(node)) { throw new IllegalArgumentException( "not enough memory on node [" @@ -455,6 +474,7 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio + "]" ); } + if (deployment.priority == Priority.NORMAL && allocations * deployment.threadsPerAllocation() > remainingNodeCores.get(node)) { throw new IllegalArgumentException( "not enough cores on node [" @@ -468,15 +488,6 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio + "]" ); } - - assignments.get(deployment).compute(node, (n, assignedAllocations) -> assignedAllocations + allocations); - accountMemory(deployment, node, requiredMemory); - - if (deployment.priority == Priority.NORMAL) { - remainingNodeCores.compute(node, (n, remCores) -> remCores - allocations * deployment.threadsPerAllocation()); - } - remainingModelAllocations.compute(deployment, (m, remModelThreads) -> remModelThreads - allocations); - return this; } private int getAssignedAllocations(Deployment deployment, Node node) { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java index c7f166a19bb69..eb4abb4483c26 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java @@ -238,7 +238,7 @@ public void testAssignModelToNode_GivenPreviouslyUnassignedModelDoesNotFit() { Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(50).getBytes(), 2, 2, Map.of(), 0, null, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 1)); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 1)); assertThat(e.getMessage(), equalTo("not enough memory on node [n_1] to assign [1] allocations to deployment [m_1]")); } @@ -261,7 +261,7 @@ public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 2)); assertThat(e.getMessage(), containsString("not enough memory on node")); } { // new memory format @@ -281,7 +281,7 @@ public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 2)); assertThat(e.getMessage(), containsString("not enough memory on node")); } } @@ -291,7 +291,7 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndSingleThreadPerAllocati Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(100).getBytes(), 5, 1, Map.of(), 0, null, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 5)); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 5)); assertThat( e.getMessage(), @@ -315,7 +315,7 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAlloc ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 3)); + Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 3)); assertThat( e.getMessage(), From 8d3a59fed9817b626d2a2bdb8fd8feda07512fe0 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Tue, 9 Sep 2025 11:16:36 +0200 Subject: [PATCH 8/8] remove redundant validateAssignment function --- .../assignment/planning/AssignmentPlan.java | 35 --------- .../planning/AssignmentPlanTests.java | 72 ++----------------- 2 files changed, 4 insertions(+), 103 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index 082a2606d27bd..e629fa1a9e8e4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -445,8 +445,6 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio return this; } - validateAssignment(deployment, node, allocations); - assignments.get(deployment).compute(node, (n, assignedAllocations) -> assignedAllocations + allocations); accountMemory(deployment, node, requiredMemory); @@ -457,39 +455,6 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio return this; } - void validateAssignment(Deployment deployment, Node node, int allocations) { - long requiredMemory = getDeploymentMemoryRequirement(deployment, node, allocations); - validateAssignment(deployment, node, allocations, requiredMemory); - } - - private void validateAssignment(Deployment deployment, Node node, int allocations, long requiredMemory) { - if (requiredMemory > remainingNodeMemory.get(node)) { - throw new IllegalArgumentException( - "not enough memory on node [" - + node.id() - + "] to assign [" - + allocations - + "] allocations to deployment [" - + deployment.deploymentId() - + "]" - ); - } - - if (deployment.priority == Priority.NORMAL && allocations * deployment.threadsPerAllocation() > remainingNodeCores.get(node)) { - throw new IllegalArgumentException( - "not enough cores on node [" - + node.id() - + "] to assign [" - + allocations - + "] allocations to deployment [" - + deployment.deploymentId() - + "]; required threads per allocation [" - + deployment.threadsPerAllocation() - + "]" - ); - } - } - private int getAssignedAllocations(Deployment deployment, Node node) { return assignments.get(deployment).get(node); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java index eb4abb4483c26..fc39766c1fcf3 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java @@ -16,7 +16,6 @@ import java.util.Map; import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -233,73 +232,15 @@ public void testAssignModelToNode_GivenNewPlanDoesNotSatisfyCurrentAssignment() } } - public void testAssignModelToNode_GivenPreviouslyUnassignedModelDoesNotFit() { - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(50).getBytes(), 2, 2, Map.of(), 0, null, 0, 0); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 1)); - - assertThat(e.getMessage(), equalTo("not enough memory on node [n_1] to assign [1] allocations to deployment [m_1]")); - } - - public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { - { // old memory format - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( - "m_1", - "m_1", - ByteSizeValue.ofMb(50).getBytes(), - 2, - 2, - Map.of("n_1", 1), - 0, - null, - 0, - 0 - ); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 2)); - assertThat(e.getMessage(), containsString("not enough memory on node")); - } - { // new memory format - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( - "m_1", - "m_1", - ByteSizeValue.ofMb(30).getBytes(), - 2, - 2, - Map.of("n_1", 1), - 0, - null, - ByteSizeValue.ofMb(300).getBytes(), - ByteSizeValue.ofMb(5).getBytes() - ); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 2)); - assertThat(e.getMessage(), containsString("not enough memory on node")); - } - } - - public void testAssignModelToNode_GivenNotEnoughCores_AndSingleThreadPerAllocation() { + public void testCanAssign_GivenNotEnoughCores_AndSingleThreadPerAllocation() { Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 4); Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(100).getBytes(), 5, 1, Map.of(), 0, null, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 5)); - - assertThat( - e.getMessage(), - equalTo("not enough cores on node [n_1] to assign [5] allocations to deployment [m_1]; required threads per allocation [1]") - ); + assertThat(builder.canAssign(m, n, 5), is(false)); } - public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAllocation() { + public void testCanAssign_GivenNotEnoughCores_AndMultipleThreadsPerAllocation() { Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 5); AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( "m_1", @@ -315,12 +256,7 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAlloc ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.validateAssignment(m, n, 3)); - - assertThat( - e.getMessage(), - equalTo("not enough cores on node [n_1] to assign [3] allocations to deployment [m_1]; required threads per allocation [2]") - ); + assertThat(builder.canAssign(m, n, 3), is(false)); } public void testAssignModelToNode_GivenSameModelAssignedTwice() {