diff --git a/docs/changelog/133916.yaml b/docs/changelog/133916.yaml new file mode 100644 index 0000000000000..d35c0d044c7fa --- /dev/null +++ b/docs/changelog/133916.yaml @@ -0,0 +1,5 @@ +pr: 133916 +summary: Fix model assignment error handling and assignment explanation generation +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java index 8a0bbe2ecdd5e..a451f301668bd 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java @@ -103,8 +103,7 @@ AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) { 0 ); - long requiredMemory = mergedPlanBuilder.getDeploymentMemoryRequirement(deploymentNewAllocations, n, newAllocations); - if (newAllocations > 0 && mergedPlanBuilder.canAssign(deploymentNewAllocations, n, newAllocations, requiredMemory)) { + if (newAllocations > 0) { mergedPlanBuilder.assignModelToNode(deploymentNewAllocations, n, newAllocations); } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index 063014d616925..e629fa1a9e8e4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -441,33 +441,9 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio } public Builder assignModelToNode(Deployment deployment, Node node, int allocations, long requiredMemory) { - if (allocations <= 0) { + if (allocations <= 0 || canAssign(deployment, node, allocations, requiredMemory) == false) { return this; } - if (requiredMemory > remainingNodeMemory.get(node)) { - throw new IllegalArgumentException( - "not enough memory on node [" - + node.id() - + "] to assign [" - + allocations - + "] allocations to deployment [" - + deployment.deploymentId() - + "]" - ); - } - if (deployment.priority == Priority.NORMAL && allocations * deployment.threadsPerAllocation() > remainingNodeCores.get(node)) { - throw new IllegalArgumentException( - "not enough cores on node [" - + node.id() - + "] to assign [" - + allocations - + "] allocations to deployment [" - + deployment.deploymentId() - + "]; required threads per allocation [" - + deployment.threadsPerAllocation() - + "]" - ); - } assignments.get(deployment).compute(node, (n, assignedAllocations) -> assignedAllocations + allocations); accountMemory(deployment, node, requiredMemory); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java index 8bdc99998a0c2..e28af9a0b6108 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java @@ -310,11 +310,7 @@ private void unassignOversizedModels(Node n) { private AssignmentPlan toPlan() { AssignmentPlan.Builder builder = AssignmentPlan.builder(nodes, deployments); for (Map.Entry, Integer> assignment : tryAssigningRemainingCores().entrySet()) { - // TODO (#101612) The model should be assigned to the node only when it is possible. This means, that canAssign should be - // integrated into the assignModelToNode. - if (builder.canAssign(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue())) { - builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); - } + builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); } return builder.build(); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 64cd40fdc537d..1ccf8840c55be 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -91,6 +91,8 @@ private AssignmentPlan computePlan(boolean tryAssigningPreviouslyAssignedModels) remainingZones, tryAssigningPreviouslyAssignedModels ); + + // Update remaining allocations to account for allocations satisfied in this zone plan.deployments() .forEach( d -> deploymentIdToRemainingAllocations.computeIfPresent( @@ -217,6 +219,14 @@ private AssignmentPlan swapOriginalDeploymentsInPlan( return finalPlanBuilder.build(); } + /** + * The mergeAllocationsByNodeIdByDeploymentId method is responsible for consolidating allocation data + * from multiple AssignmentPlan objects into a single structure. This structure maps deployment IDs + * to their respective node allocations, allowing the system to track how resources are distributed + * across nodes for each deployment. + * @param plans List of AssignmentPlan objects to merge allocations from + * @return + */ private Map> mergeAllocationsByNodeIdByDeploymentId(List plans) { Map> allocationsByNodeIdByDeploymentId = new HashMap<>(); deployments.forEach(d -> allocationsByNodeIdByDeploymentId.put(d.deploymentId(), new HashMap<>())); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java index 65e91f8402ce5..cfef7939ba236 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancerTests.java @@ -1198,7 +1198,7 @@ public void testCopyAssignments() { assertThat(deployment2Assignments.get().get(node2), equalTo(1)); } - public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead() { + public void testRebalance_GivenDeploymentWithMemoryRequirements_ExplainMissingAllocations() { // Create a node with just enough memory to fit the model plus native executable overhead long modelMemory = ByteSizeValue.ofMb(200).getBytes(); long memoryOverhead = ByteSizeValue.ofMb(240).getBytes(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java index c7f166a19bb69..fc39766c1fcf3 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanTests.java @@ -16,7 +16,6 @@ import java.util.Map; import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -233,73 +232,15 @@ public void testAssignModelToNode_GivenNewPlanDoesNotSatisfyCurrentAssignment() } } - public void testAssignModelToNode_GivenPreviouslyUnassignedModelDoesNotFit() { - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(50).getBytes(), 2, 2, Map.of(), 0, null, 0, 0); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 1)); - - assertThat(e.getMessage(), equalTo("not enough memory on node [n_1] to assign [1] allocations to deployment [m_1]")); - } - - public void testAssignModelToNode_GivenPreviouslyAssignedModelDoesNotFit() { - { // old memory format - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( - "m_1", - "m_1", - ByteSizeValue.ofMb(50).getBytes(), - 2, - 2, - Map.of("n_1", 1), - 0, - null, - 0, - 0 - ); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); - assertThat(e.getMessage(), containsString("not enough memory on node")); - } - { // new memory format - Node n = new Node("n_1", ByteSizeValue.ofMb(340 - 1).getBytes(), 4); - AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( - "m_1", - "m_1", - ByteSizeValue.ofMb(30).getBytes(), - 2, - 2, - Map.of("n_1", 1), - 0, - null, - ByteSizeValue.ofMb(300).getBytes(), - ByteSizeValue.ofMb(5).getBytes() - ); - - AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 2)); - assertThat(e.getMessage(), containsString("not enough memory on node")); - } - } - - public void testAssignModelToNode_GivenNotEnoughCores_AndSingleThreadPerAllocation() { + public void testCanAssign_GivenNotEnoughCores_AndSingleThreadPerAllocation() { Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 4); Deployment m = new AssignmentPlan.Deployment("m_1", "m_1", ByteSizeValue.ofMb(100).getBytes(), 5, 1, Map.of(), 0, null, 0, 0); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 5)); - - assertThat( - e.getMessage(), - equalTo("not enough cores on node [n_1] to assign [5] allocations to deployment [m_1]; required threads per allocation [1]") - ); + assertThat(builder.canAssign(m, n, 5), is(false)); } - public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAllocation() { + public void testCanAssign_GivenNotEnoughCores_AndMultipleThreadsPerAllocation() { Node n = new Node("n_1", ByteSizeValue.ofMb(500).getBytes(), 5); AssignmentPlan.Deployment m = new AssignmentPlan.Deployment( "m_1", @@ -315,12 +256,7 @@ public void testAssignModelToNode_GivenNotEnoughCores_AndMultipleThreadsPerAlloc ); AssignmentPlan.Builder builder = AssignmentPlan.builder(List.of(n), List.of(m)); - Exception e = expectThrows(IllegalArgumentException.class, () -> builder.assignModelToNode(m, n, 3)); - - assertThat( - e.getMessage(), - equalTo("not enough cores on node [n_1] to assign [3] allocations to deployment [m_1]; required threads per allocation [2]") - ); + assertThat(builder.canAssign(m, n, 3), is(false)); } public void testAssignModelToNode_GivenSameModelAssignedTwice() {