From 8de57a77a271ed24e6df7109f7cd77af3c4a8a2b Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 28 Jul 2025 12:03:46 +0200 Subject: [PATCH] [ML] Prevent the trained model deployment memory estimation from double-counting allocations. (#131990) This reverts commit 971cfb9. The refactoring in 971cfb9 introduced a bug that could potentially lead to double-counting of the number of allocations in the trained model memory estimation. --- docs/changelog/131990.yaml | 6 ++++++ .../TrainedModelAssignmentRebalancer.java | 10 +++++++--- .../planning/AbstractPreserveAllocations.java | 3 ++- .../assignment/planning/AssignmentPlan.java | 14 +++++++------- .../planning/RandomizedAssignmentRounding.java | 2 ++ .../planning/ZoneAwareAssignmentPlanner.java | 12 +++++++++--- 6 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 docs/changelog/131990.yaml diff --git a/docs/changelog/131990.yaml b/docs/changelog/131990.yaml new file mode 100644 index 0000000000000..29d9f620006ae --- /dev/null +++ b/docs/changelog/131990.yaml @@ -0,0 +1,6 @@ +pr: 131990 +summary: Prevent the trained model deployment memory estimation from double-counting + allocations +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index 624ef5434e2a0..53f9560648169 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -144,9 +144,13 @@ private static void copyAssignments( for (Map.Entry assignment : nodeAssignments.entrySet()) { AssignmentPlan.Node originalNode = originalNodeById.get(assignment.getKey().id()); dest.assignModelToNode(m, originalNode, assignment.getValue()); - // As the node has all its available memory we need to manually account memory of models with - // current allocations. - dest.accountMemory(m, originalNode); + if (m.currentAllocationsByNodeId().containsKey(originalNode.id())) { + // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder + // As the node has all its available memory we need to manually account memory of models with + // current allocations. + long requiredMemory = m.estimateMemoryUsageBytes(m.currentAllocationsByNodeId().get(originalNode.id())); + dest.accountMemory(m, originalNode, requiredMemory); + } } } } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java index 0151c8f5ee9c8..96b44083193de 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AbstractPreserveAllocations.java @@ -69,7 +69,7 @@ Deployment modifyModelPreservingPreviousAssignments(Deployment m) { AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) { // As the model/node objects the assignment plan are the modified ones, // they will not match the models/nodes members we have in this class. - // Therefore, we build a lookup table based on the ids, so we can merge the plan + // Therefore, we build a lookup table based on the ids so we can merge the plan // with its preserved allocations. final Map, Integer> plannedAssignmentsByModelNodeIdPair = new HashMap<>(); for (Deployment m : assignmentPlan.models()) { @@ -81,6 +81,7 @@ AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) { AssignmentPlan.Builder mergedPlanBuilder = AssignmentPlan.builder(nodes, deployments); for (Node n : nodes) { + // TODO (#101612) Should the first loop happen in the builder constructor? for (Deployment deploymentAllocationsToPreserve : deployments) { // if the model m is already allocated on the node n and I want to preserve this allocation diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java index 7fc16394ed85c..caccaa7925532 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java @@ -408,7 +408,8 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio if (allocations <= 0) { return this; } - if (requiredMemory > remainingNodeMemory.get(node)) { + if (/*isAlreadyAssigned(deployment, node) == false + &&*/ requiredMemory > remainingNodeMemory.get(node)) { throw new IllegalArgumentException( "not enough memory on node [" + node.id() @@ -454,14 +455,13 @@ private static int getCurrentAllocations(Deployment m, Node n) { } public void accountMemory(Deployment m, Node n) { - if (m.currentAllocationsByNodeId().containsKey(n.id())) { - int allocations = m.currentAllocationsByNodeId().get(n.id()); - long requiredMemory = m.estimateMemoryUsageBytes(allocations); - accountMemory(m, n, requiredMemory); - } + // TODO (#101612) remove or refactor unused method + long requiredMemory = getDeploymentMemoryRequirement(m, n, getCurrentAllocations(m, n)); + accountMemory(m, n, requiredMemory); } - private void accountMemory(Deployment m, Node n, long requiredMemory) { + public void accountMemory(Deployment m, Node n, long requiredMemory) { + // TODO (#101612) computation of required memory should be done internally remainingNodeMemory.computeIfPresent(n, (k, v) -> v - requiredMemory); if (remainingNodeMemory.containsKey(n) && remainingNodeMemory.get(n) < 0) { throw new IllegalArgumentException("not enough memory on node [" + n.id() + "] to assign model [" + m.id() + "]"); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java index 81696cd20d922..8bdc99998a0c2 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/RandomizedAssignmentRounding.java @@ -310,6 +310,8 @@ private void unassignOversizedModels(Node n) { private AssignmentPlan toPlan() { AssignmentPlan.Builder builder = AssignmentPlan.builder(nodes, deployments); for (Map.Entry, Integer> assignment : tryAssigningRemainingCores().entrySet()) { + // TODO (#101612) The model should be assigned to the node only when it is possible. This means, that canAssign should be + // integrated into the assignModelToNode. if (builder.canAssign(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue())) { builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue()); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java index 1f0857391598f..8c2edd6a7b567 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/ZoneAwareAssignmentPlanner.java @@ -185,9 +185,15 @@ private AssignmentPlan swapOriginalModelsInPlan( for (Map.Entry assignment : nodeAssignments.entrySet()) { Node originalNode = originalNodeById.get(assignment.getKey().id()); planBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue()); - // As the node has all its available memory we need to manually account memory of models with - // current allocations. - planBuilder.accountMemory(originalDeployment, originalNode); + if (originalDeployment.currentAllocationsByNodeId().containsKey(originalNode.id())) { + // TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder + // As the node has all its available memory we need to manually account memory of models with + // current allocations. + long requiredMemory = originalDeployment.estimateMemoryUsageBytes( + originalDeployment.currentAllocationsByNodeId().get(originalNode.id()) + ); + planBuilder.accountMemory(m, originalNode, requiredMemory); + } } } return planBuilder.build();