diff --git a/docs/changelog/133919.yaml b/docs/changelog/133919.yaml new file mode 100644 index 0000000000000..34c3ecd3ebe57 --- /dev/null +++ b/docs/changelog/133919.yaml @@ -0,0 +1,5 @@ +pr: 133919 +summary: Fix double-counting of inference memory in the assignment rebalancer +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index c497e084dedd1..2cb97b5347ee8 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -303,9 +303,7 @@ private Map, List> createNodesByZoneMap() { nodes.add( new AssignmentPlan.Node( discoveryNode.getId(), - // We subtract native inference memory as the planner expects available memory for - // native inference including current assignments. - getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load), + load.getFreeMemoryExcludingPerNodeOverhead(), MlProcessors.get(discoveryNode, allocatedProcessorsScale).roundUp() ) ); @@ -322,10 +320,6 @@ private Map, List> createNodesByZoneMap() { })); } - private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory(); - } - private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) { TrainedModelAssignmentMetadata.Builder builder = TrainedModelAssignmentMetadata.Builder.empty(); for (AssignmentPlan.Deployment deployment : assignmentPlan.deployments()) {