diff --git a/docs/changelog/133919.yaml b/docs/changelog/133919.yaml
new file mode 100644
index 0000000000000..34c3ecd3ebe57
--- /dev/null
+++ b/docs/changelog/133919.yaml
@@ -0,0 +1,5 @@
+pr: 133919
+summary: Fix double-counting of inference memory in the assignment rebalancer
+area: Machine Learning
+type: bug
+issues: []
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java
index f523b4b086f35..90f86dbc243f0 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java
@@ -298,9 +298,7 @@ private Map<List<String>, List<AssignmentPlan.Node>> createNodesByZoneMap() {
                         nodes.add(
                             new AssignmentPlan.Node(
                                 discoveryNode.getId(),
-                                // We subtract native inference memory as the planner expects available memory for
-                                // native inference including current assignments.
-                                getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load),
+                                load.getFreeMemoryExcludingPerNodeOverhead(),
                                 MlProcessors.get(discoveryNode, allocatedProcessorsScale).roundUp()
                             )
                         );
@@ -317,10 +315,6 @@ private Map<List<String>, List<AssignmentPlan.Node>> createNodesByZoneMap() {
         }));
     }
 
-    private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) {
-        return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory();
-    }
-
     private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) {
         TrainedModelAssignmentMetadata.Builder builder = TrainedModelAssignmentMetadata.Builder.empty();
         for (AssignmentPlan.Deployment deployment : assignmentPlan.deployments()) {