From ee921499a4590fbac7e930a4e1456e5f79ca02a1 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:15:55 +0200 Subject: [PATCH 1/2] remove double-counting of inference memory --- .../assignment/TrainedModelAssignmentRebalancer.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java index f523b4b086f35..90f86dbc243f0 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/TrainedModelAssignmentRebalancer.java @@ -298,9 +298,7 @@ private Map<List<String>, List<AssignmentPlan.Node>> createNodesByZoneMap() { nodes.add( new AssignmentPlan.Node( discoveryNode.getId(), - // We subtract native inference memory as the planner expects available memory for - // native inference including current assignments. 
- getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(load), + load.getFreeMemoryExcludingPerNodeOverhead(), MlProcessors.get(discoveryNode, allocatedProcessorsScale).roundUp() ) ); @@ -317,10 +315,6 @@ private Map<List<String>, List<AssignmentPlan.Node>> createNodesByZoneMap() { })); } - private static long getNodeFreeMemoryExcludingPerNodeOverheadAndNativeInference(NodeLoad load) { - return load.getFreeMemoryExcludingPerNodeOverhead() - load.getAssignedNativeInferenceMemory(); - } - private TrainedModelAssignmentMetadata.Builder buildAssignmentsFromPlan(AssignmentPlan assignmentPlan) { TrainedModelAssignmentMetadata.Builder builder = TrainedModelAssignmentMetadata.Builder.empty(); for (AssignmentPlan.Deployment deployment : assignmentPlan.deployments()) { From 50a210a1c0df1a1f60b38f6b00160d942640d117 Mon Sep 17 00:00:00 2001 From: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> Date: Mon, 1 Sep 2025 11:21:13 +0200 Subject: [PATCH 2/2] Update docs/changelog/133919.yaml --- docs/changelog/133919.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133919.yaml diff --git a/docs/changelog/133919.yaml b/docs/changelog/133919.yaml new file mode 100644 index 0000000000000..34c3ecd3ebe57 --- /dev/null +++ b/docs/changelog/133919.yaml @@ -0,0 +1,5 @@ +pr: 133919 +summary: Fix double-counting of inference memory in the assignment rebalancer +area: Machine Learning +type: bug +issues: []