Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/131990.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 131990
summary: Prevent the trained model deployment memory estimation from double-counting
allocations
area: Machine Learning
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,13 @@ private static void copyAssignments(
for (Map.Entry<AssignmentPlan.Node, Integer> assignment : nodeAssignments.entrySet()) {
AssignmentPlan.Node originalNode = originalNodeById.get(assignment.getKey().id());
dest.assignModelToNode(m, originalNode, assignment.getValue());
// As the node has all its available memory we need to manually account memory of models with
// current allocations.
dest.accountMemory(m, originalNode);
if (m.currentAllocationsByNodeId().containsKey(originalNode.id())) {
// TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder
// As the node has all its available memory we need to manually account memory of models with
// current allocations.
long requiredMemory = m.estimateMemoryUsageBytes(m.currentAllocationsByNodeId().get(originalNode.id()));
dest.accountMemory(m, originalNode, requiredMemory);
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Deployment modifyModelPreservingPreviousAssignments(Deployment m) {
AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) {
// As the model/node objects the assignment plan are the modified ones,
// they will not match the models/nodes members we have in this class.
// Therefore, we build a lookup table based on the ids, so we can merge the plan
// Therefore, we build a lookup table based on the ids so we can merge the plan
// with its preserved allocations.
final Map<Tuple<String, String>, Integer> plannedAssignmentsByDeploymentNodeIdPair = new HashMap<>();
for (Deployment d : assignmentPlan.deployments()) {
Expand All @@ -85,6 +85,7 @@ AssignmentPlan mergePreservedAllocations(AssignmentPlan assignmentPlan) {

AssignmentPlan.Builder mergedPlanBuilder = AssignmentPlan.builder(nodes, deployments);
for (Node n : nodes) {
// TODO (#101612) Should the first loop happen in the builder constructor?
for (Deployment deploymentAllocationsToPreserve : deployments) {

// if the model m is already allocated on the node n and I want to preserve this allocation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,8 @@ public Builder assignModelToNode(Deployment deployment, Node node, int allocatio
if (allocations <= 0) {
return this;
}
if (requiredMemory > remainingNodeMemory.get(node)) {
if (/*isAlreadyAssigned(deployment, node) == false
&&*/ requiredMemory > remainingNodeMemory.get(node)) {
throw new IllegalArgumentException(
"not enough memory on node ["
+ node.id()
Expand Down Expand Up @@ -470,14 +471,13 @@ private static int getCurrentAllocations(Deployment m, Node n) {
}

public void accountMemory(Deployment m, Node n) {
if (m.currentAllocationsByNodeId().containsKey(n.id())) {
int allocations = m.currentAllocationsByNodeId().get(n.id());
long requiredMemory = m.estimateMemoryUsageBytes(allocations);
accountMemory(m, n, requiredMemory);
}
// TODO (#101612) remove or refactor unused method
long requiredMemory = getDeploymentMemoryRequirement(m, n, getCurrentAllocations(m, n));
accountMemory(m, n, requiredMemory);
}

private void accountMemory(Deployment m, Node n, long requiredMemory) {
public void accountMemory(Deployment m, Node n, long requiredMemory) {
// TODO (#101612) computation of required memory should be done internally
remainingNodeMemory.computeIfPresent(n, (k, v) -> v - requiredMemory);
if (remainingNodeMemory.containsKey(n) && remainingNodeMemory.get(n) < 0) {
throw new IllegalArgumentException("not enough memory on node [" + n.id() + "] to assign model [" + m.deploymentId() + "]");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ private void unassignOversizedModels(Node n) {
private AssignmentPlan toPlan() {
AssignmentPlan.Builder builder = AssignmentPlan.builder(nodes, deployments);
for (Map.Entry<Tuple<AssignmentPlan.Deployment, Node>, Integer> assignment : tryAssigningRemainingCores().entrySet()) {
// TODO (#101612) The model should be assigned to the node only when it is possible. This means, that canAssign should be
// integrated into the assignModelToNode.
if (builder.canAssign(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue())) {
builder.assignModelToNode(assignment.getKey().v1(), assignment.getKey().v2(), assignment.getValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,15 @@ private AssignmentPlan swapOriginalModelsInPlan(
for (Map.Entry<Node, Integer> assignment : nodeAssignments.entrySet()) {
Node originalNode = originalNodeById.get(assignment.getKey().id());
planBuilder.assignModelToNode(originalDeployment, originalNode, assignment.getValue());
// As the node has all its available memory we need to manually account memory of models with
// current allocations.
planBuilder.accountMemory(originalDeployment, originalNode);
if (originalDeployment.currentAllocationsByNodeId().containsKey(originalNode.id())) {
// TODO (#101612) requiredMemory should be calculated by the AssignmentPlan.Builder
// As the node has all its available memory we need to manually account memory of models with
// current allocations.
long requiredMemory = originalDeployment.estimateMemoryUsageBytes(
originalDeployment.currentAllocationsByNodeId().get(originalNode.id())
);
planBuilder.accountMemory(m, originalNode, requiredMemory);
}
}
}
return planBuilder.build();
Expand Down
Loading