Skip to content

Commit f3cc558

Browse files
authored
Preserve deployments with zero allocations during assignment planning (elastic#137244)
* Preserve deployments with zero allocations during assignment planning.
* Update docs/changelog/137244.yaml
* Polish code
* More testing
* Add zero-allocation deployments to best plan logging
1 parent 8c110cb commit f3cc558

File tree

4 files changed

+144
-9
lines changed

4 files changed

+144
-9
lines changed

docs/changelog/137244.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 137244
2+
summary: Preserve deployments with zero allocations during assignment planning
3+
area: Machine Learning
4+
type: bug
5+
issues:
6+
- 137134

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlan.java

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import java.util.ArrayList;
1818
import java.util.Collection;
19+
import java.util.Collections;
1920
import java.util.Comparator;
2021
import java.util.HashMap;
2122
import java.util.HashSet;
@@ -190,15 +191,13 @@ public String toString() {
190191

191192
private AssignmentPlan(
192193
Map<Deployment, Map<Node, Integer>> assignments,
193-
Map<Node, Long> remainingNodeMemory,
194-
Map<Node, Integer> remainingNodeCores,
194+
Map<String, Long> remainingNodeMemory,
195+
Map<String, Integer> remainingNodeCores,
195196
Map<Deployment, Integer> remainingModelAllocations
196197
) {
197198
this.assignments = Objects.requireNonNull(assignments);
198-
this.remainingNodeMemory = remainingNodeMemory.entrySet()
199-
.stream()
200-
.collect(Collectors.toMap(e -> e.getKey().id(), e -> e.getValue()));
201-
this.remainingNodeCores = remainingNodeCores.entrySet().stream().collect(Collectors.toMap(e -> e.getKey().id(), e -> e.getValue()));
199+
this.remainingNodeMemory = Objects.requireNonNull(remainingNodeMemory);
200+
this.remainingNodeCores = Objects.requireNonNull(remainingNodeCores);
202201
this.remainingModelAllocations = Objects.requireNonNull(remainingModelAllocations);
203202
}
204203

@@ -315,13 +314,42 @@ private Quality computeQuality() {
315314
return new Quality(isSatisfyingPreviousAssignments, weighedAllocationsScore, memoryScore);
316315
}
317316

317+
/**
318+
* Adds deployments with zero allocations to this plan. These deployments
319+
* are preserved in the plan but have no node assignments. This ensures
320+
* that deployments configured with zero allocations are not lost during
321+
* planning.
322+
*
323+
* Deployments with zero allocations are filtered out during the planning
324+
* process (since they don't require assignment), but they need to be preserved
325+
* in the final plan so that deployment state is maintained correctly.
326+
*
327+
* @param zeroAllocationDeployments deployments to add with empty assignments
328+
* @return a new plan containing the original assignments plus the zero-allocation deployments
329+
*/
330+
public AssignmentPlan withZeroAllocationDeployments(Collection<Deployment> zeroAllocationDeployments) {
331+
Map<Deployment, Map<Node, Integer>> newAssignments = new HashMap<>(assignments);
332+
Map<Deployment, Integer> newRemainingModelAllocations = new HashMap<>(remainingModelAllocations);
333+
for (Deployment deployment : zeroAllocationDeployments) {
334+
assert newAssignments.containsKey(deployment) == false;
335+
newAssignments.put(deployment, Collections.emptyMap());
336+
newRemainingModelAllocations.put(deployment, 0);
337+
}
338+
return new AssignmentPlan(newAssignments, remainingNodeMemory, remainingNodeCores, newRemainingModelAllocations);
339+
}
340+
318341
public String prettyPrint() {
319342
if (assignments.isEmpty()) {
320343
return "Empty plan";
321344
}
322345

323346
Map<Node, List<Tuple<Deployment, Integer>>> nodeToModel = new HashMap<>();
347+
Set<Deployment> zeroAllocationsDeployments = new HashSet<>();
324348
for (Deployment m : assignments.keySet()) {
349+
if (assignments.get(m).isEmpty()) {
350+
zeroAllocationsDeployments.add(m);
351+
continue;
352+
}
325353
for (Node n : assignments.get(m).keySet()) {
326354
List<Tuple<Deployment, Integer>> allocationsPerModel = nodeToModel.containsKey(n) ? nodeToModel.get(n) : new ArrayList<>();
327355
allocationsPerModel.add(Tuple.tuple(m, assignments.get(m).get(n)));
@@ -359,6 +387,11 @@ public String prettyPrint() {
359387
msg.append('\n');
360388
}
361389
}
390+
if (zeroAllocationsDeployments.isEmpty() == false) {
391+
msg.append('\n');
392+
msg.append("Deployments with zero allocations: ");
393+
msg.append(zeroAllocationsDeployments.stream().map(Deployment::deploymentId).collect(Collectors.joining(", ", "[", "]")));
394+
}
362395
return msg.toString();
363396
}
364397

@@ -477,7 +510,12 @@ public AssignmentPlan build() {
477510
}
478511
finalAssignments.put(m, allocationsPerNode);
479512
}
480-
return new AssignmentPlan(finalAssignments, remainingNodeMemory, remainingNodeCores, remainingModelAllocations);
513+
return new AssignmentPlan(
514+
finalAssignments,
515+
remainingNodeMemory.entrySet().stream().collect(Collectors.toMap(e -> e.getKey().id(), Map.Entry::getValue)),
516+
remainingNodeCores.entrySet().stream().collect(Collectors.toMap(e -> e.getKey().id(), Map.Entry::getValue)),
517+
remainingModelAllocations
518+
);
481519
}
482520
}
483521

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlanner.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,18 @@ public class AssignmentPlanner {
4747

4848
private final List<Node> nodes;
4949
private final List<AssignmentPlan.Deployment> deployments;
50+
private final List<AssignmentPlan.Deployment> deploymentsWithZeroAllocations;
5051

5152
/**
 * Creates a planner over the given nodes and deployments.
 *
 * Nodes are kept sorted by id. Deployments are split by allocation count and each
 * group is kept sorted by deployment id: those with a positive count take part in
 * planning, those with a count of exactly zero are held aside separately.
 *
 * @param nodes       the nodes that deployments can be assigned to
 * @param deployments the deployments to plan for
 */
public AssignmentPlanner(List<Node> nodes, List<AssignmentPlan.Deployment> deployments) {
    Comparator<AssignmentPlan.Deployment> byDeploymentId = Comparator.comparing(AssignmentPlan.Deployment::deploymentId);

    this.nodes = nodes.stream().sorted(Comparator.comparing(Node::id)).toList();
    this.deployments = deployments.stream().filter(d -> d.allocations() > 0).sorted(byDeploymentId).toList();
    this.deploymentsWithZeroAllocations = deployments.stream().filter(d -> d.allocations() == 0).sorted(byDeploymentId).toList();
}
5863

5964
public AssignmentPlan computePlan() {
@@ -97,8 +102,11 @@ public AssignmentPlan computePlan(boolean tryAssigningAllPreviouslyAllocatedMode
97102
}
98103
}
99104

100-
logger.debug(() -> "Best plan =\n" + bestPlan.prettyPrint());
101-
logger.debug(() -> prettyPrintOverallStats(bestPlan));
105+
bestPlan = bestPlan.withZeroAllocationDeployments(deploymentsWithZeroAllocations);
106+
if (logger.isDebugEnabled()) {
107+
logger.debug("Best plan =\n{}", bestPlan.prettyPrint());
108+
logger.debug("{}", prettyPrintOverallStats(bestPlan));
109+
}
102110
return bestPlan;
103111
}
104112

x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/assignment/planning/AssignmentPlannerTests.java

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.elasticsearch.common.StopWatch;
1111
import org.elasticsearch.common.unit.ByteSizeValue;
1212
import org.elasticsearch.test.ESTestCase;
13+
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
1314
import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Deployment;
1415
import org.elasticsearch.xpack.ml.inference.assignment.planning.AssignmentPlan.Node;
1516

@@ -24,6 +25,8 @@
2425
import java.util.stream.Stream;
2526

2627
import static org.elasticsearch.test.hamcrest.OptionalMatchers.isEmpty;
28+
import static org.hamcrest.Matchers.contains;
29+
import static org.hamcrest.Matchers.containsInAnyOrder;
2730
import static org.hamcrest.Matchers.equalTo;
2831
import static org.hamcrest.Matchers.everyItem;
2932
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
@@ -1165,7 +1168,87 @@ public void testGivenClusterResize_ShouldRemoveAllocatedDeployments_NewMemoryFie
11651168
assertThat(indexedBasedPlan.get("m_1"), equalTo(Map.of("n_1", 2)));
11661169
assertThat(assignmentPlan.getRemainingNodeMemory(node1.id()), greaterThanOrEqualTo(0L));
11671170
assertThat(assignmentPlan.getRemainingNodeCores(node1.id()), greaterThanOrEqualTo(0));
1171+
}
1172+
1173+
public void testZeroAllocationsDeploymentIsPreserved() {
1174+
Node node = new Node("n_1", ByteSizeValue.ofGb(4).getBytes(), 4);
1175+
Deployment deployment = new Deployment(
1176+
"m_1",
1177+
"m_1",
1178+
ByteSizeValue.ofMb(500).getBytes(),
1179+
0,
1180+
1,
1181+
Map.of(),
1182+
0,
1183+
new AdaptiveAllocationsSettings(true, 0, 42),
1184+
0,
1185+
0
1186+
);
1187+
1188+
AssignmentPlan assignmentPlan = new AssignmentPlanner(List.of(node), List.of(deployment)).computePlan();
11681189

1190+
assertThat(assignmentPlan.deployments(), contains(deployment));
1191+
assertThat(assignmentPlan.assignments(deployment), isEmpty());
1192+
assertThat(assignmentPlan.satisfiesAllocations(deployment), is(true));
1193+
assertThat(assignmentPlan.getRemainingNodeMemory(node.id()), equalTo(ByteSizeValue.ofGb(4).getBytes()));
1194+
assertThat(assignmentPlan.getRemainingNodeCores(node.id()), equalTo(4));
1195+
}
1196+
1197+
public void testMultipleZeroAllocationsDeploymentsArePreserved() {
1198+
Node node = new Node("n_1", ByteSizeValue.ofMb(4096).getBytes(), 4);
1199+
Deployment zeroAllocationDeployment1 = new Deployment(
1200+
"m_1",
1201+
"m_1",
1202+
ByteSizeValue.ofMb(500).getBytes(),
1203+
0,
1204+
1,
1205+
Map.of(),
1206+
0,
1207+
new AdaptiveAllocationsSettings(true, 0, 42),
1208+
0,
1209+
0
1210+
);
1211+
Deployment zeroAllocationDeployment2 = new Deployment(
1212+
"m_2",
1213+
"m_2",
1214+
ByteSizeValue.ofMb(600).getBytes(),
1215+
0,
1216+
1,
1217+
Map.of(),
1218+
4,
1219+
new AdaptiveAllocationsSettings(true, 0, 42),
1220+
0,
1221+
0
1222+
);
1223+
Deployment oneAllocationDeployment = new AssignmentPlan.Deployment(
1224+
"m_1",
1225+
"m_1",
1226+
ByteSizeValue.ofMb(100).getBytes(),
1227+
1,
1228+
1,
1229+
Map.of(),
1230+
0,
1231+
null,
1232+
0,
1233+
0
1234+
);
1235+
1236+
AssignmentPlan assignmentPlan = new AssignmentPlanner(
1237+
List.of(node),
1238+
List.of(zeroAllocationDeployment1, oneAllocationDeployment, zeroAllocationDeployment2)
1239+
).computePlan();
1240+
1241+
assertThat(
1242+
assignmentPlan.deployments(),
1243+
containsInAnyOrder(zeroAllocationDeployment1, zeroAllocationDeployment2, oneAllocationDeployment)
1244+
);
1245+
assertModelFullyAssignedToNode(assignmentPlan, oneAllocationDeployment, node);
1246+
assertThat(assignmentPlan.assignments(zeroAllocationDeployment1), isEmpty());
1247+
assertThat(assignmentPlan.assignments(zeroAllocationDeployment2), isEmpty());
1248+
assertThat(assignmentPlan.assignments(zeroAllocationDeployment1), isEmpty());
1249+
assertThat(assignmentPlan.assignments(zeroAllocationDeployment2), isEmpty());
1250+
assertThat(assignmentPlan.getRemainingNodeMemory(node.id()), greaterThanOrEqualTo(0L));
1251+
assertThat(assignmentPlan.getRemainingNodeCores(node.id()), equalTo(3));
11691252
}
11701253

11711254
public static List<Deployment> createDeploymentsFromPlan(AssignmentPlan plan) {

0 commit comments

Comments
 (0)