@@ -1198,7 +1198,7 @@ public void testCopyAssignments() {
11981198 assertThat (deployment2Assignments .get ().get (node2 ), equalTo (1 ));
11991199 }
12001200
1201- public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead () {
1201+ public void testRebalance_GivenDeploymentWithMemoryRequirements_ExplainMissingAllocations () {
12021202 // Create a node with just enough memory to fit the model plus native executable overhead
12031203 long modelMemory = ByteSizeValue .ofMb (200 ).getBytes ();
12041204 long memoryOverhead = ByteSizeValue .ofMb (240 ).getBytes ();
@@ -1320,63 +1320,4 @@ private static DiscoveryNode buildNode(String name, long nativeMemory, int alloc
13201320 )
13211321 .build ();
13221322 }
1323-
1324- public void testRebalance_GivenDeploymentWithMemoryRequirements_ConsidersNativeExecutableOverhead () {
1325- // Create a node with just enough memory to fit the model plus native executable overhead: 240 MB overhead + 2 x 200 MB model memory + 50 MB JVM overhead
1326- long modelMemory = ByteSizeValue .ofMb (200 ).getBytes ();
1327- long memoryOverhead = ByteSizeValue .ofMb (240 ).getBytes ();
1328- long JVMOverhead = ByteSizeValue .ofMb (50 ).getBytes ();
1329- long nodeMemory = memoryOverhead + modelMemory * 2 + JVMOverhead ;
1330-
1331- DiscoveryNode node = buildNode ("node-1" , nodeMemory , 4 );
1332-
1333- String deploymentId = "model-with-overhead-test" ;
1334- StartTrainedModelDeploymentAction .TaskParams taskParams = normalPriorityParams (deploymentId , deploymentId , modelMemory , 1 , 1 );
1335-
1336- TrainedModelAssignmentMetadata currentMetadata = TrainedModelAssignmentMetadata .Builder .empty ().build ();
1337- Map <DiscoveryNode , NodeLoad > nodeLoads = new HashMap <>();
1338-
1339- // The node load is built with only its max memory set (no jobs or models yet), so the overhead should be accounted for
1340- nodeLoads .put (node , NodeLoad .builder ("node-1" ).setMaxMemory (nodeMemory ).build ());
1341-
1342- TrainedModelAssignmentMetadata result = new TrainedModelAssignmentRebalancer (
1343- currentMetadata ,
1344- nodeLoads ,
1345- Map .of (List .of (), List .of (node )),
1346- Optional .of (new CreateTrainedModelAssignmentAction .Request (taskParams , null )),
1347- 1
1348- ).rebalance ().build ();
1349-
1350- // Verify the deployment was successful: STARTING state, routed to node-1 only, and no reason recorded
1351- TrainedModelAssignment assignment = result .getDeploymentAssignment (deploymentId );
1352- assertThat (assignment , is (notNullValue ()));
1353- assertThat (assignment .getAssignmentState (), equalTo (AssignmentState .STARTING ));
1354- assertThat (assignment .getNodeRoutingTable (), is (aMapWithSize (1 )));
1355- assertThat (assignment .getNodeRoutingTable (), hasKey ("node-1" ));
1356- assertThat (assignment .getReason ().isPresent (), is (false ));
1357-
1358- // Now try with a node that has 21 MB less memory - the model should not be routed to it and a reason should be reported
1359- long insufficientNodeMemory = nodeMemory - ByteSizeValue .ofMb (21 ).getBytes ();
1360- DiscoveryNode insufficientNode = buildNode ("node-2" , insufficientNodeMemory , 4 );
1361-
1362- Map <DiscoveryNode , NodeLoad > insufficientNodeLoads = Map .of (
1363- insufficientNode ,
1364- NodeLoad .builder ("node-2" ).setMaxMemory (insufficientNodeMemory ).build ()
1365- );
1366-
1367- TrainedModelAssignmentMetadata insufficientResult = new TrainedModelAssignmentRebalancer (
1368- TrainedModelAssignmentMetadata .Builder .empty ().build (),
1369- insufficientNodeLoads ,
1370- Map .of (List .of (), List .of (insufficientNode )),
1371- Optional .of (new CreateTrainedModelAssignmentAction .Request (taskParams , null )),
1372- 1
1373- ).rebalance ().build ();
1374-
1375- TrainedModelAssignment insufficientAssignment = insufficientResult .getDeploymentAssignment (deploymentId );
1376- assertThat (insufficientAssignment , is (notNullValue ()));
1377- assertThat (insufficientAssignment .getAssignmentState (), equalTo (AssignmentState .STARTING )); // the assignment still exists in STARTING state
1378- assertThat (insufficientAssignment .getNodeRoutingTable (), is (anEmptyMap ())); // but no node was given the model
1379- assertThat (insufficientAssignment .getReason ().isPresent (), is (true ));
1380- assertThat (insufficientAssignment .getReason ().get (), containsString ("insufficient available memory" ));
1381- }
13821323}
0 commit comments