11
11
import org .apache .logging .log4j .Logger ;
12
12
import org .elasticsearch .ElasticsearchStatusException ;
13
13
import org .elasticsearch .ResourceNotFoundException ;
14
- import org .elasticsearch .TransportVersion ;
15
- import org .elasticsearch .TransportVersions ;
16
14
import org .elasticsearch .action .ActionListener ;
17
15
import org .elasticsearch .action .ActionRequestValidationException ;
18
16
import org .elasticsearch .action .support .master .AcknowledgedResponse ;
@@ -79,9 +77,6 @@ public class TrainedModelAssignmentClusterService implements ClusterStateListene
79
77
80
78
private static final Logger logger = LogManager .getLogger (TrainedModelAssignmentClusterService .class );
81
79
82
- private static final TransportVersion RENAME_ALLOCATION_TO_ASSIGNMENT_TRANSPORT_VERSION = TransportVersions .V_8_3_0 ;
83
- public static final TransportVersion DISTRIBUTED_MODEL_ALLOCATION_TRANSPORT_VERSION = TransportVersions .V_8_4_0 ;
84
-
85
80
private final ClusterService clusterService ;
86
81
private final ThreadPool threadPool ;
87
82
private final NodeLoadDetector nodeLoadDetector ;
@@ -169,14 +164,6 @@ public void clusterChanged(ClusterChangedEvent event) {
169
164
return ;
170
165
}
171
166
172
- if (eventStateMinTransportVersionIsBeforeDistributedModelAllocationTransportVersion (event )) {
173
- // we should not try to rebalance assignments while there may be nodes running on a version
174
- // prior to introducing distributed model allocation.
175
- // But we should remove routing to removed or shutting down nodes.
176
- removeRoutingToRemovedOrShuttingDownNodes (event );
177
- return ;
178
- }
179
-
180
167
if (event .nodesAdded ()) {
181
168
logMlNodeHeterogeneity ();
182
169
}
@@ -203,10 +190,6 @@ public void clusterChanged(ClusterChangedEvent event) {
203
190
}
204
191
}
205
192
206
- boolean eventStateMinTransportVersionIsBeforeDistributedModelAllocationTransportVersion (ClusterChangedEvent event ) {
207
- return event .state ().getMinTransportVersion ().before (DISTRIBUTED_MODEL_ALLOCATION_TRANSPORT_VERSION );
208
- }
209
-
210
193
boolean eventStateHasGlobalBlockStateNotRecoveredBlock (ClusterChangedEvent event ) {
211
194
return event .state ().blocks ().hasGlobalBlock (GatewayService .STATE_NOT_RECOVERED_BLOCK );
212
195
}
@@ -400,18 +383,6 @@ public void createNewModelAssignment(
400
383
CreateTrainedModelAssignmentAction .Request request ,
401
384
ActionListener <TrainedModelAssignment > listener
402
385
) {
403
- if (clusterService .state ().getMinTransportVersion ().before (DISTRIBUTED_MODEL_ALLOCATION_TRANSPORT_VERSION )) {
404
- listener .onFailure (
405
- new ElasticsearchStatusException (
406
- "cannot create new assignment [{}] for model [{}] while cluster upgrade is in progress" ,
407
- RestStatus .CONFLICT ,
408
- request .getTaskParams ().getDeploymentId (),
409
- request .getTaskParams ().getModelId ()
410
- )
411
- );
412
- return ;
413
- }
414
-
415
386
if (MlMetadata .getMlMetadata (clusterService .state ()).isResetMode ()) {
416
387
listener .onFailure (
417
388
new ElasticsearchStatusException (
@@ -522,13 +493,11 @@ private static ClusterState update(ClusterState currentState, TrainedModelAssign
522
493
523
494
private static ClusterState forceUpdate (ClusterState currentState , TrainedModelAssignmentMetadata .Builder modelAssignments ) {
524
495
logger .debug (() -> format ("updated assignments: %s" , modelAssignments .build ()));
496
+
525
497
ProjectMetadata .Builder builder = ProjectMetadata .builder (currentState .metadata ().getProject ());
526
- if (currentState .getMinTransportVersion ().onOrAfter (RENAME_ALLOCATION_TO_ASSIGNMENT_TRANSPORT_VERSION )) {
527
- builder .putCustom (TrainedModelAssignmentMetadata .NAME , modelAssignments .build ())
528
- .removeCustom (TrainedModelAssignmentMetadata .DEPRECATED_NAME );
529
- } else {
530
- builder .putCustom (TrainedModelAssignmentMetadata .DEPRECATED_NAME , modelAssignments .buildOld ());
531
- }
498
+ builder .putCustom (TrainedModelAssignmentMetadata .NAME , modelAssignments .build ())
499
+ .removeCustom (TrainedModelAssignmentMetadata .DEPRECATED_NAME );
500
+
532
501
return ClusterState .builder (currentState ).putProjectMetadata (builder ).build ();
533
502
}
534
503
@@ -844,7 +813,7 @@ private void updateDeployment(
844
813
}
845
814
boolean hasUpdates = hasUpdates (numberOfAllocations , adaptiveAllocationsSettingsUpdates , existingAssignment );
846
815
if (hasUpdates == false ) {
847
- logger .info ("no updates" );
816
+ logger .debug ("no updates to be made for deployment [{}]" , deploymentId );
848
817
listener .onResponse (existingAssignment );
849
818
return ;
850
819
}
@@ -858,27 +827,17 @@ private void updateDeployment(
858
827
);
859
828
return ;
860
829
}
861
- if (clusterState .getMinTransportVersion ().before (DISTRIBUTED_MODEL_ALLOCATION_TRANSPORT_VERSION )) {
862
- listener .onFailure (
863
- new ElasticsearchStatusException (
864
- "cannot update deployment with model id [{}] while cluster upgrade is in progress." ,
865
- RestStatus .CONFLICT ,
866
- deploymentId
867
- )
868
- );
869
- return ;
870
- }
871
830
872
- ActionListener <ClusterState > updatedStateListener = ActionListener .wrap (
873
- updatedState -> submitUnbatchedTask ("update model deployment" , new ClusterStateUpdateTask () {
831
+ ActionListener <TrainedModelAssignmentMetadata . Builder > updatedAssignmentListener = ActionListener .wrap (
832
+ updatedAssignment -> submitUnbatchedTask ("update model deployment" , new ClusterStateUpdateTask () {
874
833
875
834
private volatile boolean isUpdated ;
876
835
877
836
@ Override
878
837
public ClusterState execute (ClusterState currentState ) {
879
838
if (areClusterStatesCompatibleForRebalance (clusterState , currentState )) {
880
839
isUpdated = true ;
881
- return updatedState ;
840
+ return update ( currentState , updatedAssignment ) ;
882
841
}
883
842
logger .debug (() -> format ("[%s] Retrying update as cluster state has been modified" , deploymentId ));
884
843
updateDeployment (currentState , deploymentId , numberOfAllocations , adaptiveAllocationsSettings , isInternal , listener );
@@ -910,7 +869,7 @@ public void clusterStateProcessed(ClusterState oldState, ClusterState newState)
910
869
listener ::onFailure
911
870
);
912
871
913
- updateAssignment (clusterState , existingAssignment , numberOfAllocations , adaptiveAllocationsSettings , updatedStateListener );
872
+ updateAssignment (clusterState , existingAssignment , numberOfAllocations , adaptiveAllocationsSettings , updatedAssignmentListener );
914
873
}
915
874
916
875
static boolean hasUpdates (
@@ -944,7 +903,7 @@ private void updateAssignment(
944
903
TrainedModelAssignment assignment ,
945
904
Integer numberOfAllocations ,
946
905
AdaptiveAllocationsSettings adaptiveAllocationsSettings ,
947
- ActionListener <ClusterState > listener
906
+ ActionListener <TrainedModelAssignmentMetadata . Builder > listener
948
907
) {
949
908
threadPool .executor (MachineLearning .UTILITY_THREAD_POOL_NAME ).execute (() -> {
950
909
if (numberOfAllocations == null || numberOfAllocations == assignment .getTaskParams ().getNumberOfAllocations ()) {
@@ -961,21 +920,21 @@ private void updateAndKeepNumberOfAllocations(
961
920
ClusterState clusterState ,
962
921
TrainedModelAssignment assignment ,
963
922
AdaptiveAllocationsSettings adaptiveAllocationsSettings ,
964
- ActionListener <ClusterState > listener
923
+ ActionListener <TrainedModelAssignmentMetadata . Builder > listener
965
924
) {
966
925
TrainedModelAssignment .Builder updatedAssignment = TrainedModelAssignment .Builder .fromAssignment (assignment )
967
926
.setAdaptiveAllocationsSettings (adaptiveAllocationsSettings );
968
927
TrainedModelAssignmentMetadata .Builder builder = TrainedModelAssignmentMetadata .builder (clusterState );
969
928
builder .updateAssignment (assignment .getDeploymentId (), updatedAssignment );
970
- listener .onResponse (update ( clusterState , builder ) );
929
+ listener .onResponse (builder );
971
930
}
972
931
973
932
private void increaseNumberOfAllocations (
974
933
ClusterState clusterState ,
975
934
TrainedModelAssignment assignment ,
976
935
int numberOfAllocations ,
977
936
AdaptiveAllocationsSettings adaptiveAllocationsSettings ,
978
- ActionListener <ClusterState > listener
937
+ ActionListener <TrainedModelAssignmentMetadata . Builder > listener
979
938
) {
980
939
try {
981
940
TrainedModelAssignment .Builder updatedAssignment = TrainedModelAssignment .Builder .fromAssignment (assignment )
@@ -995,7 +954,7 @@ private void increaseNumberOfAllocations(
995
954
)
996
955
);
997
956
} else {
998
- listener .onResponse (update ( clusterState , rebalancedMetadata ) );
957
+ listener .onResponse (rebalancedMetadata );
999
958
}
1000
959
} catch (Exception e ) {
1001
960
listener .onFailure (e );
@@ -1007,7 +966,7 @@ private void decreaseNumberOfAllocations(
1007
966
TrainedModelAssignment assignment ,
1008
967
int numberOfAllocations ,
1009
968
AdaptiveAllocationsSettings adaptiveAllocationsSettings ,
1010
- ActionListener <ClusterState > listener
969
+ ActionListener <TrainedModelAssignmentMetadata . Builder > listener
1011
970
) {
1012
971
TrainedModelAssignment .Builder updatedAssignment = numberOfAllocations < assignment .totalTargetAllocations ()
1013
972
? new AllocationReducer (assignment , nodeAvailabilityZoneMapper .buildMlNodesByAvailabilityZone (clusterState )).reduceTo (
@@ -1022,7 +981,7 @@ private void decreaseNumberOfAllocations(
1022
981
}
1023
982
TrainedModelAssignmentMetadata .Builder builder = TrainedModelAssignmentMetadata .builder (clusterState );
1024
983
builder .updateAssignment (assignment .getDeploymentId (), updatedAssignment );
1025
- listener .onResponse (update ( clusterState , builder ) );
984
+ listener .onResponse (builder );
1026
985
}
1027
986
1028
987
static ClusterState setToStopping (ClusterState clusterState , String deploymentId , String reason ) {
0 commit comments