@@ -100,6 +100,7 @@ public TransportUndeployModelAction(
100100
101101 @ Override
102102 protected void doExecute (Task task , MLUndeployModelNodesRequest request , ActionListener <MLUndeployModelNodesResponse > listener ) {
103+ log .info ("Executing undeploy for models: {}" , Arrays .toString (request .getModelIds ()));
103104 ActionListener <MLUndeployModelNodesResponse > wrappedListener = ActionListener .wrap (undeployModelNodesResponse -> {
104105 processUndeployModelResponseAndUpdate (request .getTenantId (), undeployModelNodesResponse , listener );
105106 }, listener ::onFailure );
@@ -112,6 +113,7 @@ void processUndeployModelResponseAndUpdate(
112113 ActionListener <MLUndeployModelNodesResponse > listener
113114 ) {
114115 List <MLUndeployModelNodeResponse > responses = undeployModelNodesResponse .getNodes ();
116+ log .debug ("Processing undeploy model responses from nodes" );
115117 if (responses == null || responses .isEmpty ()) {
116118 listener .onResponse (undeployModelNodesResponse );
117119 return ;
@@ -135,9 +137,10 @@ void processUndeployModelResponseAndUpdate(
135137
136138 Map <String , String > modelUndeployStatus = r .getModelUndeployStatus ();
137139 for (Map .Entry <String , String > entry : modelUndeployStatus .entrySet ()) {
140+ String modelId = entry .getKey ();
138141 String status = entry .getValue ();
142+ log .debug ("Model status of model {} on node {}: {}" , modelId , r .getNode ().getId (), status );
139143 if (UNDEPLOYED .equals (status )) {
140- String modelId = entry .getKey ();
141144 if (!actualRemovedNodesMap .containsKey (modelId )) {
142145 actualRemovedNodesMap .put (modelId , new ArrayList <>());
143146 }
@@ -154,6 +157,7 @@ void processUndeployModelResponseAndUpdate(
154157 MLSyncUpNodesRequest syncUpRequest = new MLSyncUpNodesRequest (nodeFilter .getAllNodes (), syncUpInput );
155158 try (ThreadContext .StoredContext context = client .threadPool ().getThreadContext ().stashContext ()) {
156159 if (!actualRemovedNodesMap .isEmpty ()) {
160+ log .debug ("Models undeployed from nodes: {}" , actualRemovedNodesMap );
157161 BulkDataObjectRequest bulkRequest = BulkDataObjectRequest .builder ().globalIndex (ML_MODEL_INDEX ).build ();
158162 Map <String , Boolean > deployToAllNodes = new HashMap <>();
159163 for (String modelId : actualRemovedNodesMap .keySet ()) {
@@ -166,8 +170,10 @@ void processUndeployModelResponseAndUpdate(
166170 * we need to update both planning worker nodes (count) and current worker nodes (count)
167171 * and deployToAllNodes value in model index.
168172 */
173+ log .debug ("Updating metadata for model {}: removedNodes={}" , modelId , removedNodes );
169174 Map <String , Object > updateDocument = new HashMap <>();
170- if (modelWorkNodesBeforeRemoval .get (modelId ).length == removedNodeCount ) { // undeploy all nodes.
175+ if (modelWorkNodesBeforeRemoval .get (modelId ).length == removedNodeCount ) {
176+ log .debug ("All nodes removed for model {}. Marking as undeployed." , modelId );// undeploy all nodes.
171177 updateDocument .put (MLModel .PLANNING_WORKER_NODES_FIELD , ImmutableList .of ());
172178 updateDocument .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , 0 );
173179 updateDocument .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , 0 );
@@ -180,6 +186,12 @@ void processUndeployModelResponseAndUpdate(
180186 .stream (modelWorkNodesBeforeRemoval .get (modelId ))
181187 .filter (x -> !removedNodes .contains (x ))
182188 .collect (Collectors .toList ());
189+ log
190+ .debug (
191+ "Partially undeployed for model {} with remaining planning worker nodes: {}" ,
192+ modelId ,
193+ newPlanningWorkerNodes
194+ );
183195 updateDocument .put (MLModel .PLANNING_WORKER_NODES_FIELD , newPlanningWorkerNodes );
184196 updateDocument .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , newPlanningWorkerNodes .size ());
185197 updateDocument .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , newPlanningWorkerNodes .size ());
@@ -195,6 +207,7 @@ void processUndeployModelResponseAndUpdate(
195207 bulkRequest .add (updateRequest ).setRefreshPolicy (WriteRequest .RefreshPolicy .IMMEDIATE );
196208 }
197209 syncUpInput .setDeployToAllNodes (deployToAllNodes );
210+ log .debug ("Sending bulk metadata update request for undeploy" );
198211 ActionListener <BulkResponse > actionListener = ActionListener .wrap (r -> {
199212 log
200213 .debug (
@@ -203,6 +216,7 @@ void processUndeployModelResponseAndUpdate(
203216 );
204217 }, e -> { log .error ("Failed to update model state as undeployed" , e ); });
205218 ActionListener <BulkResponse > wrappedListener = ActionListener .runAfter (actionListener , () -> {
219+ log .debug ("Triggering sync-up after bulk update for undeploy" );
206220 syncUpUndeployedModels (syncUpRequest );
207221 listener .onResponse (undeployModelNodesResponse );
208222 });
@@ -288,11 +302,14 @@ private MLUndeployModelNodeResponse createUndeployModelNodeResponse(MLUndeployMo
288302
289303 boolean specifiedModelIds = modelIds != null && modelIds .length > 0 ;
290304 String [] removedModelIds = specifiedModelIds ? modelIds : mlModelManager .getAllModelIds ();
305+
306+ log .debug ("Models to undeploy: {}" , Arrays .toString (removedModelIds ));
291307 if (removedModelIds != null ) {
292308 for (String modelId : removedModelIds ) {
293309 FunctionName functionName = mlModelManager .getModelFunctionName (modelId );
294310 String [] workerNodes = mlModelManager .getWorkerNodes (modelId , functionName );
295311 modelWorkerNodesMap .put (modelId , workerNodes );
312+ log .debug ("Retrieved worker nodes for model {}: {}" , modelId , Arrays .toString (workerNodes ));
296313 }
297314 }
298315
0 commit comments