Skip to content

Commit 3b6c150

Browse files
[ML] Log node selection during inference routing (#88084)
This adds trace-level logging of the routing table and of the selected node when choosing which node an inference request is routed to.
1 parent 5549398 commit 3b6c150

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportInferTrainedModelDeploymentAction.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
package org.elasticsearch.xpack.ml.action;
99

10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
1012
import org.elasticsearch.ElasticsearchStatusException;
1113
import org.elasticsearch.action.ActionListener;
1214
import org.elasticsearch.action.FailedNodeException;
@@ -32,12 +34,16 @@
3234

3335
import java.util.List;
3436

37+
import static org.elasticsearch.core.Strings.format;
38+
3539
public class TransportInferTrainedModelDeploymentAction extends TransportTasksAction<
3640
TrainedModelDeploymentTask,
3741
InferTrainedModelDeploymentAction.Request,
3842
InferTrainedModelDeploymentAction.Response,
3943
InferTrainedModelDeploymentAction.Response> {
4044

45+
private static final Logger logger = LogManager.getLogger(TransportInferTrainedModelDeploymentAction.class);
46+
4147
private final TrainedModelProvider provider;
4248

4349
@Inject
@@ -94,6 +100,7 @@ protected void doExecute(
94100
listener.onFailure(ExceptionsHelper.conflictStatusException(message));
95101
return;
96102
}
103+
logger.trace(() -> format("[%s] selecting node from routing table: %s", assignment.getModelId(), assignment.getNodeRoutingTable()));
97104
String[] randomRunningNode = assignment.getStartedNodes();
98105
if (randomRunningNode.length == 0) {
99106
String message = "Trained model [" + deploymentId + "] is not allocated to any nodes";
@@ -102,6 +109,7 @@ protected void doExecute(
102109
}
103110
// TODO Do better routing for inference calls
104111
int nodeIndex = Randomness.get().nextInt(randomRunningNode.length);
112+
logger.trace(() -> format("[%s] selected node [%s]", assignment.getModelId(), randomRunningNode[nodeIndex]));
105113
request.setNodes(randomRunningNode[nodeIndex]);
106114
super.doExecute(task, request, listener);
107115
}
@@ -118,6 +126,7 @@ protected InferTrainedModelDeploymentAction.Response newResponse(
118126
} else if (failedNodeExceptions.isEmpty() == false) {
119127
throw org.elasticsearch.ExceptionsHelper.convertToElastic(failedNodeExceptions.get(0));
120128
} else if (tasks.isEmpty()) {
129+
logger.trace(() -> format("[%s] unable to find deployment task for inference", request.getDeploymentId()));
121130
throw new ElasticsearchStatusException(
122131
"[{}] unable to find deployment task for inference please stop and start the deployment or try again momentarily",
123132
RestStatus.NOT_FOUND,

0 commit comments

Comments
 (0)