Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
import org.elasticsearch.xpack.inference.telemetry.InferenceTimer;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Executor;
import java.util.concurrent.Flow;
Expand All @@ -59,6 +61,7 @@
import static org.elasticsearch.xpack.inference.InferencePlugin.INFERENCE_API_FEATURE;
import static org.elasticsearch.xpack.inference.telemetry.InferenceStats.modelAttributes;
import static org.elasticsearch.xpack.inference.telemetry.InferenceStats.responseAttributes;
import static org.elasticsearch.xpack.inference.telemetry.InferenceStats.routingAttributes;

/**
* Base class for transport actions that handle inference requests.
Expand Down Expand Up @@ -145,7 +148,8 @@ protected void doExecute(Task task, Request request, ActionListener<InferenceAct
}

var service = serviceRegistry.getService(serviceName).get();
var routingDecision = determineRouting(serviceName, request, unparsedModel);
var localNodeId = nodeClient.getLocalNodeId();
var routingDecision = determineRouting(serviceName, request, unparsedModel, localNodeId);

if (routingDecision.currentNodeShouldHandleRequest()) {
var model = service.parsePersistedConfigWithSecrets(
Expand All @@ -154,7 +158,7 @@ protected void doExecute(Task task, Request request, ActionListener<InferenceAct
unparsedModel.settings(),
unparsedModel.secrets()
);
inferOnServiceWithMetrics(model, request, service, timer, listener);
inferOnServiceWithMetrics(model, request, service, timer, localNodeId, listener);
} else {
// Reroute request
request.setHasBeenRerouted(true);
Expand Down Expand Up @@ -188,7 +192,7 @@ private void validateRequest(Request request, UnparsedModel unparsedModel) {
);
}

private NodeRoutingDecision determineRouting(String serviceName, Request request, UnparsedModel unparsedModel) {
private NodeRoutingDecision determineRouting(String serviceName, Request request, UnparsedModel unparsedModel, String localNodeId) {
var modelTaskType = unparsedModel.taskType();

// Rerouting not supported or request was already rerouted
Expand All @@ -212,7 +216,6 @@ private NodeRoutingDecision determineRouting(String serviceName, Request request
}

var nodeToHandleRequest = responsibleNodes.get(random.nextInt(responsibleNodes.size()));
String localNodeId = nodeClient.getLocalNodeId();

// The drawn node is the current node
if (nodeToHandleRequest.getId().equals(localNodeId)) {
Expand Down Expand Up @@ -260,7 +263,11 @@ public InferenceAction.Response read(StreamInput in) throws IOException {

private void recordMetrics(UnparsedModel model, InferenceTimer timer, @Nullable Throwable t) {
try {
inferenceStats.inferenceDuration().record(timer.elapsedMillis(), responseAttributes(model, t));
Map<String, Object> metricAttributes = new HashMap<>();
metricAttributes.putAll(modelAttributes(model));
metricAttributes.putAll(responseAttributes(unwrapCause(t)));

inferenceStats.inferenceDuration().record(timer.elapsedMillis(), metricAttributes);
} catch (Exception e) {
log.atDebug().withThrowable(e).log("Failed to record metrics with an unparsed model, dropping metrics");
}
Expand All @@ -271,6 +278,7 @@ private void inferOnServiceWithMetrics(
Request request,
InferenceService service,
InferenceTimer timer,
String localNodeId,
ActionListener<InferenceAction.Response> listener
) {
inferenceStats.requestCount().incrementBy(1, modelAttributes(model));
Expand All @@ -279,18 +287,18 @@ private void inferOnServiceWithMetrics(
var taskProcessor = streamingTaskManager.<ChunkedToXContent>create(STREAMING_INFERENCE_TASK_TYPE, STREAMING_TASK_ACTION);
inferenceResults.publisher().subscribe(taskProcessor);

var instrumentedStream = new PublisherWithMetrics(timer, model);
var instrumentedStream = new PublisherWithMetrics(timer, model, request, localNodeId);
taskProcessor.subscribe(instrumentedStream);

var streamErrorHandler = streamErrorHandler(instrumentedStream);

listener.onResponse(new InferenceAction.Response(inferenceResults, streamErrorHandler));
} else {
recordMetrics(model, timer, null);
recordMetrics(model, timer, request, localNodeId, null);
listener.onResponse(new InferenceAction.Response(inferenceResults));
}
}, e -> {
recordMetrics(model, timer, e);
recordMetrics(model, timer, request, localNodeId, e);
listener.onFailure(e);
}));
}
Expand All @@ -299,9 +307,14 @@ protected Flow.Publisher<ChunkedToXContent> streamErrorHandler(Flow.Processor<Ch
return upstream;
}

private void recordMetrics(Model model, InferenceTimer timer, @Nullable Throwable t) {
private void recordMetrics(Model model, InferenceTimer timer, Request request, String localNodeId, @Nullable Throwable t) {
try {
inferenceStats.inferenceDuration().record(timer.elapsedMillis(), responseAttributes(model, unwrapCause(t)));
Map<String, Object> metricAttributes = new HashMap<>();
metricAttributes.putAll(modelAttributes(model));
metricAttributes.putAll(routingAttributes(request, localNodeId));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m a bit concerned that the metric cardinality will grow extremely rapidly as you add the node_id. I'm not 100% sure if this is a problem for Elasticsearch (overview cluster).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the attribute will have a high cardinality and not the metric, right?

I don't think that should be inherently a problem vs for example Prometheus, which creates a timeseries automatically for each unique metric/attribute pair. We'll only do (manual) aggregations on node id over a limited time window + the number of unique node ids in a (serverless) cluster shouldn't be very high for a timeframe of let's say 10 minutes - 1 day. I've also checked other Elasticsearch metrics and a lot of them include the node id as an attribute, so I guess, if it's not a problem for them it shouldn't be one for us.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summarizing a slack conversation:
In addition to the cardinality risk during search, there also seems to be a risk of high cardinality when the metrics are pushed from the node: each attribute variation creates a new element that consumes capacity in an outbound queue. The queue has some maximum capacity and is flushed periodically.

We think (hope) the risk is relatively low, given that each node has a single id and routing should only involve a handful of distinct node ids.

metricAttributes.putAll(responseAttributes(unwrapCause(t)));

inferenceStats.inferenceDuration().record(timer.elapsedMillis(), metricAttributes);
} catch (Exception e) {
log.atDebug().withThrowable(e).log("Failed to record metrics with a parsed model, dropping metrics");
}
Expand Down Expand Up @@ -353,10 +366,14 @@ private class PublisherWithMetrics extends DelegatingProcessor<ChunkedToXContent

private final InferenceTimer timer;
private final Model model;
private final Request request;
private final String localNodeId;

/**
 * Captures the state needed to record routing-aware duration metrics
 * once the streamed inference terminates (error, cancel, or complete).
 */
private PublisherWithMetrics(InferenceTimer timer, Model model, Request request, String localNodeId) {
    this.timer = timer;
    this.model = model;
    this.request = request;
    this.localNodeId = localNodeId;
}

@Override
Expand All @@ -366,19 +383,19 @@ protected void next(ChunkedToXContent item) {

@Override
public void onError(Throwable throwable) {
    // Record failure metrics for this stream exactly once, then propagate the error.
    recordMetrics(model, timer, request, localNodeId, throwable);
    super.onError(throwable);
}

@Override
protected void onCancel() {
    // A cancelled stream counts as a completed (non-error) request for metrics.
    recordMetrics(model, timer, request, localNodeId, null);
    super.onCancel();
}

@Override
public void onComplete() {
    // Null throwable signals success (recorded as status_code 200 downstream).
    recordMetrics(model, timer, request, localNodeId, null);
    super.onComplete();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
import org.elasticsearch.telemetry.metric.LongCounter;
import org.elasticsearch.telemetry.metric.LongHistogram;
import org.elasticsearch.telemetry.metric.MeterRegistry;
import org.elasticsearch.xpack.core.inference.action.BaseInferenceActionRequest;

import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Map.entry;
import static java.util.stream.Stream.concat;

public record InferenceStats(LongCounter requestCount, LongHistogram inferenceDuration) {

Expand All @@ -45,49 +45,43 @@ public static InferenceStats create(MeterRegistry meterRegistry) {
);
}

public static Map<String, Object> modelAttributes(Model model) {
return toMap(modelAttributeEntries(model));
// Materializes an attribute-entry stream into a mutable map; duplicate keys are
// not expected and would throw IllegalStateException (Collectors.toMap contract).
private static Map<String, Object> toMap(Stream<Map.Entry<String, Object>> entries) {
    return entries.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}

private static Stream<Map.Entry<String, Object>> modelAttributeEntries(Model model) {
public static Map<String, Object> modelAttributes(Model model) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of curiosity, why not just create a new HashMap and conditionally put the entries in it?

I think the stream API is useful when we want to declaratively process a collection (filter, transformation, find one element etc), but in this case converting back and forth seems like an overhead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was originally used as a generator/builder to construct a map from multiple different objects in multiple different functions, but now that this change is moving away from that, we can probably just use HashMap for the conditionals and Map.of where there aren't. We can use Collections.unmodifiableMap around the HashMap if we want to be safe and/or don't trust APM

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to adapt it, I'll create a small follow-up PR tomorrow as the CI is green now and we want the metric attributes to appear pretty soon in EC Serverless and EC Hosted, so we can start on the integration tests.

var stream = Stream.<Map.Entry<String, Object>>builder()
.add(entry("service", model.getConfigurations().getService()))
.add(entry("task_type", model.getTaskType().toString()));
if (model.getServiceSettings().modelId() != null) {
stream.add(entry("model_id", model.getServiceSettings().modelId()));
}
return stream.build();
return toMap(stream.build());
}

private static Map<String, Object> toMap(Stream<Map.Entry<String, Object>> stream) {
return stream.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
/**
 * Attributes describing how the request was routed: whether it was rerouted
 * and which node ultimately handled it. Like {@code Map.of}, {@code Map.ofEntries}
 * rejects null values, so a null node id fails fast here.
 */
public static Map<String, Object> routingAttributes(BaseInferenceActionRequest request, String nodeIdHandlingRequest) {
    return Map.ofEntries(entry("rerouted", request.hasBeenRerouted()), entry("node_id", nodeIdHandlingRequest));
}

public static Map<String, Object> responseAttributes(Model model, @Nullable Throwable t) {
return toMap(concat(modelAttributeEntries(model), errorAttributes(t)));
}

public static Map<String, Object> responseAttributes(UnparsedModel model, @Nullable Throwable t) {
public static Map<String, Object> modelAttributes(UnparsedModel model) {
var unknownModelAttributes = Stream.<Map.Entry<String, Object>>builder()
.add(entry("service", model.service()))
.add(entry("task_type", model.taskType().toString()))
.build();

return toMap(concat(unknownModelAttributes, errorAttributes(t)));
return toMap(unknownModelAttributes);
}

/**
 * Builds response-outcome attributes from an optional failure: {@code status_code} 200
 * when {@code t} is null, otherwise status/error attributes derived from the throwable.
 */
public static Map<String, Object> responseAttributes(@Nullable Throwable t) {
    return toMap(errorAttributes(t));
}

private static Stream<Map.Entry<String, Object>> errorAttributes(@Nullable Throwable t) {
return switch (t) {
case null -> Stream.of(entry("status_code", 200));
var stream = switch (t) {
case null -> Stream.<Map.Entry<String, Object>>of(entry("status_code", 200));
case ElasticsearchStatusException ese -> Stream.<Map.Entry<String, Object>>builder()
.add(entry("status_code", ese.status().getStatus()))
.add(entry("error.type", String.valueOf(ese.status().getStatus())))
.build();
default -> Stream.of(entry("error.type", t.getClass().getSimpleName()));
default -> Stream.<Map.Entry<String, Object>>of(entry("error.type", t.getClass().getSimpleName()));
};

return toMap(stream);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public abstract class BaseTransportInferenceActionTestCase<Request extends BaseI
protected static final String serviceId = "serviceId";
protected final TaskType taskType;
protected static final String inferenceId = "inferenceEntityId";
protected static final String localNodeId = "local-node-id";
protected InferenceServiceRegistry serviceRegistry;
protected InferenceStats inferenceStats;
protected InferenceServiceRateLimitCalculator inferenceServiceRateLimitCalculator;
Expand Down Expand Up @@ -100,6 +101,7 @@ public void setUp() throws Exception {
);

mockValidLicenseState();
mockNodeClient();
}

protected abstract BaseTransportInferenceAction<Request> createAction(
Expand Down Expand Up @@ -135,6 +137,8 @@ public void testMetricsAfterModelRegistryError() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), nullValue());
assertThat(attributes.get("error.type"), is(expectedError));
assertThat(attributes.get("rerouted"), nullValue());
assertThat(attributes.get("node_id"), nullValue());
}));
}

Expand Down Expand Up @@ -176,6 +180,8 @@ public void testMetricsAfterMissingService() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(RestStatus.BAD_REQUEST.getStatus()));
assertThat(attributes.get("error.type"), is(String.valueOf(RestStatus.BAD_REQUEST.getStatus())));
assertThat(attributes.get("rerouted"), nullValue());
assertThat(attributes.get("node_id"), nullValue());
}));
}

Expand Down Expand Up @@ -216,6 +222,8 @@ public void testMetricsAfterUnknownTaskType() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(RestStatus.BAD_REQUEST.getStatus()));
assertThat(attributes.get("error.type"), is(String.valueOf(RestStatus.BAD_REQUEST.getStatus())));
assertThat(attributes.get("rerouted"), nullValue());
assertThat(attributes.get("node_id"), nullValue());
}));
}

Expand All @@ -232,6 +240,8 @@ public void testMetricsAfterInferError() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), nullValue());
assertThat(attributes.get("error.type"), is(expectedError));
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand All @@ -254,6 +264,8 @@ public void testMetricsAfterStreamUnsupported() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(expectedStatus.getStatus()));
assertThat(attributes.get("error.type"), is(expectedError));
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand All @@ -269,6 +281,8 @@ public void testMetricsAfterInferSuccess() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(200));
assertThat(attributes.get("error.type"), nullValue());
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand All @@ -280,6 +294,8 @@ public void testMetricsAfterStreamInferSuccess() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(200));
assertThat(attributes.get("error.type"), nullValue());
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand All @@ -296,6 +312,8 @@ public void testMetricsAfterStreamInferFailure() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), nullValue());
assertThat(attributes.get("error.type"), is(expectedError));
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand Down Expand Up @@ -329,6 +347,8 @@ public void onComplete() {
assertThat(attributes.get("model_id"), nullValue());
assertThat(attributes.get("status_code"), is(200));
assertThat(attributes.get("error.type"), nullValue());
assertThat(attributes.get("rerouted"), is(Boolean.FALSE));
assertThat(attributes.get("node_id"), is(localNodeId));
}));
}

Expand Down Expand Up @@ -404,4 +424,8 @@ protected void mockModelAndServiceRegistry(InferenceService service) {
/** Stubs the license state so the inference API feature is allowed in every test. */
protected void mockValidLicenseState() {
    when(licenseState.isAllowed(InferencePlugin.INFERENCE_API_FEATURE)).thenReturn(true);
}

/** Stubs the node client to report a fixed local node id, used by the routing-attribute assertions. */
private void mockNodeClient() {
    when(nodeClient.getLocalNodeId()).thenReturn(localNodeId);
}
}
Loading