Add /stats and /latest_frame endpoints for device-manager

alexnorell · alexnorell · commit 82e180056c1c · 2026-03-13T15:53:33.000-07:00
Adds two new HTTP endpoints behind the ENABLE_STREAM_API flag:

- GET /stats: returns camera_fps and inference_fps averaged across all
  active pipelines (null when no positive reading is available), plus
  stream_count. Reuses the existing list_pipelines/get_status IPC --
  no new commands needed.

- GET /inference_pipelines/{pipeline_id}/latest_frame: returns the most
  recent frame as a base64-encoded JPEG with metadata. Adds a new
  LATEST_FRAME IPC command that peeks at the buffer non-destructively.
diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py
@@ -231,6 +231,7 @@
     InferencePipelineStatusResponse,
     InitializeWebRTCPipelineResponse,
     InitializeWebRTCResponse,
+    LatestFrameResponse,
     ListPipelinesResponse,
 )
 from inference.core.interfaces.stream_manager.api.stream_manager_client import (
@@ -1893,6 +1894,77 @@ async def consume(
                     excluded_fields=request.excluded_fields,
                 )
 
+            @app.get(
+                "/stats",
+                summary="Aggregated pipeline statistics",
+            )
+            @with_route_exceptions_async
+            async def get_stats():
+                """Report mean camera/inference FPS and the active pipeline count.
+
+                Best-effort: a pipeline whose status cannot be read is logged and
+                skipped, so one failing pipeline does not hide the others.
+                `camera_fps` / `inference_fps` are None when no positive reading
+                was collected; `stream_count` is 0 when pipelines cannot be listed.
+                """
+                # Function-scope stdlib import keeps this block self-contained.
+                import logging
+
+                log = logging.getLogger(__name__)
+
+                def _mean(values):
+                    # Average of the readings, or None when there are none.
+                    return sum(values) / len(values) if values else None
+
+                pipeline_ids = []
+                camera_fps_values = []
+                inference_fps_values = []
+                if self.stream_manager_client is not None:
+                    try:
+                        pipelines_resp = (
+                            await self.stream_manager_client.list_pipelines()
+                        )
+                        pipeline_ids = list(pipelines_resp.pipelines)
+                    except Exception:
+                        log.warning("/stats: cannot list pipelines", exc_info=True)
+                for pid in pipeline_ids:
+                    try:
+                        status_resp = await self.stream_manager_client.get_status(pid)
+                        report = status_resp.report
+                        throughput = report.get("inference_throughput", 0.0)
+                        if throughput and throughput > 0:
+                            inference_fps_values.append(throughput)
+                        for src in report.get("sources_metadata", []):
+                            props = src.get("source_properties") or {}
+                            fps = props.get("fps")
+                            if fps and fps > 0:
+                                camera_fps_values.append(fps)
+                    except Exception:
+                        log.warning(
+                            "/stats: cannot read status of pipeline %s",
+                            pid,
+                            exc_info=True,
+                        )
+                return {
+                    "camera_fps": _mean(camera_fps_values),
+                    "inference_fps": _mean(inference_fps_values),
+                    "stream_count": len(pipeline_ids),
+                }
+
+            @app.get(
+                "/inference_pipelines/{pipeline_id}/latest_frame",
+                response_model=LatestFrameResponse,
+                summary="[EXPERIMENTAL] Get latest frame from InferencePipeline",
+            )
+            @with_route_exceptions_async
+            async def latest_frame(
+                pipeline_id: str,
+            ) -> LatestFrameResponse:
+                """Return the pipeline's newest buffered frame as base64 JPEG."""
+                return await self.stream_manager_client.get_latest_frame(
+                    pipeline_id=pipeline_id
+                )
+
         class ModelInitState:
             """Class to track model initialization state."""
 
diff --git a/inference/core/interfaces/stream_manager/api/entities.py b/inference/core/interfaces/stream_manager/api/entities.py
@@ -37,6 +37,15 @@ class ConsumePipelineResponse(CommandResponse):
     frames_metadata: List[FrameMetadata]
 
 
+class LatestFrameResponse(CommandResponse):
+    # Reply to the LATEST_FRAME command; the four fields below default to
+    # None, which is what the manager sends when no frame is buffered.
+    frame_data: Optional[str] = Field(None, description="Base64-encoded JPEG image")
+    frame_id: Optional[int] = Field(None)
+    frame_timestamp: Optional[datetime] = Field(None)
+    source_id: Optional[int] = Field(None)
+
+
 class InitializeWebRTCPipelineResponse(CommandResponse):
     sdp: str
     type: str
diff --git a/inference/core/interfaces/stream_manager/api/stream_manager_client.py b/inference/core/interfaces/stream_manager/api/stream_manager_client.py
@@ -13,6 +13,7 @@
     FrameMetadata,
     InferencePipelineStatusResponse,
     InitializeWebRTCPipelineResponse,
+    LatestFrameResponse,
     ListPipelinesResponse,
 )
 from inference.core.interfaces.stream_manager.api.errors import (
@@ -200,6 +201,30 @@ async def consume_pipeline_result(
             ],
         )
 
+    async def get_latest_frame(self, pipeline_id: str) -> LatestFrameResponse:
+        """Fetch the newest buffered frame of a pipeline from the stream manager.
+
+        Issues a LATEST_FRAME command and wraps the reply; frame fields missing
+        from the reply are returned as None.
+        """
+        reply = await self._handle_command(
+            command={
+                TYPE_KEY: CommandType.LATEST_FRAME,
+                PIPELINE_ID_KEY: pipeline_id,
+            }
+        )
+        body = reply[RESPONSE_KEY]
+        status = body[STATUS_KEY]
+        origin = CommandContext(
+            request_id=reply.get(REQUEST_ID_KEY),
+            pipeline_id=reply.get(PIPELINE_ID_KEY),
+        )
+        frame_fields = {
+            key: body.get(key)
+            for key in ("frame_data", "frame_id", "frame_timestamp", "source_id")
+        }
+        return LatestFrameResponse(status=status, context=origin, **frame_fields)
+
     async def _handle_command(self, command: dict) -> dict:
         response = await send_command(
             host=self._host,
diff --git a/inference/core/interfaces/stream_manager/manager_app/entities.py b/inference/core/interfaces/stream_manager/manager_app/entities.py
@@ -49,6 +49,7 @@ class CommandType(str, Enum):
     TERMINATE = "terminate"
     LIST_PIPELINES = "list_pipelines"
     CONSUME_RESULT = "consume_result"
+    LATEST_FRAME = "latest_frame"
 
 
 class VideoConfiguration(BaseModel):
diff --git a/inference/core/interfaces/stream_manager/manager_app/inference_pipeline_manager.py b/inference/core/interfaces/stream_manager/manager_app/inference_pipeline_manager.py
@@ -1,4 +1,5 @@
 import asyncio
+import base64
 import json
 import os
 import signal
@@ -148,6 +149,8 @@ def _handle_command(self, request_id: str, payload: dict) -> None:
                 return self._get_pipeline_status(request_id=request_id)
             if command_type is CommandType.CONSUME_RESULT:
                 return self._consume_results(request_id=request_id, payload=payload)
+            if command_type is CommandType.LATEST_FRAME:
+                return self._handle_latest_frame(request_id=request_id)
             raise NotImplementedError(
                 f"Command type `{command_type}` cannot be handled"
             )
@@ -636,6 +639,54 @@ def _consume_results(self, request_id: str, payload: dict) -> None:
                 error_type=ErrorType.OPERATION_ERROR,
             )
 
+    def _handle_latest_frame(self, request_id: str) -> None:
+        """Reply with the newest buffered frame as a base64-encoded JPEG.
+
+        The last buffer entry is read in place (nothing is popped). On success
+        the reply has SUCCESS status; its frame fields are None when the sink
+        is missing or empty, or when the newest entry holds no frame. Any
+        exception, including a failed JPEG encode, is routed to
+        `_handle_error` as an OPERATION_ERROR.
+        """
+        try:
+            response_payload = {
+                STATUS_KEY: OperationStatus.SUCCESS,
+                "frame_data": None,
+                "frame_id": None,
+                "frame_timestamp": None,
+                "source_id": None,
+            }
+            frames = []
+            if self._buffer_sink is not None and not self._buffer_sink.empty():
+                try:
+                    # Peek at the last item in the buffer (non-destructive)
+                    _, frames = self._buffer_sink._buffer[-1]
+                except IndexError:
+                    # Buffer was drained after the emptiness check: no frame.
+                    frames = []
+            # Last non-None VideoFrame of the entry, if any
+            frame = next((f for f in reversed(frames) if f is not None), None)
+            if frame is not None:
+                # imencode reports failure through its first return value.
+                encoded_ok, jpeg_bytes = cv.imencode(
+                    ".jpg", frame.image, [cv.IMWRITE_JPEG_QUALITY, 70]
+                )
+                if not encoded_ok:
+                    raise RuntimeError("JPEG encoding of the latest frame failed")
+                frame_b64 = base64.b64encode(jpeg_bytes.tobytes()).decode("ascii")
+                response_payload["frame_data"] = frame_b64
+                response_payload["frame_id"] = frame.frame_id
+                response_payload["frame_timestamp"] = frame.frame_timestamp.isoformat()
+                response_payload["source_id"] = frame.source_id
+            self._responses_queue.put((request_id, response_payload))
+        except Exception as error:
+            self._handle_error(
+                request_id=request_id,
+                error=error,
+                public_error_message="Unexpected error retrieving latest frame.",
+                error_type=ErrorType.OPERATION_ERROR,
+            )
+
     def _handle_error(
         self,
         request_id: str,
diff --git a/tests/inference/unit_tests/core/interfaces/stream_manager/test_stats_and_latest_frame.py b/tests/inference/unit_tests/core/interfaces/stream_manager/test_stats_and_latest_frame.py