Skip to content

Commit 7915adc (parent: ade0101)

feat(platform): record TTFT metric on first streamed token

File tree

2 files changed: +115 additions, −0 deletions

src/any_llm/providers/platform/platform.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ async def _acompletion(
171171

172172
await self._ensure_provider_initialized()
173173
start_time_ns = time.time_ns()
174+
start_perf_counter_ns = time.perf_counter_ns()
174175
any_llm_key = self.any_llm_key
175176
if any_llm_key is None:
176177
msg = "any_llm_key is required for platform provider"
@@ -258,6 +259,7 @@ async def _acompletion(
258259
params.user,
259260
session_trace_label,
260261
user_session_label,
262+
start_perf_counter_ns,
261263
any_llm_key,
262264
llm_span,
263265
trace_id,
@@ -335,6 +337,7 @@ async def _stream_with_usage_tracking(
335337
conversation_id: str | None,
336338
session_label: str,
337339
user_session_label: str | None,
340+
start_perf_counter_ns: int,
338341
any_llm_key: str,
339342
llm_span: trace.Span,
340343
trace_id: int,
@@ -343,10 +346,16 @@ async def _stream_with_usage_tracking(
343346
) -> AsyncIterator[ChatCompletionChunk]:
344347
"""Wrap the stream to export a trace after completion."""
345348
chunks: list[ChatCompletionChunk] = []
349+
first_chunk_received = False
346350

347351
try:
348352
with trace.use_span(llm_span, end_on_exit=False):
349353
async for chunk in stream:
354+
if not first_chunk_received:
355+
first_chunk_received = True
356+
ttft_ms = (time.perf_counter_ns() - start_perf_counter_ns) / 1_000_000
357+
llm_span.set_attribute("anyllm.performance.ttft_ms", ttft_ms)
358+
llm_span.add_event("llm.first_token", {"anyllm.performance.ttft_ms": ttft_ms})
350359
chunks.append(chunk)
351360
yield chunk
352361

tests/unit/providers/test_platform_provider.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2249,6 +2249,7 @@ async def empty_stream() -> AsyncIterator[ChatCompletionChunk]:
22492249
conversation_id=None,
22502250
session_label="session",
22512251
user_session_label=None,
2252+
start_perf_counter_ns=100,
22522253
any_llm_key=any_llm_key,
22532254
llm_span=llm_span,
22542255
trace_id=123,
@@ -2259,6 +2260,111 @@ async def empty_stream() -> AsyncIterator[ChatCompletionChunk]:
22592260
collected = [chunk async for chunk in result]
22602261
assert collected == []
22612262
llm_span.end.assert_called_once()
2263+
llm_span.add_event.assert_not_called()
2264+
assert not any(
2265+
call.args and call.args[0] == "anyllm.performance.ttft_ms" for call in llm_span.set_attribute.call_args_list
2266+
)
2267+
2268+
2269+
@pytest.mark.asyncio
2270+
@patch("any_llm.providers.platform.platform.export_completion_trace", new_callable=AsyncMock)
2271+
async def test_stream_with_usage_tracking_records_ttft_once(
2272+
mock_export_trace: AsyncMock,
2273+
any_llm_key: str,
2274+
) -> None:
2275+
provider_instance = PlatformProvider(api_key=any_llm_key)
2276+
provider_instance._provider = Mock(PROVIDER_NAME="openai")
2277+
llm_span = Mock()
2278+
2279+
chunks = [
2280+
ChatCompletionChunk(
2281+
id="chatcmpl-123",
2282+
model="gpt-4",
2283+
created=1234567890,
2284+
object="chat.completion.chunk",
2285+
choices=[ChunkChoice(index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None)],
2286+
),
2287+
ChatCompletionChunk(
2288+
id="chatcmpl-123",
2289+
model="gpt-4",
2290+
created=1234567890,
2291+
object="chat.completion.chunk",
2292+
choices=[ChunkChoice(index=0, delta=ChoiceDelta(), finish_reason="stop")],
2293+
usage=CompletionUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
2294+
),
2295+
]
2296+
2297+
async def mock_stream() -> AsyncIterator[ChatCompletionChunk]:
2298+
for chunk in chunks:
2299+
yield chunk
2300+
2301+
with patch("any_llm.providers.platform.platform.time.perf_counter_ns", return_value=1_120_000_000):
2302+
result = provider_instance._stream_with_usage_tracking(
2303+
stream=mock_stream(),
2304+
start_time_ns=100,
2305+
request_model="gpt-4",
2306+
conversation_id=None,
2307+
session_label="session",
2308+
user_session_label=None,
2309+
start_perf_counter_ns=1_000_000_000,
2310+
any_llm_key=any_llm_key,
2311+
llm_span=llm_span,
2312+
trace_id=123,
2313+
access_token=None,
2314+
trace_export_activated=False,
2315+
)
2316+
collected = [chunk async for chunk in result]
2317+
2318+
assert collected == chunks
2319+
ttft_calls = [
2320+
call
2321+
for call in llm_span.set_attribute.call_args_list
2322+
if call.args and call.args[0] == "anyllm.performance.ttft_ms"
2323+
]
2324+
assert len(ttft_calls) == 1
2325+
assert ttft_calls[0].args[1] == 120.0
2326+
llm_span.add_event.assert_called_once_with("llm.first_token", {"anyllm.performance.ttft_ms": 120.0})
2327+
mock_export_trace.assert_awaited_once()
2328+
2329+
2330+
@pytest.mark.asyncio
2331+
@patch("any_llm.providers.platform.platform.export_completion_trace")
2332+
async def test_acompletion_non_streaming_does_not_set_ttft_attribute(
2333+
mock_export_trace: AsyncMock,
2334+
any_llm_key: str,
2335+
mock_decrypted_provider_key: DecryptedProviderKey,
2336+
mock_completion: ChatCompletion,
2337+
) -> None:
2338+
provider_instance = PlatformProvider(api_key=any_llm_key)
2339+
provider_instance.provider = OpenaiProvider
2340+
await _init_provider(provider_instance, mock_decrypted_provider_key)
2341+
provider_instance.provider._acompletion = AsyncMock(return_value=mock_completion) # type: ignore[method-assign]
2342+
2343+
params = CompletionParams(
2344+
model_id="gpt-4",
2345+
messages=[{"role": "user", "content": "Hello"}],
2346+
stream=False,
2347+
)
2348+
2349+
mock_span = Mock()
2350+
mock_span.get_span_context.return_value = Mock(trace_id=456)
2351+
mock_tracer = Mock()
2352+
mock_tracer.start_span.return_value = mock_span
2353+
mock_provider_tp = Mock()
2354+
mock_provider_tp.get_tracer.return_value = mock_tracer
2355+
2356+
with (
2357+
patch.object(provider_instance.platform_client, "_aensure_valid_token", AsyncMock(return_value="jwt-token")),
2358+
patch("any_llm.providers.platform.platform._get_or_create_tracer_provider", return_value=mock_provider_tp),
2359+
patch("any_llm.providers.platform.platform.activate_trace_export"),
2360+
patch("any_llm.providers.platform.platform.deactivate_trace_export"),
2361+
):
2362+
await provider_instance._acompletion(params)
2363+
2364+
assert not any(
2365+
call.args and call.args[0] == "anyllm.performance.ttft_ms" for call in mock_span.set_attribute.call_args_list
2366+
)
2367+
mock_export_trace.assert_awaited_once()
22622368

22632369

22642370
@pytest.mark.asyncio

0 commit comments

Comments (0)