diff --git a/src/strands/event_loop/event_loop.py b/src/strands/event_loop/event_loop.py
index d6367e9d9..feb6ac339 100644
--- a/src/strands/event_loop/event_loop.py
+++ b/src/strands/event_loop/event_loop.py
@@ -281,7 +281,7 @@ async def _handle_model_execution(
                 message = recover_message_on_max_tokens_reached(message)

             if model_invoke_span:
-                tracer.end_model_invoke_span(model_invoke_span, message, usage, stop_reason)
+                tracer.end_model_invoke_span(model_invoke_span, message, usage, metrics, stop_reason)
             break  # Success! Break out of retry loop

         except Exception as e:
diff --git a/src/strands/event_loop/streaming.py b/src/strands/event_loop/streaming.py
index f24bd2a76..73f38de8a 100644
--- a/src/strands/event_loop/streaming.py
+++ b/src/strands/event_loop/streaming.py
@@ -2,6 +2,7 @@

 import json
 import logging
+import time
 from typing import Any, AsyncGenerator, AsyncIterable, Optional

 from ..models.model import Model
@@ -267,31 +268,38 @@ def handle_redact_content(event: RedactContentEvent, state: dict[str, Any]) -> N
         state["message"]["content"] = [{"text": event["redactAssistantContentMessage"]}]


-def extract_usage_metrics(event: MetadataEvent) -> tuple[Usage, Metrics]:
+def extract_usage_metrics(event: MetadataEvent, time_to_first_byte_ms: int | None = None) -> tuple[Usage, Metrics]:
     """Extracts usage metrics from the metadata chunk.

     Args:
         event: metadata.
+        time_to_first_byte_ms: Time from sending the model request to receiving the first byte, in milliseconds.

     Returns:
         The extracted usage metrics and latency.
     """
     usage = Usage(**event["usage"])
     metrics = Metrics(**event["metrics"])
+    if time_to_first_byte_ms:
+        metrics["timeToFirstByteMs"] = time_to_first_byte_ms

     return usage, metrics


-async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[TypedEvent, None]:
+async def process_stream(
+    chunks: AsyncIterable[StreamEvent], start_time: float | None = None
+) -> AsyncGenerator[TypedEvent, None]:
     """Processes the response stream from the API, constructing the final message and extracting usage metrics.

     Args:
         chunks: The chunks of the response stream from the model.
+        start_time: Time when the model request was initiated.

     Yields:
         The reason for stopping, the constructed message, and the usage metrics.
     """
     stop_reason: StopReason = "end_turn"
+    first_byte_time = None

     state: dict[str, Any] = {
         "message": {"role": "assistant", "content": []},
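For context, a minimal self-contained sketch of the timing pattern this hunk introduces; the helper and the fake stream below are illustrative, not part of the patch. start_time is captured before the provider call, the first contentBlockStart or contentBlockDelta chunk pins first_byte_time, and the difference is reported in whole milliseconds.

```python
import asyncio
import time
from typing import Any, AsyncIterable


async def measure_ttfb_ms(chunks: AsyncIterable[dict[str, Any]], start_time: float) -> int | None:
    """Mirror the first-byte bookkeeping that process_stream does above."""
    first_byte_time = None
    async for chunk in chunks:
        if first_byte_time is None and ("contentBlockDelta" in chunk or "contentBlockStart" in chunk):
            first_byte_time = time.time()
    return int(1000 * (first_byte_time - start_time)) if first_byte_time else None


async def _demo() -> None:
    async def fake_stream():
        yield {"messageStart": {"role": "assistant"}}
        await asyncio.sleep(0.05)  # simulated model latency before the first content chunk
        yield {"contentBlockDelta": {"delta": {"text": "hello"}}}
        yield {"messageStop": {"stopReason": "end_turn"}}

    print(await measure_ttfb_ms(fake_stream(), start_time=time.time()))  # roughly 50


asyncio.run(_demo())
```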
""" stop_reason: StopReason = "end_turn" + first_byte_time = None state: dict[str, Any] = { "message": {"role": "assistant", "content": []}, @@ -303,10 +311,14 @@ async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[T state["content"] = state["message"]["content"] usage: Usage = Usage(inputTokens=0, outputTokens=0, totalTokens=0) - metrics: Metrics = Metrics(latencyMs=0) + metrics: Metrics = Metrics(latencyMs=0, timeToFirstByteMs=0) async for chunk in chunks: + # Track first byte time when we get first content + if first_byte_time is None and ("contentBlockDelta" in chunk or "contentBlockStart" in chunk): + first_byte_time = time.time() yield ModelStreamChunkEvent(chunk=chunk) + if "messageStart" in chunk: state["message"] = handle_message_start(chunk["messageStart"], state["message"]) elif "contentBlockStart" in chunk: @@ -319,7 +331,10 @@ async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[T elif "messageStop" in chunk: stop_reason = handle_message_stop(chunk["messageStop"]) elif "metadata" in chunk: - usage, metrics = extract_usage_metrics(chunk["metadata"]) + time_to_first_byte_ms = ( + int(1000 * (first_byte_time - start_time)) if (start_time and first_byte_time) else None + ) + usage, metrics = extract_usage_metrics(chunk["metadata"], time_to_first_byte_ms) elif "redactContent" in chunk: handle_redact_content(chunk["redactContent"], state) @@ -346,7 +361,8 @@ async def stream_messages( logger.debug("model=<%s> | streaming messages", model) messages = remove_blank_messages_content_text(messages) + start_time = time.time() chunks = model.stream(messages, tool_specs if tool_specs else None, system_prompt) - async for event in process_stream(chunks): + async for event in process_stream(chunks, start_time): yield event diff --git a/src/strands/telemetry/metrics.py b/src/strands/telemetry/metrics.py index 883273f64..abfbbffae 100644 --- a/src/strands/telemetry/metrics.py +++ b/src/strands/telemetry/metrics.py @@ -286,6 +286,8 @@ def update_metrics(self, metrics: Metrics) -> None: metrics: The metrics data to add to the accumulated totals. 
""" self._metrics_client.event_loop_latency.record(metrics["latencyMs"]) + if metrics.get("timeToFirstByteMs") is not None: + self._metrics_client.model_time_to_first_token.record(metrics["timeToFirstByteMs"]) self.accumulated_metrics["latencyMs"] += metrics["latencyMs"] def get_summary(self) -> Dict[str, Any]: @@ -448,7 +450,7 @@ class MetricsClient: event_loop_output_tokens: Histogram event_loop_cache_read_input_tokens: Histogram event_loop_cache_write_input_tokens: Histogram - + model_time_to_first_token: Histogram tool_call_count: Counter tool_success_count: Counter tool_error_count: Counter @@ -507,3 +509,6 @@ def create_instruments(self) -> None: self.event_loop_cache_write_input_tokens = self.meter.create_histogram( name=constants.STRANDS_EVENT_LOOP_CACHE_WRITE_INPUT_TOKENS, unit="token" ) + self.model_time_to_first_token = self.meter.create_histogram( + name=constants.STRANDS_MODEL_TIME_TO_FIRST_TOKEN, unit="ms" + ) diff --git a/src/strands/telemetry/metrics_constants.py b/src/strands/telemetry/metrics_constants.py index f8fac34da..2e1047581 100644 --- a/src/strands/telemetry/metrics_constants.py +++ b/src/strands/telemetry/metrics_constants.py @@ -15,3 +15,4 @@ STRANDS_EVENT_LOOP_OUTPUT_TOKENS = "strands.event_loop.output.tokens" STRANDS_EVENT_LOOP_CACHE_READ_INPUT_TOKENS = "strands.event_loop.cache_read.input.tokens" STRANDS_EVENT_LOOP_CACHE_WRITE_INPUT_TOKENS = "strands.event_loop.cache_write.input.tokens" +STRANDS_MODEL_TIME_TO_FIRST_TOKEN = "strands.model.time_to_first_token" diff --git a/src/strands/telemetry/tracer.py b/src/strands/telemetry/tracer.py index 7cd2d0e7b..907fd454a 100644 --- a/src/strands/telemetry/tracer.py +++ b/src/strands/telemetry/tracer.py @@ -16,7 +16,7 @@ from ..agent.agent_result import AgentResult from ..types.content import ContentBlock, Message, Messages -from ..types.streaming import StopReason, Usage +from ..types.streaming import Metrics, StopReason, Usage from ..types.tools import ToolResult, ToolUse from ..types.traces import Attributes, AttributeValue @@ -153,6 +153,28 @@ def _set_attributes(self, span: Span, attributes: Dict[str, AttributeValue]) -> for key, value in attributes.items(): span.set_attribute(key, value) + def _add_optional_usage_and_metrics_attributes( + self, attributes: Dict[str, AttributeValue], usage: Usage, metrics: Metrics + ) -> None: + """Add optional usage and metrics attributes if they have values. + + Args: + attributes: Dictionary to add attributes to + usage: Token usage information from the model call + metrics: Metrics from the model call + """ + if "cacheReadInputTokens" in usage: + attributes["gen_ai.usage.cache_read_input_tokens"] = usage["cacheReadInputTokens"] + + if "cacheWriteInputTokens" in usage: + attributes["gen_ai.usage.cache_write_input_tokens"] = usage["cacheWriteInputTokens"] + + if metrics.get("timeToFirstByteMs", 0) > 0: + attributes["gen_ai.server.time_to_first_token"] = metrics["timeToFirstByteMs"] + + if metrics.get("latencyMs", 0) > 0: + attributes["gen_ai.server.request.duration"] = metrics["latencyMs"] + def _end_span( self, span: Span, @@ -277,7 +299,13 @@ def start_model_invoke_span( return span def end_model_invoke_span( - self, span: Span, message: Message, usage: Usage, stop_reason: StopReason, error: Optional[Exception] = None + self, + span: Span, + message: Message, + usage: Usage, + metrics: Metrics, + stop_reason: StopReason, + error: Optional[Exception] = None, ) -> None: """End a model invocation span with results and metrics. 
@@ -285,6 +313,7 @@ def end_model_invoke_span(
             span: The span to end.
             message: The message response from the model.
             usage: Token usage information from the model call.
+            metrics: Metrics from the model call.
             stop_reason (StopReason): The reason the model stopped generating.
             error: Optional exception if the model call failed.
         """
@@ -294,10 +323,11 @@ def end_model_invoke_span(
             "gen_ai.usage.completion_tokens": usage["outputTokens"],
             "gen_ai.usage.output_tokens": usage["outputTokens"],
             "gen_ai.usage.total_tokens": usage["totalTokens"],
-            "gen_ai.usage.cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
-            "gen_ai.usage.cache_write_input_tokens": usage.get("cacheWriteInputTokens", 0),
         }

+        # Add optional attributes if they have values
+        self._add_optional_usage_and_metrics_attributes(attributes, usage, metrics)
+
         if self.use_latest_genai_conventions:
             self._add_event(
                 span,
@@ -307,7 +337,7 @@
                         [
                             {
                                 "role": message["role"],
-                                "parts": [{"type": "text", "content": message["content"]}],
+                                "parts": self._map_content_blocks_to_otel_parts(message["content"]),
                                 "finish_reason": str(stop_reason),
                             }
                         ]
@@ -362,7 +392,7 @@ def start_tool_call_span(self, tool: ToolUse, parent_span: Optional[Span] = None
                             "type": "tool_call",
                             "name": tool["name"],
                             "id": tool["toolUseId"],
-                            "arguments": [{"content": tool["input"]}],
+                            "arguments": tool["input"],
                         }
                     ],
                 }
@@ -417,7 +447,7 @@ def end_tool_call_span(
                         {
                             "type": "tool_call_response",
                             "id": tool_result.get("toolUseId", ""),
-                            "result": tool_result.get("content"),
+                            "response": tool_result.get("content"),
                         }
                     ],
                 }
@@ -504,7 +534,7 @@ def end_event_loop_cycle_span(
                         [
                             {
                                 "role": tool_result_message["role"],
-                                "parts": [{"type": "text", "content": tool_result_message["content"]}],
+                                "parts": self._map_content_blocks_to_otel_parts(tool_result_message["content"]),
                             }
                         ]
                     )
@@ -634,19 +664,23 @@ def start_multiagent_span(
         )

         span = self._start_span(operation, attributes=attributes, span_kind=trace_api.SpanKind.CLIENT)
-        content = serialize(task) if isinstance(task, list) else task

         if self.use_latest_genai_conventions:
+            parts: list[dict[str, Any]] = []
+            if isinstance(task, list):
+                parts = self._map_content_blocks_to_otel_parts(task)
+            else:
+                parts = [{"type": "text", "content": task}]
             self._add_event(
                 span,
                 "gen_ai.client.inference.operation.details",
-                {"gen_ai.input.messages": serialize([{"role": "user", "parts": [{"type": "text", "content": task}]}])},
+                {"gen_ai.input.messages": serialize([{"role": "user", "parts": parts}])},
             )
         else:
             self._add_event(
                 span,
                 "gen_ai.user.message",
-                event_attributes={"content": content},
+                event_attributes={"content": serialize(task) if isinstance(task, list) else task},
             )

         return span
@@ -718,7 +752,7 @@ def _add_event_messages(self, span: Span, messages: Messages) -> None:
             input_messages: list = []
             for message in messages:
                 input_messages.append(
-                    {"role": message["role"], "parts": [{"type": "text", "content": message["content"]}]}
+                    {"role": message["role"], "parts": self._map_content_blocks_to_otel_parts(message["content"])}
                 )
             self._add_event(
                 span, "gen_ai.client.inference.operation.details", {"gen_ai.input.messages": serialize(input_messages)}
@@ -731,6 +765,41 @@ def _add_event_messages(self, span: Span, messages: Messages) -> None:
                     {"content": serialize(message["content"])},
                 )

+    def _map_content_blocks_to_otel_parts(self, content_blocks: list[ContentBlock]) -> list[dict[str, Any]]:
+        """Map ContentBlock objects to OpenTelemetry parts format."""
+        parts: list[dict[str, Any]] = []
+
+        for block in content_blocks:
+            if "text" in block:
+                # Standard TextPart
+                parts.append({"type": "text", "content": block["text"]})
+            elif "toolUse" in block:
+                # Standard ToolCallRequestPart
+                tool_use = block["toolUse"]
+                parts.append(
+                    {
+                        "type": "tool_call",
+                        "name": tool_use["name"],
+                        "id": tool_use["toolUseId"],
+                        "arguments": tool_use["input"],
+                    }
+                )
+            elif "toolResult" in block:
+                # Standard ToolCallResponsePart
+                tool_result = block["toolResult"]
+                parts.append(
+                    {
+                        "type": "tool_call_response",
+                        "id": tool_result["toolUseId"],
+                        "response": tool_result["content"],
+                    }
+                )
+            else:
+                # For all other ContentBlock types, use the key as type and value as content
+                for key, value in block.items():
+                    parts.append({"type": key, "content": value})
+        return parts
+

 # Singleton instance for global access
 _tracer_instance = None
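A worked example of the mapping added above, assuming the toolUse/toolResult ContentBlock shapes used elsewhere in this codebase; the input values are invented for illustration, and any other block type (for example a json block) falls through to the generic key/content branch:

```python
content = [
    {"text": "The weather is sunny"},
    {"toolUse": {"name": "calculator", "toolUseId": "123", "input": {"expression": "2+2"}}},
    {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "4"}]}},
]

# Expected result of tracer._map_content_blocks_to_otel_parts(content); note that
# fields the mapping does not use, such as the toolResult "status", are dropped.
expected_parts = [
    {"type": "text", "content": "The weather is sunny"},
    {"type": "tool_call", "name": "calculator", "id": "123", "arguments": {"expression": "2+2"}},
    {"type": "tool_call_response", "id": "123", "response": [{"text": "4"}]},
]
```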
diff --git a/src/strands/types/event_loop.py b/src/strands/types/event_loop.py
index 2c240972b..f184f5e59 100644
--- a/src/strands/types/event_loop.py
+++ b/src/strands/types/event_loop.py
@@ -23,14 +23,17 @@ class Usage(TypedDict, total=False):
     cacheWriteInputTokens: int


-class Metrics(TypedDict):
+class Metrics(TypedDict, total=False):
     """Performance metrics for model interactions.

     Attributes:
         latencyMs (int): Latency of the model request in milliseconds.
+        timeToFirstByteMs (int): Latency from sending the model request to receiving the first
+            content chunk (contentBlockDelta or contentBlockStart), in milliseconds.
     """

-    latencyMs: int
+    latencyMs: Required[int]
+    timeToFirstByteMs: int


 StopReason = Literal[
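With total=False on the class and Required on latencyMs, latency stays mandatory while timeToFirstByteMs becomes optional; a short, purely illustrative sketch of how the type now checks:

```python
from typing_extensions import Required, TypedDict


class Metrics(TypedDict, total=False):
    latencyMs: Required[int]
    timeToFirstByteMs: int


ok_minimal: Metrics = {"latencyMs": 120}                        # valid: TTFB omitted
ok_full: Metrics = {"latencyMs": 120, "timeToFirstByteMs": 45}  # valid: TTFB present
# bad: Metrics = {"timeToFirstByteMs": 45}  # a type checker flags the missing latencyMs
```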
diff --git a/tests/strands/event_loop/test_streaming.py b/tests/strands/event_loop/test_streaming.py
index 1de957619..5afa0cb45 100644
--- a/tests/strands/event_loop/test_streaming.py
+++ b/tests/strands/event_loop/test_streaming.py
@@ -491,7 +491,7 @@ def test_extract_usage_metrics_with_cache_tokens():
                     "content": [],
                 },
                 {"inputTokens": 0, "outputTokens": 0, "totalTokens": 0},
-                {"latencyMs": 0},
+                {"latencyMs": 0, "timeToFirstByteMs": 0},
             ),
         },
     ],
@@ -781,7 +781,7 @@ async def test_stream_messages(agenerator, alist):
                 "end_turn",
                 {"role": "assistant", "content": [{"text": "test"}]},
                 {"inputTokens": 0, "outputTokens": 0, "totalTokens": 0},
-                {"latencyMs": 0},
+                {"latencyMs": 0, "timeToFirstByteMs": 0},
             )
         },
     ]
diff --git a/tests/strands/telemetry/test_metrics.py b/tests/strands/telemetry/test_metrics.py
index 12db81908..e87277eed 100644
--- a/tests/strands/telemetry/test_metrics.py
+++ b/tests/strands/telemetry/test_metrics.py
@@ -109,6 +109,18 @@ def metrics(request):
     return Metrics(**params)


+@pytest.fixture
+def metrics_with_ttfb(request):
+    params = {
+        "latencyMs": 1,
+        "timeToFirstByteMs": 10,
+    }
+    if hasattr(request, "param"):
+        params.update(request.param)
+
+    return Metrics(**params)
+
+
 @pytest.mark.parametrize("end_time", [None, 1])
 @unittest.mock.patch.object(strands.telemetry.metrics.time, "time")
 def test_trace_end(mock_time, end_time, trace):
@@ -132,8 +144,8 @@ def mock_get_meter_provider():
         mock_create_counter = mock.MagicMock()
         mock_meter.create_counter.return_value = mock_create_counter

-        mock_create_histogram = mock.MagicMock()
-        mock_meter.create_histogram.return_value = mock_create_histogram
+        # Create separate mock objects for each histogram call
+        mock_meter.create_histogram.side_effect = lambda *args, **kwargs: mock.MagicMock()

         meter_provider_mock.get_meter.return_value = mock_meter
         mock_get_meter_provider.return_value = meter_provider_mock
@@ -326,9 +338,9 @@ def test_event_loop_metrics_update_usage(usage, event_loop_metrics, mock_get_met
     metrics_client.event_loop_cache_write_input_tokens.record.assert_called()


-def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics, mock_get_meter_provider):
+def test_event_loop_metrics_update_metrics(metrics_with_ttfb, event_loop_metrics, mock_get_meter_provider):
     for _ in range(3):
-        event_loop_metrics.update_metrics(metrics)
+        event_loop_metrics.update_metrics(metrics_with_ttfb)

     tru_metrics = event_loop_metrics.accumulated_metrics
     exp_metrics = Metrics(
@@ -338,6 +350,7 @@ def test_event_loop_metrics_update_metrics(metrics, event_loop_metrics, mock_get
     assert tru_metrics == exp_metrics
     mock_get_meter_provider.return_value.get_meter.assert_called()
     event_loop_metrics._metrics_client.event_loop_latency.record.assert_called_with(1)
+    event_loop_metrics._metrics_client.model_time_to_first_token.record.assert_called_with(10)


 def test_event_loop_metrics_get_summary(trace, tool, event_loop_metrics, mock_get_meter_provider):
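The side_effect change above matters because a shared return_value makes every create_histogram call hand back the same mock, so an assertion against model_time_to_first_token would also see values recorded on event_loop_latency; a standalone illustration, not taken from the test suite:

```python
from unittest import mock

meter = mock.MagicMock()

# Old style: every histogram is the same object, so recorded calls bleed together.
meter.create_histogram.return_value = mock.MagicMock()
assert meter.create_histogram(name="a") is meter.create_histogram(name="b")

# New style: each call builds its own mock, so per-instrument assertions stay isolated.
meter.create_histogram.side_effect = lambda *args, **kwargs: mock.MagicMock()
assert meter.create_histogram(name="a") is not meter.create_histogram(name="b")
```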
diff --git a/tests/strands/telemetry/test_tracer.py b/tests/strands/telemetry/test_tracer.py
index 4e9872100..de677c2cc 100644
--- a/tests/strands/telemetry/test_tracer.py
+++ b/tests/strands/telemetry/test_tracer.py
@@ -11,7 +11,7 @@

 from strands.telemetry.tracer import JSONEncoder, Tracer, get_tracer, serialize
 from strands.types.content import ContentBlock
-from strands.types.streaming import StopReason, Usage
+from strands.types.streaming import Metrics, StopReason, Usage


 @pytest.fixture(autouse=True)
@@ -173,7 +173,15 @@ def test_start_model_invoke_span_latest_conventions(mock_tracer):
     mock_span = mock.MagicMock()
     mock_tracer.start_span.return_value = mock_span

-    messages = [{"role": "user", "content": [{"text": "Hello"}]}]
+    messages = [
+        {"role": "user", "content": [{"text": "Hello 2025-1993"}]},
+        {
+            "role": "assistant",
+            "content": [
+                {"toolUse": {"input": '"expression": "2025-1993"', "name": "calculator", "toolUseId": "123"}}
+            ],
+        },
+    ]
     model_id = "test-model"

     span = tracer.start_model_invoke_span(messages=messages, agent_name="TestAgent", model_id=model_id)
@@ -191,8 +199,19 @@
                 [
                     {
                         "role": messages[0]["role"],
-                        "parts": [{"type": "text", "content": messages[0]["content"]}],
-                    }
+                        "parts": [{"type": "text", "content": "Hello 2025-1993"}],
+                    },
+                    {
+                        "role": messages[1]["role"],
+                        "parts": [
+                            {
+                                "type": "tool_call",
+                                "name": "calculator",
+                                "id": "123",
+                                "arguments": '"expression": "2025-1993"',
+                            }
+                        ],
+                    },
                 ]
             )
         },
@@ -205,17 +224,18 @@ def test_end_model_invoke_span(mock_span):
     tracer = Tracer()
     message = {"role": "assistant", "content": [{"text": "Response"}]}
     usage = Usage(inputTokens=10, outputTokens=20, totalTokens=30)
+    metrics = Metrics(latencyMs=20, timeToFirstByteMs=10)
     stop_reason: StopReason = "end_turn"

-    tracer.end_model_invoke_span(mock_span, message, usage, stop_reason)
+    tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason)

     mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 10)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 10)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.completion_tokens", 20)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 20)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 30)
-    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_read_input_tokens", 0)
-    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_write_input_tokens", 0)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.request.duration", 20)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.time_to_first_token", 10)
     mock_span.add_event.assert_called_with(
         "gen_ai.choice",
         attributes={"message": json.dumps(message["content"]), "finish_reason": "end_turn"},
@@ -231,17 +251,18 @@ def test_end_model_invoke_span_latest_conventions(mock_span):
     tracer.use_latest_genai_conventions = True
     message = {"role": "assistant", "content": [{"text": "Response"}]}
     usage = Usage(inputTokens=10, outputTokens=20, totalTokens=30)
+    metrics = Metrics(latencyMs=20, timeToFirstByteMs=10)
     stop_reason: StopReason = "end_turn"

-    tracer.end_model_invoke_span(mock_span, message, usage, stop_reason)
+    tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason)

     mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 10)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 10)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.completion_tokens", 20)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.output_tokens", 20)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 30)
-    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_read_input_tokens", 0)
-    mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_write_input_tokens", 0)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.time_to_first_token", 10)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.request.duration", 20)
     mock_span.add_event.assert_called_with(
         "gen_ai.client.inference.operation.details",
         attributes={
@@ -249,7 +270,7 @@ def test_end_model_invoke_span_latest_conventions(mock_span):
                 [
                     {
                         "role": "assistant",
-                        "parts": [{"type": "text", "content": message["content"]}],
+                        "parts": [{"type": "text", "content": "Response"}],
                         "finish_reason": "end_turn",
                     }
                 ]
@@ -318,7 +339,7 @@ def test_start_tool_call_span_latest_conventions(mock_tracer):
                         "type": "tool_call",
                         "name": tool["name"],
                         "id": tool["toolUseId"],
-                        "arguments": [{"content": tool["input"]}],
+                        "arguments": tool["input"],
                     }
                 ],
             }
@@ -398,7 +419,7 @@ def test_start_swarm_span_with_contentblock_task_latest_conventions(mock_tracer)
         "gen_ai.client.inference.operation.details",
         attributes={
             "gen_ai.input.messages": serialize(
-                [{"role": "user", "parts": [{"type": "text", "content": [{"text": "Original Task: foo bar"}]}]}]
+                [{"role": "user", "parts": [{"type": "text", "content": "Original Task: foo bar"}]}]
             )
         },
     )
@@ -486,7 +507,7 @@ def test_end_tool_call_span_latest_conventions(mock_span):
     """Test ending a tool call span with the latest semantic conventions."""
     tracer = Tracer()
     tracer.use_latest_genai_conventions = True
-    tool_result = {"status": "success", "content": [{"text": "Tool result"}]}
+    tool_result = {"status": "success", "content": [{"text": "Tool result"}, {"json": {"foo": "bar"}}]}

     tracer.end_tool_call_span(mock_span, tool_result)

@@ -502,7 +523,7 @@ def test_end_tool_call_span_latest_conventions(mock_span):
                     {
                         "type": "tool_call_response",
                         "id": tool_result.get("toolUseId", ""),
-                        "result": tool_result.get("content"),
+                        "response": tool_result.get("content"),
                     }
                 ],
             }
@@ -558,9 +579,7 @@ def test_start_event_loop_cycle_span_latest_conventions(mock_tracer):
     mock_span.add_event.assert_any_call(
         "gen_ai.client.inference.operation.details",
         attributes={
-            "gen_ai.input.messages": serialize(
-                [{"role": "user", "parts": [{"type": "text", "content": messages[0]["content"]}]}]
-            )
+            "gen_ai.input.messages": serialize([{"role": "user", "parts": [{"type": "text", "content": "Hello"}]}])
         },
     )
     assert span is not None
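For reference, the shape the tests in the hunks that follow now expect for a tool-result message under the latest conventions; json.dumps stands in for the tracer's serialize helper and the values come from the test data:

```python
import json

tool_result_message = {
    "role": "assistant",
    "content": [
        {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
    ],
}

# Approximate serialized message entry that lands on the
# gen_ai.client.inference.operation.details span event:
print(json.dumps([
    {
        "role": "assistant",
        "parts": [{"type": "tool_call_response", "id": "123", "response": [{"text": "Weather is sunny"}]}],
    }
]))
```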
@@ -570,7 +589,12 @@ def test_end_event_loop_cycle_span(mock_span):
     """Test ending an event loop cycle span."""
     tracer = Tracer()
     message = {"role": "assistant", "content": [{"text": "Response"}]}
-    tool_result_message = {"role": "assistant", "content": [{"toolResult": {"response": "Success"}}]}
+    tool_result_message = {
+        "role": "assistant",
+        "content": [
+            {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
+        ],
+    }

     tracer.end_event_loop_cycle_span(mock_span, message, tool_result_message)

@@ -590,7 +614,12 @@ def test_end_event_loop_cycle_span_latest_conventions(mock_span):
     tracer = Tracer()
     tracer.use_latest_genai_conventions = True
     message = {"role": "assistant", "content": [{"text": "Response"}]}
-    tool_result_message = {"role": "assistant", "content": [{"toolResult": {"response": "Success"}}]}
+    tool_result_message = {
+        "role": "assistant",
+        "content": [
+            {"toolResult": {"toolUseId": "123", "status": "success", "content": [{"text": "Weather is sunny"}]}}
+        ],
+    }

     tracer.end_event_loop_cycle_span(mock_span, message, tool_result_message)

@@ -601,7 +630,13 @@ def test_end_event_loop_cycle_span_latest_conventions(mock_span):
                 [
                     {
                         "role": "assistant",
-                        "parts": [{"type": "text", "content": tool_result_message["content"]}],
+                        "parts": [
+                            {
+                                "type": "tool_call_response",
+                                "id": "123",
+                                "response": [{"text": "Weather is sunny"}],
+                            }
+                        ],
                     }
                 ]
             )
@@ -676,7 +711,7 @@ def test_start_agent_span_latest_conventions(mock_tracer):
         "gen_ai.client.inference.operation.details",
         attributes={
             "gen_ai.input.messages": serialize(
-                [{"role": "user", "parts": [{"type": "text", "content": [{"text": "test prompt"}]}]}]
+                [{"role": "user", "parts": [{"type": "text", "content": "test prompt"}]}]
             )
         },
     )
@@ -766,8 +801,9 @@ def test_end_model_invoke_span_with_cache_metrics(mock_span):
         cacheWriteInputTokens=3,
     )
     stop_reason: StopReason = "end_turn"
+    metrics = Metrics(latencyMs=10, timeToFirstByteMs=5)

-    tracer.end_model_invoke_span(mock_span, message, usage, stop_reason)
+    tracer.end_model_invoke_span(mock_span, message, usage, metrics, stop_reason)

     mock_span.set_attribute.assert_any_call("gen_ai.usage.prompt_tokens", 10)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.input_tokens", 10)
@@ -776,6 +812,8 @@ def test_end_model_invoke_span_with_cache_metrics(mock_span):
     mock_span.set_attribute.assert_any_call("gen_ai.usage.total_tokens", 30)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_read_input_tokens", 5)
     mock_span.set_attribute.assert_any_call("gen_ai.usage.cache_write_input_tokens", 3)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.request.duration", 10)
+    mock_span.set_attribute.assert_any_call("gen_ai.server.time_to_first_token", 5)
     mock_span.set_status.assert_called_once_with(StatusCode.OK)
     mock_span.end.assert_called_once()
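Finally, a sketch of how the new strands.model.time_to_first_token histogram surfaces through a standard OpenTelemetry SDK pipeline, assuming the opentelemetry-sdk package is installed; the instrument name and unit match metrics_constants.py, while the reader wiring and the recorded value are illustrative:

```python
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

reader = InMemoryMetricReader()
meter = MeterProvider(metric_readers=[reader]).get_meter("strands.telemetry")

# Same instrument that MetricsClient.create_instruments now registers.
ttfb = meter.create_histogram(name="strands.model.time_to_first_token", unit="ms")
ttfb.record(45)

for rm in reader.get_metrics_data().resource_metrics:
    for scope in rm.scope_metrics:
        for metric in scope.metrics:
            print(metric.name, metric.data.data_points[0].sum)  # strands.model.time_to_first_token 45
```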