Commit 9ba8eec

Jacksunwei authored and copybara-github committed

feat(tracing): Adds more OpenTelemetry convention attributes: gen_ai.request.max_tokens, gen_ai.request.top_p, and gen_ai.response.finish_reasons

Fixes #1234
PiperOrigin-RevId: 795082815
1 parent 9cfe433 commit 9ba8eec
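
The attribute names follow the OpenTelemetry GenAI semantic conventions. As a rough illustration (not taken from the ADK source), the same keys could be set directly with the opentelemetry-api package; the tracer name and values below are placeholders:

from opentelemetry import trace

tracer = trace.get_tracer('example-tracer')  # placeholder tracer name

with tracer.start_as_current_span('call_llm') as span:
  # Request-side attributes added by this commit.
  span.set_attribute('gen_ai.request.top_p', 0.95)
  span.set_attribute('gen_ai.request.max_tokens', 1024)
  # The response-side attribute is a list of strings per the convention.
  span.set_attribute('gen_ai.response.finish_reasons', ['stop'])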

File tree: 3 files changed, +74 −34 lines

src/google/adk/models/llm_response.py (6 additions, 1 deletion)

@@ -67,6 +67,9 @@ class LlmResponse(BaseModel):
    Only used for streaming mode.
  """

+  finish_reason: Optional[types.FinishReason] = None
+  """The finish reason of the response."""
+
  error_code: Optional[str] = None
  """Error code if the response is an error. Code varies by model."""

@@ -97,7 +100,7 @@ class LlmResponse(BaseModel):
  @staticmethod
  def create(
      generate_content_response: types.GenerateContentResponse,
-  ) -> 'LlmResponse':
+  ) -> LlmResponse:
    """Creates an LlmResponse from a GenerateContentResponse.

    Args:
@@ -115,12 +118,14 @@ def create(
            content=candidate.content,
            grounding_metadata=candidate.grounding_metadata,
            usage_metadata=usage_metadata,
+            finish_reason=candidate.finish_reason,
        )
      else:
        return LlmResponse(
            error_code=candidate.finish_reason,
            error_message=candidate.finish_message,
            usage_metadata=usage_metadata,
+            finish_reason=candidate.finish_reason,
        )
    else:
      if generate_content_response.prompt_feedback:
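
With this change, finish_reason is carried on LlmResponse for both the success and the error branch of create(). Below is a minimal sketch of reading it, assuming google-adk and google-genai are installed; the GenerateContentResponse is constructed by hand purely for illustration, not how real responses are produced:

from google.adk.models.llm_response import LlmResponse
from google.genai import types

# Illustrative response object; real ones come back from the GenAI SDK.
raw = types.GenerateContentResponse(
    candidates=[
        types.Candidate(
            content=types.Content(role='model', parts=[types.Part(text='Hi!')]),
            finish_reason=types.FinishReason.STOP,
        )
    ]
)
resp = LlmResponse.create(raw)
print(resp.finish_reason)  # FinishReason.STOP, now available on the response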

src/google/adk/telemetry.py (16 additions, 0 deletions)

@@ -184,6 +184,17 @@ def trace_call_llm(
      _safe_json_serialize(_build_llm_request_for_trace(llm_request)),
  )
  # Consider removing once GenAI SDK provides a way to record this info.
+  if llm_request.config:
+    if llm_request.config.top_p:
+      span.set_attribute(
+          'gen_ai.request.top_p',
+          llm_request.config.top_p,
+      )
+    if llm_request.config.max_output_tokens:
+      span.set_attribute(
+          'gen_ai.request.max_tokens',
+          llm_request.config.max_output_tokens,
+      )

  try:
    llm_response_json = llm_response.model_dump_json(exclude_none=True)
@@ -204,6 +215,11 @@ def trace_call_llm(
        'gen_ai.usage.output_tokens',
        llm_response.usage_metadata.candidates_token_count,
    )
+  if llm_response.finish_reason:
+    span.set_attribute(
+        'gen_ai.response.finish_reasons',
+        [llm_response.finish_reason.value.lower()],
+    )


def trace_send_data(
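
Note that gen_ai.response.finish_reasons is recorded as a single-element list of lowercase strings, matching the convention's string-array type. A small sketch of the conversion used above, assuming the FinishReason enum from google-genai:

from google.genai import types

reason = types.FinishReason.STOP
print([reason.value.lower()])  # ['stop'], as asserted in the test below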

tests/unittests/test_telemetry.py (52 additions, 33 deletions)

@@ -81,9 +81,57 @@ async def _create_invocation_context(


@pytest.mark.asyncio
-async def test_trace_call_llm_function_response_includes_part_from_bytes(
+async def test_trace_call_llm(monkeypatch, mock_span_fixture):
+  """Test trace_call_llm sets all telemetry attributes correctly with normal content."""
+  monkeypatch.setattr(
+      'opentelemetry.trace.get_current_span', lambda: mock_span_fixture
+  )
+
+  agent = LlmAgent(name='test_agent')
+  invocation_context = await _create_invocation_context(agent)
+  llm_request = LlmRequest(
+      contents=[
+          types.Content(
+              role='user',
+              parts=[types.Part(text='Hello, how are you?')],
+          ),
+      ],
+      config=types.GenerateContentConfig(
+          system_instruction='You are a helpful assistant.',
+          top_p=0.95,
+          max_output_tokens=1024,
+      ),
+  )
+  llm_response = LlmResponse(
+      turn_complete=True,
+      finish_reason=types.FinishReason.STOP,
+      usage_metadata=types.GenerateContentResponseUsageMetadata(
+          total_token_count=100,
+          prompt_token_count=50,
+          candidates_token_count=50,
+      ),
+  )
+  trace_call_llm(invocation_context, 'test_event_id', llm_request, llm_response)
+
+  expected_calls = [
+      mock.call('gen_ai.system', 'gcp.vertex.agent'),
+      mock.call('gen_ai.request.top_p', 0.95),
+      mock.call('gen_ai.request.max_tokens', 1024),
+      mock.call('gen_ai.usage.input_tokens', 50),
+      mock.call('gen_ai.usage.output_tokens', 50),
+      mock.call('gen_ai.response.finish_reasons', ['stop']),
+  ]
+  assert mock_span_fixture.set_attribute.call_count == 12
+  mock_span_fixture.set_attribute.assert_has_calls(
+      expected_calls, any_order=True
+  )
+
+
+@pytest.mark.asyncio
+async def test_trace_call_llm_with_binary_content(
    monkeypatch, mock_span_fixture
):
+  """Test trace_call_llm handles binary content serialization correctly."""
  monkeypatch.setattr(
      'opentelemetry.trace.get_current_span', lambda: mock_span_fixture
  )
@@ -123,11 +171,14 @@ async def test_trace_call_llm_function_response_includes_part_from_bytes(
  llm_response = LlmResponse(turn_complete=True)
  trace_call_llm(invocation_context, 'test_event_id', llm_request, llm_response)

+  # Verify basic telemetry attributes are set
  expected_calls = [
      mock.call('gen_ai.system', 'gcp.vertex.agent'),
  ]
  assert mock_span_fixture.set_attribute.call_count == 7
  mock_span_fixture.set_attribute.assert_has_calls(expected_calls)
+
+  # Verify binary content is replaced with '<not serializable>' in JSON
  llm_request_json_str = None
  for call_obj in mock_span_fixture.set_attribute.call_args_list:
    if call_obj.args[0] == 'gcp.vertex.agent.llm_request':
@@ -141,38 +192,6 @@ async def test_trace_call_llm_function_response_includes_part_from_bytes(
  assert llm_request_json_str.count('<not serializable>') == 2


-@pytest.mark.asyncio
-async def test_trace_call_llm_usage_metadata(monkeypatch, mock_span_fixture):
-  monkeypatch.setattr(
-      'opentelemetry.trace.get_current_span', lambda: mock_span_fixture
-  )
-
-  agent = LlmAgent(name='test_agent')
-  invocation_context = await _create_invocation_context(agent)
-  llm_request = LlmRequest(
-      config=types.GenerateContentConfig(system_instruction=''),
-  )
-  llm_response = LlmResponse(
-      turn_complete=True,
-      usage_metadata=types.GenerateContentResponseUsageMetadata(
-          total_token_count=100,
-          prompt_token_count=50,
-          candidates_token_count=50,
-      ),
-  )
-  trace_call_llm(invocation_context, 'test_event_id', llm_request, llm_response)
-
-  expected_calls = [
-      mock.call('gen_ai.system', 'gcp.vertex.agent'),
-      mock.call('gen_ai.usage.input_tokens', 50),
-      mock.call('gen_ai.usage.output_tokens', 50),
-  ]
-  assert mock_span_fixture.set_attribute.call_count == 9
-  mock_span_fixture.set_attribute.assert_has_calls(
-      expected_calls, any_order=True
-  )
-
-
def test_trace_tool_call_with_scalar_response(
    monkeypatch, mock_span_fixture, mock_tool_fixture, mock_event_fixture
):
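
The mock_span_fixture used throughout these tests is defined elsewhere in the test module and is not part of this diff. A hypothetical stand-in would simply return a MagicMock so that set_attribute calls can be asserted on:

from unittest import mock

import pytest


@pytest.fixture
def mock_span_fixture():
  # Hypothetical stand-in: records set_attribute calls for later assertions.
  return mock.MagicMock()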
