Commit 7398657

Add Vertex gen AI response span attributes
1 parent d18c5fe

4 files changed: 43 additions, 4 deletions

instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -15,3 +15,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#3208](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3208))
 - VertexAI emit user, system, and assistant events
   ([#3203](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3203))
+- Add Vertex gen AI response span attributes
+  ([#3227](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3227))

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/patch.py

Lines changed: 6 additions & 3 deletions
@@ -25,6 +25,7 @@
 from opentelemetry.instrumentation.vertexai.utils import (
     GenerateContentParams,
     get_genai_request_attributes,
+    get_genai_response_attributes,
     get_server_attributes,
     get_span_name,
     request_to_events,
@@ -113,25 +114,27 @@ def traced_method(
             name=span_name,
             kind=SpanKind.CLIENT,
             attributes=span_attributes,
-        ) as _span:
+        ) as span:
             for event in request_to_events(
                 params=params, capture_content=capture_content
             ):
                 event_logger.emit(event)

             # TODO: set error.type attribute
             # https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md
-            result = wrapped(*args, **kwargs)
+            response = wrapped(*args, **kwargs)
             # TODO: handle streaming
             # if is_streaming(kwargs):
             #     return StreamWrapper(
             #         result, span, event_logger, capture_content
             #     )

+            if span.is_recording():
+                span.set_attributes(get_genai_response_attributes(response))
             # TODO: add response attributes and events
             # _set_response_attributes(
             #     span, result, event_logger, capture_content
             # )
-            return result
+            return response

     return traced_method
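
For context, the change above follows the usual OpenTelemetry pattern of guarding attribute extraction behind span.is_recording() so unsampled spans skip the extra work. A minimal standalone sketch of that pattern (illustrative only; the tracer name, span name, and the response_to_attributes callable are placeholders, not the actual patch code):

from opentelemetry import trace
from opentelemetry.trace import SpanKind

tracer = trace.get_tracer(__name__)


def call_and_annotate(wrapped, response_to_attributes, *args, **kwargs):
    # Placeholder span name; the real instrumentation derives it from the request.
    with tracer.start_as_current_span("chat", kind=SpanKind.CLIENT) as span:
        response = wrapped(*args, **kwargs)
        # Only compute and attach response attributes when the span is sampled.
        if span.is_recording():
            span.set_attributes(response_to_attributes(response))
        return response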

instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py

Lines changed: 23 additions & 1 deletion
@@ -39,10 +39,17 @@
 from opentelemetry.util.types import AnyValue, AttributeValue

 if TYPE_CHECKING:
-    from google.cloud.aiplatform_v1.types import content, tool
+    from google.cloud.aiplatform_v1.types import (
+        content,
+        prediction_service,
+        tool,
+    )
     from google.cloud.aiplatform_v1beta1.types import (
         content as content_v1beta1,
     )
+    from google.cloud.aiplatform_v1beta1.types import (
+        prediction_service as prediction_service_v1beta1,
+    )
     from google.cloud.aiplatform_v1beta1.types import (
         tool as tool_v1beta1,
     )
@@ -137,6 +144,21 @@ def get_genai_request_attributes(
     return attributes


+def get_genai_response_attributes(
+    response: prediction_service.GenerateContentResponse
+    | prediction_service_v1beta1.GenerateContentResponse,
+) -> dict[str, AttributeValue]:
+    finish_reasons: list[str] = [
+        candidate.finish_reason.name for candidate in response.candidates
+    ]
+    return {
+        GenAIAttributes.GEN_AI_RESPONSE_MODEL: response.model_version,
+        GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS: finish_reasons,
+        GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS: response.usage_metadata.prompt_token_count,
+        GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS: response.usage_metadata.candidates_token_count,
+    }
+
+
 _MODEL_STRIP_RE = re.compile(
     r"^projects/(.*)/locations/(.*)/publishers/google/models/"
 )
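
An illustrative sketch of what the new helper returns (not part of the commit): it exercises get_genai_response_attributes with a duck-typed stand-in instead of a real GenerateContentResponse proto, since the helper only reads a few fields. The expected values mirror the updated tests below.

from types import SimpleNamespace

from opentelemetry.instrumentation.vertexai.utils import (
    get_genai_response_attributes,
)

# Stand-in mimicking the fields the helper reads from GenerateContentResponse.
fake_response = SimpleNamespace(
    model_version="gemini-1.5-flash-002",
    candidates=[SimpleNamespace(finish_reason=SimpleNamespace(name="STOP"))],
    usage_metadata=SimpleNamespace(
        prompt_token_count=5, candidates_token_count=19
    ),
)

print(get_genai_response_attributes(fake_response))
# {'gen_ai.response.model': 'gemini-1.5-flash-002',
#  'gen_ai.response.finish_reasons': ['STOP'],
#  'gen_ai.usage.input_tokens': 5,
#  'gen_ai.usage.output_tokens': 19}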

instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/test_chat_completions.py

Lines changed: 12 additions & 0 deletions
@@ -38,7 +38,11 @@ def test_generate_content(
     assert dict(spans[0].attributes) == {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "gemini-1.5-flash-002",
+        "gen_ai.response.finish_reasons": ("STOP",),
+        "gen_ai.response.model": "gemini-1.5-flash-002",
         "gen_ai.system": "vertex_ai",
+        "gen_ai.usage.input_tokens": 5,
+        "gen_ai.usage.output_tokens": 19,
         "server.address": "us-central1-aiplatform.googleapis.com",
         "server.port": 443,
     }
@@ -81,7 +85,11 @@ def test_generate_content_without_events(
     assert dict(spans[0].attributes) == {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "gemini-1.5-flash-002",
+        "gen_ai.response.finish_reasons": ("STOP",),
+        "gen_ai.response.model": "gemini-1.5-flash-002",
         "gen_ai.system": "vertex_ai",
+        "gen_ai.usage.input_tokens": 5,
+        "gen_ai.usage.output_tokens": 19,
         "server.address": "us-central1-aiplatform.googleapis.com",
         "server.port": 443,
     }
@@ -255,7 +263,11 @@ def test_generate_content_extra_params(span_exporter, instrument_no_content):
         "gen_ai.request.stop_sequences": ("\n\n\n",),
         "gen_ai.request.temperature": 0.20000000298023224,
         "gen_ai.request.top_p": 0.949999988079071,
+        "gen_ai.response.finish_reasons": ("MAX_TOKENS",),
+        "gen_ai.response.model": "gemini-1.5-flash-002",
         "gen_ai.system": "vertex_ai",
+        "gen_ai.usage.input_tokens": 5,
+        "gen_ai.usage.output_tokens": 5,
         "server.address": "us-central1-aiplatform.googleapis.com",
         "server.port": 443,
     }
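
End to end, these attributes appear on the CLIENT span recorded for each generate_content call once the instrumentor is enabled. A hedged usage sketch (assumes valid Google Cloud credentials and a placeholder project; the console exporter is only for inspection):

import vertexai
from opentelemetry import trace
from opentelemetry.instrumentation.vertexai import VertexAIInstrumentor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
from vertexai.generative_models import GenerativeModel

provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

vertexai.init(project="my-project", location="us-central1")  # placeholder project
VertexAIInstrumentor().instrument()

model = GenerativeModel("gemini-1.5-flash-002")
model.generate_content("Say hello")
# The exported span attributes now include gen_ai.response.model,
# gen_ai.response.finish_reasons, gen_ai.usage.input_tokens, and
# gen_ai.usage.output_tokens alongside the existing request attributes.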
