Commit 465ca78

refactor: simplify TelemetryHandler API by moving invocation data management to LLMInvocation class

1 parent 1a172d1 commit 465ca78

File tree

4 files changed: +81 -91 lines changed

util/opentelemetry-util-genai/README.rst
util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py
util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
util/opentelemetry-util-genai/tests/test_utils.py
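In short: callers now construct an LLMInvocation themselves and hand the same object to start_llm, stop_llm, and fail_llm, instead of threading data through keyword arguments. A minimal end-to-end sketch of the new pattern, pieced together from the updated docstring and tests below (the message contents are illustrative):

    from opentelemetry.util.genai.handler import get_telemetry_handler
    from opentelemetry.util.genai.types import (
        InputMessage,
        LLMInvocation,
        OutputMessage,
        Text,
    )

    handler = get_telemetry_handler()

    # The invocation object now carries all request data up front.
    invocation = LLMInvocation(
        request_model="gpt-3.5-turbo",
        input_messages=[
            InputMessage(role="Human", parts=[Text(content="hello world")])
        ],
        provider="openai",
        attributes={"custom": "attr"},
    )

    handler.start_llm(invocation)  # opens the span

    # Results are written back onto the same object before stopping.
    invocation.output_messages = [
        OutputMessage(
            role="AI", parts=[Text(content="hello back")], finish_reason="stop"
        )
    ]
    handler.stop_llm(invocation)  # closes the span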

util/opentelemetry-util-genai/README.rst

Lines changed: 12 additions & 11 deletions

@@ -11,17 +11,18 @@ By default, message content will not be captured.
 Set the environment variable `OTEL_SEMCONV_STABILITY_OPT_IN` to `gen_ai_latest_experimental` to enable experimental features.
 And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `SPAN_ONLY` or `SPAN_AND_EVENT` to capture message content in spans.
 
-This package provides these span attributes.
-> gen_ai.provider.name: Str(openai)
-> gen_ai.operation.name: Str(chat)
-> gen_ai.request.model: Str(gpt-3.5-turbo)
-> gen_ai.response.finish_reasons: Slice(["stop"])
-> gen_ai.response.model: Str(gpt-3.5-turbo-0125)
-> gen_ai.response.id: Str(chatcmpl-Bz8yrvPnydD9pObv625n2CGBPHS13)
-> gen_ai.usage.input_tokens: Int(24)
-> gen_ai.usage.output_tokens: Int(7)
-> gen_ai.input.messages: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]')
-> gen_ai.output.messages: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]')
+This package provides these span attributes:
+
+- `gen_ai.provider.name`: Str(openai)
+- `gen_ai.operation.name`: Str(chat)
+- `gen_ai.request.model`: Str(gpt-3.5-turbo)
+- `gen_ai.response.finish_reasons`: Slice(["stop"])
+- `gen_ai.response.model`: Str(gpt-3.5-turbo-0125)
+- `gen_ai.response.id`: Str(chatcmpl-Bz8yrvPnydD9pObv625n2CGBPHS13)
+- `gen_ai.usage.input_tokens`: Int(24)
+- `gen_ai.usage.output_tokens`: Int(7)
+- `gen_ai.input.messages`: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]')
+- `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]')
 
 
 Installation
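For local experimentation, the two settings described in this README can also be set from Python before the handler is created; a small sketch (in production these would normally be set in the process environment instead):

    import os

    # Opt in to the experimental GenAI semantic conventions.
    os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"
    # Capture message content in spans (alternative value: "SPAN_AND_EVENT").
    os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "SPAN_ONLY"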

util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py

Lines changed: 25 additions & 57 deletions

@@ -27,43 +27,38 @@
 
 Usage:
     handler = get_telemetry_handler()
-    handler.start_llm(input_messages, request_model, **attrs)
-    handler.stop_llm(invocation, output_messages, **attrs)
-    handler.fail_llm(invocation, error, **attrs)
+
+    # Create an invocation object with your request data
+    invocation = LLMInvocation(
+        request_model="my-model",
+        input_messages=[...],
+        provider="my-provider",
+        attributes={"custom": "attr"},
+    )
+
+    # Start the invocation (opens a span)
+    handler.start_llm(invocation)
+
+    # Populate outputs and any additional attributes, then stop (closes the span)
+    invocation.output_messages = [...]
+    invocation.attributes.update({"more": "attrs"})
+    handler.stop_llm(invocation)
+
+    # Or, in case of error
+    # handler.fail_llm(invocation, Error(type="...", message="..."))
 """
 
 import time
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer
 
 from .generators import SpanGenerator
-from .types import Error, InputMessage, LLMInvocation, OutputMessage
+from .types import Error, LLMInvocation
 from .version import __version__
 
 
-def _apply_known_attrs_to_invocation(
-    invocation: LLMInvocation, attributes: dict[str, Any]
-) -> None:
-    """Pop known fields from attributes and set them on the invocation.
-
-    Mutates the provided attributes dict by popping known keys, leaving
-    only unknown/custom attributes behind for the caller to persist into
-    invocation.attributes.
-    """
-    if "provider" in attributes:
-        invocation.provider = attributes.pop("provider")
-    if "response_model_name" in attributes:
-        invocation.response_model_name = attributes.pop("response_model_name")
-    if "response_id" in attributes:
-        invocation.response_id = attributes.pop("response_id")
-    if "input_tokens" in attributes:
-        invocation.input_tokens = attributes.pop("input_tokens")
-    if "output_tokens" in attributes:
-        invocation.output_tokens = attributes.pop("output_tokens")
-
-
 class TelemetryHandler:
     """
     High-level handler managing GenAI invocation lifecycles and emitting
@@ -83,50 +78,23 @@ def __init__(self, **kwargs: Any):
 
     def start_llm(
         self,
-        request_model: str,
-        input_messages: List[InputMessage],
-        **attributes: Any,
+        invocation: LLMInvocation,
     ) -> LLMInvocation:
-        """Start an LLM invocation and create a pending span entry.
-
-        Known attributes provided via ``**attributes`` (``provider``,
-        ``response_model_name``, ``response_id``, ``input_tokens``,
-        ``output_tokens``) are extracted and set as explicit fields on the
-        ``LLMInvocation``. Any remaining keys are preserved in
-        ``invocation.attributes`` for custom metadata.
-
-        Returns the ``LLMInvocation`` to use with `stop_llm` and `fail_llm`.
-        """
-        invocation = LLMInvocation(
-            request_model=request_model,
-            input_messages=input_messages,
-            attributes=attributes,
-        )
-        _apply_known_attrs_to_invocation(invocation, invocation.attributes)
+        """Start an LLM invocation and create a pending span entry."""
         self._generator.start(invocation)
         return invocation
 
-    def stop_llm(
-        self,
-        invocation: LLMInvocation,
-        output_messages: List[OutputMessage],
-        **attributes: Any,
-    ) -> LLMInvocation:
+    def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation:
         """Finalize an LLM invocation successfully and end its span."""
         invocation.end_time = time.time()
-        invocation.output_messages = output_messages
-        _apply_known_attrs_to_invocation(invocation, attributes)
-        invocation.attributes.update(attributes)
         self._generator.finish(invocation)
         return invocation
 
     def fail_llm(
-        self, invocation: LLMInvocation, error: Error, **attributes: Any
+        self, invocation: LLMInvocation, error: Error
    ) -> LLMInvocation:
         """Fail an LLM invocation and end its span with error status."""
         invocation.end_time = time.time()
-        _apply_known_attrs_to_invocation(invocation, attributes)
-        invocation.attributes.update(**attributes)
         self._generator.error(error, invocation)
         return invocation
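Since fail_llm also loses its **attributes parameter, any extra error metadata now belongs on the invocation object too. A hedged sketch of the error path, following the Error(type=..., message=...) form shown in the updated docstring (the failing call is a stand-in):

    from opentelemetry.util.genai.handler import get_telemetry_handler
    from opentelemetry.util.genai.types import Error, LLMInvocation

    handler = get_telemetry_handler()
    invocation = LLMInvocation(request_model="my-model", input_messages=[])
    handler.start_llm(invocation)
    try:
        raise RuntimeError("model unavailable")  # stand-in for a real model call
    except RuntimeError as exc:
        # Extra metadata goes on the invocation, not on fail_llm.
        invocation.attributes["request_failed"] = True
        handler.fail_llm(
            invocation, Error(type=type(exc).__qualname__, message=str(exc))
        )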

util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py

Lines changed: 10 additions & 1 deletion

@@ -14,7 +14,7 @@
 
 import json
 from dataclasses import asdict
-from typing import List
+from typing import Any, Dict, List
 
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAI,
@@ -100,12 +100,21 @@ def _maybe_set_span_messages(
     )
 
 
+def _maybe_set_span_extra_attributes(
+    span: Span,
+    attributes: Dict[str, Any],
+) -> None:
+    for key, value in attributes.items():
+        span.set_attribute(key, value)
+
+
 def _apply_finish_attributes(span: Span, invocation: LLMInvocation) -> None:
     """Apply attributes/messages common to finish() paths."""
     _apply_common_span_attributes(span, invocation)
     _maybe_set_span_messages(
         span, invocation.input_messages, invocation.output_messages
     )
+    _maybe_set_span_extra_attributes(span, invocation.attributes)
 
 
 def _apply_error_attributes(span: Span, error: Error) -> None:
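The new helper simply copies every entry of invocation.attributes onto the span verbatim, which is what surfaces the custom_attr/extra keys asserted in the test below. A standalone illustration of that behavior using the OpenTelemetry SDK (demo wiring, not the library's own code path):

    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

    provider = TracerProvider()
    provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
    tracer = provider.get_tracer("demo")

    extra_attributes = {"custom_attr": "value", "extra": "info"}
    with tracer.start_as_current_span("chat test-model") as span:
        # Same loop as _maybe_set_span_extra_attributes: each key/value
        # pair becomes a span attribute.
        for key, value in extra_attributes.items():
            span.set_attribute(key, value)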

util/opentelemetry-util-genai/tests/test_utils.py

Lines changed: 34 additions & 22 deletions

@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import os
 import unittest
 from unittest.mock import patch
-from uuid import uuid4
 
 from opentelemetry import trace
 from opentelemetry.instrumentation._semconv import (
@@ -34,6 +34,7 @@
 from opentelemetry.util.genai.types import (
     ContentCapturingMode,
     InputMessage,
+    LLMInvocation,
     OutputMessage,
     Text,
 )
@@ -130,17 +131,18 @@ def test_llm_start_and_stop_creates_span(self):  # pylint: disable=no-self-use
         )
 
         # Start and stop LLM invocation
-        invocation = self.telemetry_handler.start_llm(
+        invocation = LLMInvocation(
             request_model="test-model",
             input_messages=[message],
-            custom_attr="value",
             provider="test-provider",
+            attributes={"custom_attr": "value"},
         )
-        self.telemetry_handler.stop_llm(
-            invocation,
-            output_messages=[chat_generation],
-            extra="info",
-        )
+
+        self.telemetry_handler.start_llm(invocation)
+        assert invocation.span is not None
+        invocation.output_messages = [chat_generation]
+        invocation.attributes.update({"extra": "info"})
+        self.telemetry_handler.stop_llm(invocation)
 
         # Get the spans that were created
         spans = self.span_exporter.get_finished_spans()
@@ -165,44 +167,54 @@ def test_llm_start_and_stop_creates_span(self):  # pylint: disable=no-self-use
         output_messages_json = span_attrs.get("gen_ai.output.messages")
         assert input_messages_json is not None
         assert output_messages_json is not None
-
         assert isinstance(input_messages_json, str)
         assert isinstance(output_messages_json, str)
+        input_messages = json.loads(input_messages_json)
+        output_messages = json.loads(output_messages_json)
+        assert len(input_messages) == 1
+        assert len(output_messages) == 1
+        assert input_messages[0].get("role") == "Human"
+        assert output_messages[0].get("role") == "AI"
+        assert output_messages[0].get("finish_reason") == "stop"
+        assert (
+            output_messages[0].get("parts")[0].get("content") == "hello back"
+        )
+
+        # Check that extra attributes are added to the span
+        assert span_attrs.get("extra") == "info"
+        assert span_attrs.get("custom_attr") == "value"
 
     @patch_env_vars(
         stability_mode="gen_ai_latest_experimental",
         content_capturing="SPAN_ONLY",
     )
     def test_parent_child_span_relationship(self):
-        parent_id = uuid4()
-        child_id = uuid4()
         message = InputMessage(role="Human", parts=[Text(content="hi")])
         chat_generation = OutputMessage(
             role="AI", parts=[Text(content="ok")], finish_reason="stop"
         )
 
         # Start parent and child (child references parent_run_id)
-        parent_invocation = self.telemetry_handler.start_llm(
+        parent_invocation = LLMInvocation(
             request_model="parent-model",
             input_messages=[message],
-            run_id=parent_id,
             provider="test-provider",
         )
-        child_invocation = self.telemetry_handler.start_llm(
+        child_invocation = LLMInvocation(
             request_model="child-model",
             input_messages=[message],
-            run_id=child_id,
-            parent_run_id=parent_id,
             provider="test-provider",
         )
 
+        # Pass invocation data to start_llm
+        self.telemetry_handler.start_llm(parent_invocation)
+        self.telemetry_handler.start_llm(child_invocation)
+
         # Stop child first, then parent (order should not matter)
-        self.telemetry_handler.stop_llm(
-            child_invocation, output_messages=[chat_generation]
-        )
-        self.telemetry_handler.stop_llm(
-            parent_invocation, output_messages=[chat_generation]
-        )
+        child_invocation.output_messages = [chat_generation]
+        parent_invocation.output_messages = [chat_generation]
+        self.telemetry_handler.stop_llm(child_invocation)
+        self.telemetry_handler.stop_llm(parent_invocation)
 
         spans = self.span_exporter.get_finished_spans()
         assert len(spans) == 2