Commit 9837cf4
refactor: store span and context token in LLMInvocation instead of SpanGenerator
1 parent 635b7f8 commit 9837cf4

4 files changed: +37 −79 lines

util/opentelemetry-util-genai/src/opentelemetry/util/genai/generators.py

Lines changed: 13 additions & 44 deletions

@@ -31,7 +31,6 @@
 follow the GenAI semantic conventions.
 """

-from contextlib import contextmanager
 from contextvars import Token
 from typing import Dict, Optional
 from uuid import UUID
@@ -95,56 +94,26 @@ def start(self, invocation: LLMInvocation):
             name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
             kind=SpanKind.CLIENT,
         )
-        token = otel_context.attach(set_span_in_context(span))
-        self._active[invocation.run_id] = (span, token)
-
-    @contextmanager
-    def _start_span_for_invocation(self, invocation: LLMInvocation):
-        """Create/register a span for the invocation and yield it.
-
-        The span is not ended automatically on exiting the context; callers
-        must finalize via _finalize_invocation.
-        """
-        # Create a span and attach it as current; keep the token to detach later
-        span = self._tracer.start_span(
-            name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
-            kind=SpanKind.CLIENT,
+        invocation.span = span
+        invocation.context_token = otel_context.attach(
+            set_span_in_context(span)
         )
-        token = otel_context.attach(set_span_in_context(span))
-        # store active span and its context attachment token
-        self._active[invocation.run_id] = (span, token)
-        yield span

     def finish(self, invocation: LLMInvocation):
-        active = self._active.get(invocation.run_id)
-        if active is None:
-            # If missing, create a quick span to record attributes and end it
-            with self._tracer.start_as_current_span(
-                name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
-                kind=SpanKind.CLIENT,
-            ) as span:
-                _apply_finish_attributes(span, invocation)
+        if invocation.context_token is None or invocation.span is None:
             return

-        span, token = active
-        _apply_finish_attributes(span, invocation)
+        _apply_finish_attributes(invocation.span, invocation)
         # Detach context and end span
-        otel_context.detach(token)
-        span.end()
-        del self._active[invocation.run_id]
+        otel_context.detach(invocation.context_token)
+        invocation.span.end()

     def error(self, error: Error, invocation: LLMInvocation):
-        active = self._active.get(invocation.run_id)
-        if active is None:
-            with self._tracer.start_as_current_span(
-                name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
-                kind=SpanKind.CLIENT,
-            ) as span:
-                _apply_error_attributes(span, error)
+        if invocation.context_token is None or invocation.span is None:
             return

-        span, token = active
-        _apply_error_attributes(span, error)
-        otel_context.detach(token)
-        span.end()
-        del self._active[invocation.run_id]
+        _apply_error_attributes(invocation.span, error)
+        # Detach context and end span
+        otel_context.detach(invocation.context_token)
+        invocation.span.end()
+        return
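
With this change the SpanGenerator no longer keeps a run_id-keyed registry: start() writes the span and the context-attachment token onto the LLMInvocation itself, and finish()/error() read them back from the same object. A minimal standalone sketch of that pattern follows; the Invocation class and the start/finish helpers are hypothetical stand-ins, not the package's public API.

from dataclasses import dataclass
from typing import Optional

from opentelemetry import context as otel_context
from opentelemetry import trace
from opentelemetry.trace import Span, SpanKind
from opentelemetry.trace.propagation import set_span_in_context


@dataclass
class Invocation:
    # Hypothetical stand-in for LLMInvocation: the span and the context
    # attachment token travel with the invocation object itself.
    request_model: str
    span: Optional[Span] = None
    context_token: Optional[object] = None


def start(tracer: trace.Tracer, invocation: Invocation) -> None:
    # Start the span, make it current, and keep the detach token on the invocation.
    invocation.span = tracer.start_span(
        f"chat {invocation.request_model}", kind=SpanKind.CLIENT
    )
    invocation.context_token = otel_context.attach(
        set_span_in_context(invocation.span)
    )


def finish(invocation: Invocation) -> None:
    # No registry lookup: everything needed to close out is on the object.
    if invocation.context_token is None or invocation.span is None:
        return
    otel_context.detach(invocation.context_token)
    invocation.span.end()


inv = Invocation(request_model="demo-model")
start(trace.get_tracer(__name__), inv)
finish(inv)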

util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py

Lines changed: 8 additions & 24 deletions

@@ -33,9 +33,7 @@
 """

 import time
-import uuid
 from typing import Any, List, Optional
-from uuid import UUID

 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer
@@ -72,7 +70,7 @@ class TelemetryHandler:
     them as spans, metrics, and events.
     """

-    def __init__(self, emitter_type_full: bool = True, **kwargs: Any):
+    def __init__(self, **kwargs: Any):
         tracer_provider = kwargs.get("tracer_provider")
         self._tracer = get_tracer(
             __name__,
@@ -81,18 +79,14 @@ def __init__(self, emitter_type_full: bool = True, **kwargs: Any):
             schema_url=Schemas.V1_36_0.value,
         )

-        # TODO: trigger span+metric+event generation based on the full emitter flag
         self._generator = SpanGenerator(tracer=self._tracer)

-        self._llm_registry: dict[UUID, LLMInvocation] = {}
-
     def start_llm(
         self,
         request_model: str,
         prompts: List[InputMessage],
-        run_id: Optional[UUID] = None,
         **attributes: Any,
-    ) -> UUID:
+    ) -> LLMInvocation:
         """Start an LLM invocation and create a pending span entry.

         Known attributes provided via ``**attributes`` (``provider``,
@@ -101,29 +95,24 @@ def start_llm(
         ``LLMInvocation``. Any remaining keys are preserved in
         ``invocation.attributes`` for custom metadata.

-        Returns the ``run_id`` used to track the invocation lifecycle.
+        Returns the ``LLMInvocation`` to use with `stop_llm` and `fail_llm`.
         """
-        if run_id is None:
-            run_id = uuid.uuid4()
         invocation = LLMInvocation(
             request_model=request_model,
             messages=prompts,
-            run_id=run_id,
             attributes=attributes,
         )
         _apply_known_attrs_to_invocation(invocation, invocation.attributes)
-        self._llm_registry[invocation.run_id] = invocation
         self._generator.start(invocation)
-        return invocation.run_id
+        return invocation

     def stop_llm(
         self,
-        run_id: UUID,
+        invocation: LLMInvocation,
         chat_generations: List[OutputMessage],
         **attributes: Any,
     ) -> LLMInvocation:
         """Finalize an LLM invocation successfully and end its span."""
-        invocation = self._llm_registry.pop(run_id)
         invocation.end_time = time.time()
         invocation.chat_generations = chat_generations
         _apply_known_attrs_to_invocation(invocation, attributes)
@@ -132,29 +121,24 @@ def stop_llm(
         return invocation

     def fail_llm(
-        self, run_id: UUID, error: Error, **attributes: Any
+        self, invocation: LLMInvocation, error: Error, **attributes: Any
     ) -> LLMInvocation:
         """Fail an LLM invocation and end its span with error status."""
-        invocation = self._llm_registry.pop(run_id)
         invocation.end_time = time.time()
         _apply_known_attrs_to_invocation(invocation, attributes)
         invocation.attributes.update(**attributes)
         self._generator.error(error, invocation)
         return invocation


-def get_telemetry_handler(
-    emitter_type_full: bool = True, **kwargs: Any
-) -> TelemetryHandler:
+def get_telemetry_handler(**kwargs: Any) -> TelemetryHandler:
     """
     Returns a singleton TelemetryHandler instance.
     """
     handler: Optional[TelemetryHandler] = getattr(
         get_telemetry_handler, "_default_handler", None
     )
     if handler is None:
-        handler = TelemetryHandler(
-            emitter_type_full=emitter_type_full, **kwargs
-        )
+        handler = TelemetryHandler(**kwargs)
         setattr(get_telemetry_handler, "_default_handler", handler)
     return handler
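
With the registry gone, callers keep the LLMInvocation returned by start_llm and hand it back to stop_llm or fail_llm. A minimal usage sketch, modeled on the updated tests; the import paths are inferred from the file locations in this commit, and the model, provider, and attribute names are illustrative.

from opentelemetry.util.genai.handler import get_telemetry_handler
from opentelemetry.util.genai.types import InputMessage, Text

handler = get_telemetry_handler()

# start_llm now returns the LLMInvocation instead of a run_id.
invocation = handler.start_llm(
    request_model="test-model",
    prompts=[InputMessage(role="Human", parts=[Text(content="hello world")])],
    provider="test-provider",
    custom_attr="value",
)

# ... call the model ...

# Pass the invocation object back to finalize its span.
handler.stop_llm(
    invocation,
    chat_generations=[],  # OutputMessage results from the model would go here
    extra="info",
)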

util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py

Lines changed: 9 additions & 1 deletion

@@ -14,13 +14,20 @@


 import time
+from contextvars import Token
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Type, Union
 from uuid import UUID

+from typing_extensions import TypeAlias
+
+from opentelemetry.context import Context
+from opentelemetry.trace import Span
 from opentelemetry.util.types import AttributeValue

+ContextToken: TypeAlias = Token[Context]
+

 class ContentCapturingMode(Enum):
     # Do not capture content (default).
@@ -81,8 +88,9 @@ class LLMInvocation:
     Represents a single LLM call invocation.
     """

-    run_id: UUID
     request_model: str
+    context_token: Optional[ContextToken] = None
+    span: Optional[Span] = None
     parent_run_id: Optional[UUID] = None
     start_time: float = field(default_factory=time.time)
     end_time: Optional[float] = None
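
The new ContextToken alias simply names the token type that opentelemetry.context.attach() hands back, so LLMInvocation can type the value it later passes to detach(). A small standalone illustration of that attach/detach pairing (assuming Python 3.9+ so that Token[Context] is subscriptable at runtime, as in the diff; the annotation on the attach() return value mirrors this commit's usage rather than a guaranteed API signature):

from contextvars import Token

from opentelemetry import context as otel_context
from opentelemetry.context import Context

ContextToken = Token[Context]  # same alias as in types.py

# attach() returns the token needed to restore the previous context later.
token = otel_context.attach(otel_context.get_current())
try:
    pass  # work runs under the attached context here
finally:
    otel_context.detach(token)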

util/opentelemetry-util-genai/tests/test_utils.py

Lines changed: 7 additions & 10 deletions

@@ -122,7 +122,6 @@ def tearDown(self):
         content_capturing="SPAN_ONLY",
     )
     def test_llm_start_and_stop_creates_span(self):  # pylint: disable=no-self-use
-        run_id = uuid4()
         message = InputMessage(
             role="Human", parts=[Text(content="hello world")]
         )
@@ -131,15 +130,14 @@ def test_llm_start_and_stop_creates_span(self):  # pylint: disable=no-self-use
         )

         # Start and stop LLM invocation
-        self.telemetry_handler.start_llm(
+        invocation = self.telemetry_handler.start_llm(
             request_model="test-model",
             prompts=[message],
-            run_id=run_id,
             custom_attr="value",
             provider="test-provider",
         )
-        invocation = self.telemetry_handler.stop_llm(
-            run_id, chat_generations=[chat_generation], extra="info"
+        self.telemetry_handler.stop_llm(
+            invocation, chat_generations=[chat_generation], extra="info"
         )

         # Get the spans that were created
@@ -157,7 +155,6 @@ def test_llm_start_and_stop_creates_span(self):  # pylint: disable=no-self-use
         assert span.start_time is not None
         assert span.end_time is not None
         assert span.end_time > span.start_time
-        assert invocation.run_id == run_id
         assert invocation.attributes.get("custom_attr") == "value"
         assert invocation.attributes.get("extra") == "info"

@@ -183,13 +180,13 @@ def test_parent_child_span_relationship(self):
         )

         # Start parent and child (child references parent_run_id)
-        self.telemetry_handler.start_llm(
+        parent_invocation = self.telemetry_handler.start_llm(
             request_model="parent-model",
             prompts=[message],
             run_id=parent_id,
             provider="test-provider",
         )
-        self.telemetry_handler.start_llm(
+        child_invocation = self.telemetry_handler.start_llm(
             request_model="child-model",
             prompts=[message],
             run_id=child_id,
@@ -199,10 +196,10 @@

         # Stop child first, then parent (order should not matter)
         self.telemetry_handler.stop_llm(
-            child_id, chat_generations=[chat_generation]
+            child_invocation, chat_generations=[chat_generation]
         )
         self.telemetry_handler.stop_llm(
-            parent_id, chat_generations=[chat_generation]
+            parent_invocation, chat_generations=[chat_generation]
         )

         spans = self.span_exporter.get_finished_spans()
