Skip to content

Commit 0125626

Browse files
committed
Add workflow root span support and handoff example
1 parent 407fdfb commit 0125626

File tree

6 files changed

+253
-4
lines changed

6 files changed

+253
-4
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Update this with your real OpenAI API key
2+
OPENAI_API_KEY=sk-YOUR_API_KEY
3+
4+
# Uncomment and adjust if you use a non-default OTLP collector endpoint
5+
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
6+
# OTEL_EXPORTER_OTLP_PROTOCOL=grpc
7+
8+
OTEL_SERVICE_NAME=opentelemetry-python-openai-agents-handoffs
9+
10+
# Optionally override the agent name reported on spans
11+
# OTEL_GENAI_AGENT_NAME=Travel Concierge
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
OpenTelemetry OpenAI Agents Handoff Example
===========================================
3+
4+
This example shows how the OpenTelemetry OpenAI Agents instrumentation captures
5+
spans in a small multi-agent workflow. Three agents collaborate: a primary
6+
concierge, a concise assistant with a random-number tool, and a Spanish
7+
specialist reached through a handoff. Running the sample produces
8+
``invoke_agent`` spans for each agent as well as an ``execute_tool`` span for
9+
the random-number function.
10+
11+
Setup
12+
-----
13+
14+
1. Copy `.env.example <.env.example>`_ to ``.env`` and populate it with your real
15+
``OPENAI_API_KEY``. Adjust the OTLP exporter settings if your collector does
16+
not listen on ``http://localhost:4317``.
17+
2. Create a virtual environment and install the dependencies:
18+
19+
::
20+
21+
python3 -m venv .venv
22+
source .venv/bin/activate
23+
pip install "python-dotenv[cli]"
24+
pip install -r requirements.txt
25+
26+
Run
27+
---
28+
29+
Execute the workflow with ``dotenv`` so the environment variables from ``.env``
30+
are loaded automatically:
31+
32+
::
33+
34+
dotenv run -- python main.py
35+
36+
The script emits a short transcript to stdout while spans stream to the OTLP
37+
endpoint defined in your environment. You should see multiple
38+
``invoke_agent`` spans (one per agent) and an ``execute_tool`` span for the
39+
random-number helper triggered during the run.
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# pylint: skip-file
2+
"""Multi-agent handoff example instrumented with OpenTelemetry."""
3+
4+
from __future__ import annotations
5+
6+
import asyncio
7+
import json
8+
import random
9+
10+
from agents import Agent, HandoffInputData, Runner, function_tool, handoff
11+
from agents import trace as agent_trace
12+
from agents.extensions import handoff_filters
13+
from agents.models import is_gpt_5_default
14+
from dotenv import load_dotenv
15+
16+
from opentelemetry import trace as otel_trace
17+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import (
18+
OTLPSpanExporter,
19+
)
20+
from opentelemetry.instrumentation.openai_agents import (
21+
OpenAIAgentsInstrumentor,
22+
)
23+
from opentelemetry.sdk.trace import TracerProvider
24+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
25+
26+
27+
def configure_otel() -> None:
    """Set up the OpenTelemetry SDK and enable the Agents instrumentation.

    Builds a ``TracerProvider`` that exports spans via OTLP/gRPC, installs it
    as the global tracer provider, and instruments the OpenAI Agents SDK
    against that provider.
    """
    tracer_provider = TracerProvider()
    otlp_exporter = OTLPSpanExporter()
    tracer_provider.add_span_processor(BatchSpanProcessor(otlp_exporter))
    otel_trace.set_tracer_provider(tracer_provider)

    OpenAIAgentsInstrumentor().instrument(tracer_provider=tracer_provider)
36+
37+
@function_tool
def random_number_tool(maximum: int) -> int:
    """Pick a uniformly random integer in the inclusive range [0, maximum]."""
    return random.randint(0, maximum)
42+
43+
44+
def spanish_handoff_message_filter(
    handoff_message_data: HandoffInputData,
) -> HandoffInputData:
    """Trim the message history forwarded to the Spanish-speaking agent."""
    if is_gpt_5_default():
        # With GPT-5 as the default model no additional filtering is applied;
        # forward the history unchanged (items normalized to tuples).
        return HandoffInputData(
            input_history=handoff_message_data.input_history,
            pre_handoff_items=tuple(handoff_message_data.pre_handoff_items),
            new_items=tuple(handoff_message_data.new_items),
        )

    # Strip tool calls/results from the forwarded transcript, then drop the
    # first two entries of the remaining history.
    without_tools = handoff_filters.remove_all_tools(handoff_message_data)
    trimmed_history = without_tools.input_history[2:]
    if isinstance(without_tools.input_history, tuple):
        # Slicing a tuple already yields a tuple; keep the explicit
        # conversion for clarity of intent.
        trimmed_history = tuple(trimmed_history)

    return HandoffInputData(
        input_history=trimmed_history,
        pre_handoff_items=tuple(without_tools.pre_handoff_items),
        new_items=tuple(without_tools.new_items),
    )
69+
70+
71+
# --- Agent definitions -------------------------------------------------

# Primary worker agent; carries the random-number function tool so runs
# against it can produce an ``execute_tool`` span.
assistant = Agent(
    name="Assistant",
    instructions="Be extremely concise.",
    tools=[random_number_tool],
)

# Spanish-only specialist, reachable exclusively through a handoff.
spanish_assistant = Agent(
    name="Spanish Assistant",
    instructions="You only speak Spanish and are extremely concise.",
    handoff_description="A Spanish-speaking assistant.",
)

# Entry-point agent: hands off to the Spanish specialist (with the history
# trimmed by ``spanish_handoff_message_filter``) when the user switches to
# Spanish.
concierge = Agent(
    name="Concierge",
    instructions=(
        "Be a helpful assistant. If the traveler switches to Spanish, handoff to"
        " the Spanish specialist. Use the random number tool when asked for"
        " numbers."
    ),
    handoffs=[
        handoff(spanish_assistant, input_filter=spanish_handoff_message_filter)
    ],
)
95+
96+
async def _continue_conversation(agent: Agent, previous, user_text: str):
    """Run *agent* on the prior run's transcript plus one new user message.

    Factors out the ``to_input_list() + [message]`` pattern that steps 2-4
    of the workflow previously repeated verbatim.
    """
    return await Runner.run(
        agent,
        input=previous.to_input_list()
        + [
            {
                "content": user_text,
                "role": "user",
            }
        ],
    )


async def run_workflow() -> None:
    """Execute a conversation that triggers tool calls and handoffs.

    Runs four turns inside a single Agents trace: a plain chat turn, a turn
    that exercises the random-number tool, a turn with the concierge agent,
    and a Spanish turn that causes a handoff to the specialist. Prints the
    final transcript as JSON.
    """
    with agent_trace(workflow_name="Travel concierge handoff"):
        # Step 1: Basic conversation with the initial assistant.
        result = await Runner.run(
            assistant,
            input="I'm planning a trip to Madrid. Can you help?",
        )
        print("Step 1 complete")

        # Step 2: Ask for a random number to exercise the tool span.
        result = await _continue_conversation(
            assistant, result, "Pick a lucky number between 0 and 20"
        )
        print("Step 2 complete")

        # Step 3: Continue the conversation with the concierge agent.
        result = await _continue_conversation(
            concierge,
            result,
            "Recommend some sights in Madrid for a weekend trip.",
        )
        print("Step 3 complete")

        # Step 4: Switch to Spanish to cause a handoff to the specialist.
        result = await _continue_conversation(
            concierge,
            result,
            "Por favor habla en español. ¿Puedes resumir el plan?",
        )
        print("Step 4 complete")

        print("\n=== Conversation Transcript ===\n")
        for message in result.to_input_list():
            print(json.dumps(message, indent=2, ensure_ascii=False))
153+
154+
155+
def main() -> None:
    """Load environment variables, configure tracing, and run the workflow."""
    load_dotenv()
    configure_otel()
    asyncio.run(run_workflow())


if __name__ == "__main__":
    main()
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
openai-agents~=0.3.3
2+
python-dotenv~=1.0
3+
4+
opentelemetry-sdk~=1.36.0
5+
opentelemetry-exporter-otlp-proto-grpc~=1.36.0
6+
opentelemetry-instrumentation-openai-agents~=0.1.0.dev

instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/span_processor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,8 +160,8 @@ def __init__(
160160
and agent_name_override.strip()
161161
else None
162162
)
163-
self._root_spans: dict[str, Span] = {}
164163
self._spans: dict[str, _SpanContext] = {}
164+
self._root_spans: dict[str, Span] = {}
165165
self._lock = RLock()
166166

167167
def _operation_name(self, span_data: Any) -> str:

instrumentation-genai/opentelemetry-instrumentation-openai-agents/tests/test_tracer.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,15 @@ def test_generation_span_creates_client_span():
7575
pass
7676

7777
spans = exporter.get_finished_spans()
78-
client_span = next(
79-
span for span in spans if span.kind is SpanKind.CLIENT
80-
)
78+
client_spans = [span for span in spans if span.kind is SpanKind.CLIENT]
79+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
80+
81+
assert len(server_spans) == 1
82+
server_span = server_spans[0]
83+
assert server_span.name == "workflow"
84+
assert server_span.attributes["gen_ai.provider.name"] == "openai"
85+
assert client_spans
86+
client_span = next(iter(client_spans))
8187

8288
assert client_span.attributes["gen_ai.provider.name"] == "openai"
8389
assert client_span.attributes[GenAI.GEN_AI_OPERATION_NAME] == "chat"
@@ -115,6 +121,12 @@ def test_generation_span_without_roles_uses_text_completion():
115121
if span.attributes[GenAI.GEN_AI_OPERATION_NAME]
116122
== GenAI.GenAiOperationNameValues.TEXT_COMPLETION.value
117123
)
124+
assert completion_span.kind is SpanKind.CLIENT
125+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
126+
assert len(server_spans) == 1
127+
assert server_spans[0].name == "workflow"
128+
assert server_spans[0].attributes["gen_ai.provider.name"] == "openai"
129+
assert [span for span in spans if span.kind is SpanKind.CLIENT]
118130

119131
assert completion_span.kind is SpanKind.CLIENT
120132
assert completion_span.name == "text_completion gpt-4o-mini"
@@ -142,6 +154,11 @@ def test_function_span_records_tool_attributes():
142154
span for span in spans if span.kind is SpanKind.INTERNAL
143155
)
144156

157+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
158+
assert len(server_spans) == 1
159+
assert server_spans[0].name == "workflow"
160+
assert server_spans[0].attributes["gen_ai.provider.name"] == "openai"
161+
145162
assert (
146163
tool_span.attributes[GenAI.GEN_AI_OPERATION_NAME] == "execute_tool"
147164
)
@@ -174,6 +191,11 @@ def test_agent_create_span_records_attributes():
174191
if span.attributes[GenAI.GEN_AI_OPERATION_NAME]
175192
== GenAI.GenAiOperationNameValues.CREATE_AGENT.value
176193
)
194+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
195+
assert len(server_spans) == 1
196+
assert server_spans[0].name == "workflow"
197+
assert server_spans[0].attributes["gen_ai.provider.name"] == "openai"
198+
assert [span for span in spans if span.kind is SpanKind.CLIENT]
177199

178200
assert create_span.kind is SpanKind.CLIENT
179201
assert create_span.name == "create_agent support_bot"
@@ -209,6 +231,11 @@ def test_agent_name_override_applied_to_agent_spans():
209231
if span.attributes[GenAI.GEN_AI_OPERATION_NAME]
210232
== GenAI.GenAiOperationNameValues.INVOKE_AGENT.value
211233
)
234+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
235+
assert len(server_spans) == 1
236+
assert server_spans[0].name == "workflow"
237+
assert server_spans[0].attributes["gen_ai.provider.name"] == "openai"
238+
assert [span for span in spans if span.kind is SpanKind.CLIENT]
212239

213240
assert agent_span_record.kind is SpanKind.CLIENT
214241
assert agent_span_record.name == "invoke_agent Travel Concierge"
@@ -261,6 +288,10 @@ def __init__(self) -> None:
261288
assert response.attributes[GenAI.GEN_AI_RESPONSE_FINISH_REASONS] == (
262289
"stop",
263290
)
291+
server_spans = [span for span in spans if span.kind is SpanKind.SERVER]
292+
assert len(server_spans) == 1
293+
assert server_spans[0].name == "workflow"
294+
assert server_spans[0].attributes["gen_ai.provider.name"] == "openai"
264295
finally:
265296
instrumentor.uninstrument()
266297
exporter.clear()

0 commit comments

Comments
 (0)