open-telemetry
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/__init__.py‎
Lines changed: 141 additions & 48 deletions b/‎instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/__init__.py‎
Lines changed: 141 additions & 48 deletions
diff --git a/‎instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/constants.py‎
Lines changed: 135 additions & 0 deletions b/‎instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/constants.py‎
Lines changed: 135 additions & 0 deletions
@@ -1,74 +1,133 @@
-# Copyright The OpenTelemetry Authors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 """OpenAI Agents instrumentation for OpenTelemetry."""
 
 from __future__ import annotations
 
+import importlib
+import logging
 import os
-from typing import Collection, Protocol
-
-from agents import tracing
-from agents.tracing.processor_interface import TracingProcessor
+from typing import Any, Collection
 
+from opentelemetry._events import get_event_logger
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAI,
 )
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import get_tracer
 
+from .constants import (
+    GenAIEvaluationAttributes,
+    GenAIOperationName,
+    GenAIOutputType,
+    GenAIProvider,
+    GenAIToolType,
+)
+from .genai_semantic_processor import (
+    ContentCaptureMode,
+    GenAISemanticProcessor,
+)
 from .package import _instruments
-from .span_processor import _OpenAIAgentsSpanProcessor
-from .version import __version__  # noqa: F401
 
+__all__ = [
+    "OpenAIAgentsInstrumentor",
+    "GenAIProvider",
+    "GenAIOperationName",
+    "GenAIToolType",
+    "GenAIOutputType",
+    "GenAIEvaluationAttributes",
+]
 
-class _ProcessorHolder(Protocol):
-    _processors: Collection[TracingProcessor]
+logger = logging.getLogger(__name__)
 
+# Read first when no ``capture_message_content`` keyword argument is given.
+_CONTENT_CAPTURE_ENV = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
+# Read for the system name when the ``system`` keyword argument is falsy.
+_SYSTEM_OVERRIDE_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_SYSTEM"
+# Fallback content-capture variable, used when _CONTENT_CAPTURE_ENV is unset
+# or empty.
+_CAPTURE_CONTENT_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_CONTENT"
+# Read when no ``capture_metrics`` keyword argument is given; parsed as a
+# boolean that defaults to True.
+_CAPTURE_METRICS_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_METRICS"
 
-class _TraceProviderLike(Protocol):
-    _multi_processor: _ProcessorHolder
 
+def _load_tracing_module():  # pragma: no cover - exercised via tests
+    """Import ``agents.tracing`` at call time and return the module object."""
+    tracing_module = importlib.import_module("agents.tracing")
+    return tracing_module
 
-__all__ = ["OpenAIAgentsInstrumentor"]
 
+def _get_registered_processors(provider) -> list:
+    """Return a new list of the processors currently held by *provider*.
+
+    Reads the provider's private ``_multi_processor`` attribute and that
+    object's ``_processors`` collection. If either attribute is missing the
+    result is an empty list.
+    """
+    holder = getattr(provider, "_multi_processor", None)
+    return [*getattr(holder, "_processors", ())]
 
-def _resolve_system(_: str | None) -> str:
-    # OpenAI spans must report provider name "openai" per semantic conventions.
-    return GenAI.GenAiSystemValues.OPENAI.value
 
+def _resolve_system(value: str | None) -> str:
+    """Resolve the GenAI system name to report.
+
+    Args:
+        value: Override supplied by the caller or the environment, or
+            ``None`` when there is no override.
+
+    Returns:
+        The OpenAI system value when *value* is missing, empty or
+        whitespace-only; the ``GenAiSystemValues`` member value when *value*
+        matches a member's value or name case-insensitively; otherwise
+        *value* with surrounding whitespace removed.
+    """
+    default = GenAI.GenAiSystemValues.OPENAI.value
+    if not value:
+        return default
 
-def _get_registered_processors(
-    provider: _TraceProviderLike,
-) -> list[TracingProcessor]:
-    """Return tracing processors registered on the OpenAI Agents trace provider.
+    cleaned = value.strip()
+    if not cleaned:
+        # A whitespace-only override names nothing; use the default rather
+        # than reporting blanks as the system.
+        return default
+
+    normalized = cleaned.lower()
+    for member in GenAI.GenAiSystemValues:
+        if normalized in (member.value, member.name.lower()):
+            return member.value
+    # Unknown systems pass through with their original casing, trimmed.
+    return cleaned
 
-    The provider exposes a private `_multi_processor` attribute with a `_processors`
-    collection that stores the currently registered processors in execution order.
-    """
-    multi = getattr(provider, "_multi_processor", None)
-    processors = getattr(multi, "_processors", ())
-    return list(processors)
+
+def _resolve_content_mode(value: Any) -> ContentCaptureMode:
+    """Translate a keyword-argument or environment setting into a mode.
+
+    Args:
+        value: A ``ContentCaptureMode`` (returned unchanged), a ``bool``
+            (``True`` -> ``SPAN_AND_EVENT``, ``False`` -> ``NO_CONTENT``),
+            ``None``, or any object whose ``str()`` form names a mode.
+
+    Returns:
+        The matching mode. ``None``, blank text and unrecognized text all
+        resolve to ``SPAN_AND_EVENT``; unrecognized text is also logged as a
+        warning.
+    """
+    if isinstance(value, ContentCaptureMode):
+        return value
+    if isinstance(value, bool):
+        return (
+            ContentCaptureMode.SPAN_AND_EVENT
+            if value
+            else ContentCaptureMode.NO_CONTENT
+        )
+
+    text = "" if value is None else str(value).strip().lower()
+    if not text:
+        return ContentCaptureMode.SPAN_AND_EVENT
+
+    mapping = {
+        "span_only": ContentCaptureMode.SPAN_ONLY,
+        "span-only": ContentCaptureMode.SPAN_ONLY,
+        "span": ContentCaptureMode.SPAN_ONLY,
+        "event_only": ContentCaptureMode.EVENT_ONLY,
+        "event-only": ContentCaptureMode.EVENT_ONLY,
+        "event": ContentCaptureMode.EVENT_ONLY,
+        "span_and_event": ContentCaptureMode.SPAN_AND_EVENT,
+        "span-and-event": ContentCaptureMode.SPAN_AND_EVENT,
+        "span_and_events": ContentCaptureMode.SPAN_AND_EVENT,
+        "all": ContentCaptureMode.SPAN_AND_EVENT,
+        "true": ContentCaptureMode.SPAN_AND_EVENT,
+        "1": ContentCaptureMode.SPAN_AND_EVENT,
+        "yes": ContentCaptureMode.SPAN_AND_EVENT,
+        "no_content": ContentCaptureMode.NO_CONTENT,
+        "false": ContentCaptureMode.NO_CONTENT,
+        "0": ContentCaptureMode.NO_CONTENT,
+        "no": ContentCaptureMode.NO_CONTENT,
+        "none": ContentCaptureMode.NO_CONTENT,
+    }
+
+    mode = mapping.get(text)
+    if mode is None:
+        # The fallback enables content capture, so a mistyped setting must
+        # not go unnoticed.
+        logger.warning(
+            "Unrecognized content capture mode %r; defaulting to %s",
+            value,
+            ContentCaptureMode.SPAN_AND_EVENT,
+        )
+        return ContentCaptureMode.SPAN_AND_EVENT
+    return mode
+
+
+def _resolve_bool(value: Any, default: bool) -> bool:
+    """Coerce *value* to a boolean, returning *default* when it is unclear.
+
+    Booleans are returned as-is and ``None`` yields *default*. Anything else
+    is converted with ``str()``, trimmed and lower-cased, then compared with
+    the accepted true/false spellings; unknown text yields *default*.
+    """
+    if isinstance(value, bool):
+        return value
+    if value is None:
+        return default
+
+    token = str(value).strip().lower()
+    truthy = ("true", "1", "yes", "on")
+    falsy = ("false", "0", "no", "off")
+    if token in truthy:
+        return True
+    return False if token in falsy else default
 
 
 class OpenAIAgentsInstrumentor(BaseInstrumentor):
-    """Instrumentation that bridges OpenAI Agents tracing to OpenTelemetry spans."""
+    """Instrumentation that bridges OpenAI Agents tracing to OpenTelemetry."""
 
     def __init__(self) -> None:
+        """Initialize the instrumentor with no processor stored yet."""
         super().__init__()
-        self._processor: _OpenAIAgentsSpanProcessor | None = None
+        self._processor: GenAISemanticProcessor | None = None
 
     def _instrument(self, **kwargs) -> None:
         if self._processor is not None:
@@ -82,17 +141,48 @@ def _instrument(self, **kwargs) -> None:
             schema_url=Schemas.V1_28_0.value,
         )
 
-        system = _resolve_system(kwargs.get("system"))
-        agent_name_override = kwargs.get("agent_name") or os.getenv(
-            "OTEL_GENAI_AGENT_NAME"
+        event_logger_provider = kwargs.get("event_logger_provider")
+        event_logger = get_event_logger(
+            __name__,
+            "",
+            schema_url=Schemas.V1_28_0.value,
+            event_logger_provider=event_logger_provider,
+        )
+
+        system_override = kwargs.get("system") or os.getenv(
+            _SYSTEM_OVERRIDE_ENV
         )
+        system = _resolve_system(system_override)
+
+        content_override = kwargs.get("capture_message_content")
+        if content_override is None:
+            content_override = os.getenv(_CONTENT_CAPTURE_ENV) or os.getenv(
+                _CAPTURE_CONTENT_ENV
+            )
+        content_mode = _resolve_content_mode(content_override)
+
+        metrics_override = kwargs.get("capture_metrics")
+        if metrics_override is None:
+            metrics_override = os.getenv(_CAPTURE_METRICS_ENV)
+        metrics_enabled = _resolve_bool(metrics_override, default=True)
 
-        processor = _OpenAIAgentsSpanProcessor(
+        processor = GenAISemanticProcessor(
             tracer=tracer,
-            system=system,
-            agent_name_override=agent_name_override,
+            event_logger=event_logger,
+            system_name=system,
+            include_sensitive_data=content_mode
+            != ContentCaptureMode.NO_CONTENT,
+            content_mode=content_mode,
+            metrics_enabled=metrics_enabled,
+            agent_name=kwargs.get("agent_name"),
+            agent_id=kwargs.get("agent_id"),
+            agent_description=kwargs.get("agent_description"),
+            base_url=kwargs.get("base_url"),
+            server_address=kwargs.get("server_address"),
+            server_port=kwargs.get("server_port"),
         )
 
+        tracing = _load_tracing_module()
         provider = tracing.get_trace_provider()
         existing = _get_registered_processors(provider)
         provider.set_processors([*existing, processor])
@@ -102,13 +192,16 @@ def _uninstrument(self, **kwargs) -> None:
         if self._processor is None:
             return
 
+        tracing = _load_tracing_module()
         provider = tracing.get_trace_provider()
         current = _get_registered_processors(provider)
         filtered = [proc for proc in current if proc is not self._processor]
         provider.set_processors(filtered)
 
-        self._processor.shutdown()
-        self._processor = None
+        try:
+            self._processor.shutdown()
+        finally:
+            self._processor = None
 
     def instrumentation_dependencies(self) -> Collection[str]:
+        """Return the ``_instruments`` collection imported from ``.package``."""
         return _instruments
@@ -0,0 +1,135 @@
+"""Centralized semantic convention constants for GenAI instrumentation.
+
+Consolidates provider names, operation names, tool types, output types,
+evaluation attributes, and helper maps so other modules can import from
+one place. Keeping strings in one module reduces drift as the spec evolves.
+"""
+
+from __future__ import annotations
+
+
+# Provider names (superset for forward compatibility)
+class GenAIProvider:
+    """Provider name string constants.
+
+    ``ALL`` collects every constant declared here for membership checks. It
+    is a ``frozenset`` so that importers cannot alter the shared set at
+    runtime.
+    """
+
+    OPENAI = "openai"
+    GCP_GEN_AI = "gcp.gen_ai"
+    GCP_VERTEX_AI = "gcp.vertex_ai"
+    GCP_GEMINI = "gcp.gemini"
+    ANTHROPIC = "anthropic"
+    COHERE = "cohere"
+    AZURE_AI_INFERENCE = "azure.ai.inference"
+    AZURE_AI_OPENAI = "azure.ai.openai"
+    IBM_WATSONX_AI = "ibm.watsonx.ai"
+    AWS_BEDROCK = "aws.bedrock"
+    PERPLEXITY = "perplexity"
+    X_AI = "x_ai"
+    DEEPSEEK = "deepseek"
+    GROQ = "groq"
+    MISTRAL_AI = "mistral_ai"
+
+    ALL = frozenset(
+        {
+            OPENAI,
+            GCP_GEN_AI,
+            GCP_VERTEX_AI,
+            GCP_GEMINI,
+            ANTHROPIC,
+            COHERE,
+            AZURE_AI_INFERENCE,
+            AZURE_AI_OPENAI,
+            IBM_WATSONX_AI,
+            AWS_BEDROCK,
+            PERPLEXITY,
+            X_AI,
+            DEEPSEEK,
+            GROQ,
+            MISTRAL_AI,
+        }
+    )
+
+
+class GenAIOperationName:
+    """Operation name string constants.
+
+    ``CLASS_FALLBACK`` maps a lower-cased span data class name to the
+    operation name used by default for that class.
+    """
+
+    CHAT = "chat"
+    GENERATE_CONTENT = "generate_content"
+    TEXT_COMPLETION = "text_completion"
+    EMBEDDINGS = "embeddings"
+    CREATE_AGENT = "create_agent"
+    INVOKE_AGENT = "invoke_agent"
+    EXECUTE_TOOL = "execute_tool"
+    TRANSCRIPTION = "transcription"
+    SPEECH = "speech_generation"
+    GUARDRAIL = "guardrail_check"
+    HANDOFF = "agent_handoff"
+    RESPONSE = "response"  # internal aggregator in current processor
+
+    # Mapping of span data class (lower) to default op (heuristic)
+    CLASS_FALLBACK = {
+        "generationspan": CHAT,
+        "responsespan": RESPONSE,
+        "functionspan": EXECUTE_TOOL,
+        "agentspan": INVOKE_AGENT,
+    }
+
+
+class GenAIOutputType:
+    """Output type string constants."""
+
+    TEXT = "text"
+    JSON = "json"
+    IMAGE = "image"
+    SPEECH = "speech"
+    # existing custom inference types retained for backward compatibility
+    # NOTE(review): no such types are declared in this class - confirm
+    # whether entries are missing or the comment above is stale.
+
+
+class GenAIToolType:
+    """Tool type string constants.
+
+    ``ALL`` holds every constant declared here; it is a ``frozenset`` so the
+    shared set cannot be modified by importers.
+    """
+
+    FUNCTION = "function"
+    EXTENSION = "extension"
+    DATASTORE = "datastore"
+
+    ALL = frozenset({FUNCTION, EXTENSION, DATASTORE})
+
+
+class GenAIEvaluationAttributes:
+    """Attribute keys under the ``gen_ai.evaluation.`` prefix."""
+
+    NAME = "gen_ai.evaluation.name"
+    SCORE_VALUE = "gen_ai.evaluation.score.value"
+    SCORE_LABEL = "gen_ai.evaluation.score.label"
+    EXPLANATION = "gen_ai.evaluation.explanation"
+
+
+# Complete list of GenAI semantic convention attribute keys
+# Provider and operation.
+GEN_AI_PROVIDER_NAME = "gen_ai.provider.name"
+GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
+# gen_ai.request.*
+GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
+GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
+GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
+GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"
+GEN_AI_REQUEST_TOP_K = "gen_ai.request.top_k"
+GEN_AI_REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty"
+GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty"
+GEN_AI_REQUEST_CHOICE_COUNT = "gen_ai.request.choice.count"
+GEN_AI_REQUEST_STOP_SEQUENCES = "gen_ai.request.stop_sequences"
+GEN_AI_REQUEST_ENCODING_FORMATS = "gen_ai.request.encoding_formats"
+GEN_AI_REQUEST_SEED = "gen_ai.request.seed"
+# gen_ai.response.*
+GEN_AI_RESPONSE_ID = "gen_ai.response.id"
+GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
+GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
+# gen_ai.usage.*
+GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
+GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
+GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"
+# Conversation and gen_ai.agent.*
+GEN_AI_CONVERSATION_ID = "gen_ai.conversation.id"
+GEN_AI_AGENT_ID = "gen_ai.agent.id"
+GEN_AI_AGENT_NAME = "gen_ai.agent.name"
+GEN_AI_AGENT_DESCRIPTION = "gen_ai.agent.description"
+# gen_ai.tool.*
+GEN_AI_TOOL_NAME = "gen_ai.tool.name"
+GEN_AI_TOOL_TYPE = "gen_ai.tool.type"
+GEN_AI_TOOL_CALL_ID = "gen_ai.tool.call.id"
+GEN_AI_TOOL_DESCRIPTION = "gen_ai.tool.description"
+GEN_AI_TOOL_CALL_ARGUMENTS = "gen_ai.tool.call.arguments"
+GEN_AI_TOOL_CALL_RESULT = "gen_ai.tool.call.result"
+GEN_AI_TOOL_DEFINITIONS = "gen_ai.tool.definitions"
+# Orchestrator, output type, instructions and messages.
+GEN_AI_ORCHESTRATOR_AGENT_DEFINITIONS = "gen_ai.orchestrator.agent.definitions"
+GEN_AI_OUTPUT_TYPE = "gen_ai.output.type"
+GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
+GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"
+GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"
+# gen_ai.guardrail.* and gen_ai.handoff.*
+GEN_AI_GUARDRAIL_NAME = "gen_ai.guardrail.name"
+GEN_AI_GUARDRAIL_TRIGGERED = "gen_ai.guardrail.triggered"
+GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent"
+GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent"
+# Embeddings and data source.
+GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count"
+GEN_AI_DATA_SOURCE_ID = "gen_ai.data_source.id"