Skip to content

Commit f63cbbf

Browse files
committed
Capture agent content via child span aggregation
1 parent 3bbc833 commit f63cbbf

File tree

6 files changed

+722
-132
lines changed

6 files changed

+722
-132
lines changed
Lines changed: 141 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,133 @@
1-
# Copyright The OpenTelemetry Authors
2-
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
4-
# you may not use this file except in compliance with the License.
5-
# You may obtain a copy of the License at
6-
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
9-
# Unless required by applicable law or agreed to in writing, software
10-
# distributed under the License is distributed on an "AS IS" BASIS,
11-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
# See the License for the specific language governing permissions and
13-
# limitations under the License.
14-
151
"""OpenAI Agents instrumentation for OpenTelemetry."""
162

173
from __future__ import annotations
184

5+
import importlib
6+
import logging
197
import os
20-
from typing import Collection, Protocol
21-
22-
from agents import tracing
23-
from agents.tracing.processor_interface import TracingProcessor
8+
from typing import Any, Collection
249

10+
from opentelemetry._events import get_event_logger
2511
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
2612
from opentelemetry.semconv._incubating.attributes import (
2713
gen_ai_attributes as GenAI,
2814
)
2915
from opentelemetry.semconv.schemas import Schemas
3016
from opentelemetry.trace import get_tracer
3117

18+
from .constants import (
19+
GenAIEvaluationAttributes,
20+
GenAIOperationName,
21+
GenAIOutputType,
22+
GenAIProvider,
23+
GenAIToolType,
24+
)
25+
from .genai_semantic_processor import (
26+
ContentCaptureMode,
27+
GenAISemanticProcessor,
28+
)
3229
from .package import _instruments
33-
from .span_processor import _OpenAIAgentsSpanProcessor
34-
from .version import __version__ # noqa: F401
3530

31+
__all__ = [
32+
"OpenAIAgentsInstrumentor",
33+
"GenAIProvider",
34+
"GenAIOperationName",
35+
"GenAIToolType",
36+
"GenAIOutputType",
37+
"GenAIEvaluationAttributes",
38+
]
3639

37-
class _ProcessorHolder(Protocol):
38-
_processors: Collection[TracingProcessor]
40+
logger = logging.getLogger(__name__)
3941

42+
# Environment variable names consulted when the matching keyword argument is
# not supplied to the instrumentor.

# Generic GenAI content-capture switch; read before the instrumentation-specific
# _CAPTURE_CONTENT_ENV, which is only used when this one is unset or empty.
_CONTENT_CAPTURE_ENV = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
# Fallback for the "system" keyword argument.
_SYSTEM_OVERRIDE_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_SYSTEM"
# Instrumentation-specific content-capture switch (second in precedence).
_CAPTURE_CONTENT_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_CONTENT"
# Fallback for the "capture_metrics" keyword argument.
_CAPTURE_METRICS_ENV = "OTEL_INSTRUMENTATION_OPENAI_AGENTS_CAPTURE_METRICS"
4046

41-
class _TraceProviderLike(Protocol):
42-
_multi_processor: _ProcessorHolder
4347

48+
def _load_tracing_module():  # pragma: no cover - exercised via tests
    """Import and return the ``agents.tracing`` module.

    The module is looked up through ``importlib`` each time this function is
    called rather than through a module-level import statement.

    Raises:
        ImportError: If ``agents.tracing`` cannot be imported.
    """
    return importlib.import_module("agents.tracing")
4450

45-
__all__ = ["OpenAIAgentsInstrumentor"]
4651

52+
def _get_registered_processors(provider) -> list:
    """Return the tracing processors currently registered on ``provider``.

    The processors are read from the provider's private
    ``_multi_processor._processors`` collection via attribute lookup. A
    provider that lacks either attribute, or whose collection is ``None``,
    yields an empty list instead of raising.

    Args:
        provider: Trace provider object to inspect; any object is accepted.

    Returns:
        A new list with the processors in the order the collection yields
        them.
    """
    multi = getattr(provider, "_multi_processor", None)
    # ``or ()`` also covers an attribute that exists but holds None, which
    # ``list()`` would otherwise reject with a TypeError.
    processors = getattr(multi, "_processors", None) or ()
    return list(processors)
4756

48-
def _resolve_system(_: str | None) -> str:
49-
# OpenAI spans must report provider name "openai" per semantic conventions.
50-
return GenAI.GenAiSystemValues.OPENAI.value
5157

58+
def _resolve_system(value: str | None) -> str:
    """Resolve the GenAI system name to report.

    Args:
        value: Requested system name, or ``None``/empty to use the default.

    Returns:
        The value of the ``GenAI.GenAiSystemValues`` member whose value or
        name equals ``value`` (compared case-insensitively, ignoring
        surrounding whitespace); the OpenAI member's value when ``value`` is
        ``None``, empty or whitespace-only; otherwise the custom value with
        surrounding whitespace removed.
    """
    default = GenAI.GenAiSystemValues.OPENAI.value
    if not value:
        return default

    trimmed = value.strip()
    if not trimmed:
        # Whitespace-only input carries no name; treat it like an unset value.
        return default

    normalized = trimmed.lower()
    for member in GenAI.GenAiSystemValues:
        if normalized in (member.value, member.name.lower()):
            return member.value

    # Unknown names pass through with their case intact, minus the padding
    # that was already ignored while matching.
    return trimmed
5769

58-
The provider exposes a private `_multi_processor` attribute with a `_processors`
59-
collection that stores the currently registered processors in execution order.
60-
"""
61-
multi = getattr(provider, "_multi_processor", None)
62-
processors = getattr(multi, "_processors", ())
63-
return list(processors)
70+
71+
def _resolve_content_mode(value: Any) -> ContentCaptureMode:
    """Convert a content-capture setting into a ``ContentCaptureMode``.

    Args:
        value: A ``ContentCaptureMode`` (returned unchanged), a bool,
            ``None``, or any object whose string form names a mode.

    Returns:
        ``SPAN_AND_EVENT`` for ``True``, ``None``, blank text and
        unrecognized text; ``NO_CONTENT`` for ``False``; otherwise the mode
        selected by the case-insensitive, whitespace-trimmed text.
    """
    if isinstance(value, ContentCaptureMode):
        return value
    if isinstance(value, bool):
        return (
            ContentCaptureMode.SPAN_AND_EVENT
            if value
            else ContentCaptureMode.NO_CONTENT
        )
    if value is None:
        return ContentCaptureMode.SPAN_AND_EVENT

    text = str(value).strip().lower()
    if not text:
        return ContentCaptureMode.SPAN_AND_EVENT

    mapping = {
        "span_only": ContentCaptureMode.SPAN_ONLY,
        "span-only": ContentCaptureMode.SPAN_ONLY,
        "span": ContentCaptureMode.SPAN_ONLY,
        "event_only": ContentCaptureMode.EVENT_ONLY,
        "event-only": ContentCaptureMode.EVENT_ONLY,
        "event": ContentCaptureMode.EVENT_ONLY,
        "span_and_event": ContentCaptureMode.SPAN_AND_EVENT,
        "span-and-event": ContentCaptureMode.SPAN_AND_EVENT,
        "span_and_events": ContentCaptureMode.SPAN_AND_EVENT,
        "all": ContentCaptureMode.SPAN_AND_EVENT,
        "true": ContentCaptureMode.SPAN_AND_EVENT,
        "1": ContentCaptureMode.SPAN_AND_EVENT,
        "yes": ContentCaptureMode.SPAN_AND_EVENT,
        # "on"/"off" mirror the spellings _resolve_bool accepts, so "off"
        # disables capture instead of falling through to the default.
        "on": ContentCaptureMode.SPAN_AND_EVENT,
        "no_content": ContentCaptureMode.NO_CONTENT,
        "false": ContentCaptureMode.NO_CONTENT,
        "0": ContentCaptureMode.NO_CONTENT,
        "no": ContentCaptureMode.NO_CONTENT,
        "none": ContentCaptureMode.NO_CONTENT,
        "off": ContentCaptureMode.NO_CONTENT,
    }

    mode = mapping.get(text)
    if mode is None:
        # The fallback turns capture on, so make a mistyped setting visible.
        logger.warning(
            "Unrecognized content capture mode %r; defaulting to %s",
            value,
            ContentCaptureMode.SPAN_AND_EVENT,
        )
        return ContentCaptureMode.SPAN_AND_EVENT
    return mode
110+
111+
112+
def _resolve_bool(value: Any, default: bool) -> bool:
    """Interpret ``value`` as a boolean flag.

    Args:
        value: ``None``, a bool, or any object whose string form spells a
            flag.
        default: Result used when ``value`` is ``None`` or its text is not a
            recognized spelling.

    Returns:
        ``value`` itself when it is a bool; ``True`` for "true", "1", "yes"
        or "on"; ``False`` for "false", "0", "no" or "off" (text is compared
        case-insensitively after trimming whitespace); ``default`` otherwise.
    """
    if value is None:
        return default
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in {"true", "1", "yes", "on"}:
        return True
    if text in {"false", "0", "no", "off"}:
        return False
    return default
64123

65124

66125
class OpenAIAgentsInstrumentor(BaseInstrumentor):
67-
"""Instrumentation that bridges OpenAI Agents tracing to OpenTelemetry spans."""
126+
"""Instrumentation that bridges OpenAI Agents tracing to OpenTelemetry."""
68127

69128
def __init__(self) -> None:
70129
super().__init__()
71-
self._processor: _OpenAIAgentsSpanProcessor | None = None
130+
self._processor: GenAISemanticProcessor | None = None
72131

73132
def _instrument(self, **kwargs) -> None:
74133
if self._processor is not None:
@@ -82,17 +141,48 @@ def _instrument(self, **kwargs) -> None:
82141
schema_url=Schemas.V1_28_0.value,
83142
)
84143

85-
system = _resolve_system(kwargs.get("system"))
86-
agent_name_override = kwargs.get("agent_name") or os.getenv(
87-
"OTEL_GENAI_AGENT_NAME"
144+
event_logger_provider = kwargs.get("event_logger_provider")
145+
event_logger = get_event_logger(
146+
__name__,
147+
"",
148+
schema_url=Schemas.V1_28_0.value,
149+
event_logger_provider=event_logger_provider,
150+
)
151+
152+
system_override = kwargs.get("system") or os.getenv(
153+
_SYSTEM_OVERRIDE_ENV
88154
)
155+
system = _resolve_system(system_override)
156+
157+
content_override = kwargs.get("capture_message_content")
158+
if content_override is None:
159+
content_override = os.getenv(_CONTENT_CAPTURE_ENV) or os.getenv(
160+
_CAPTURE_CONTENT_ENV
161+
)
162+
content_mode = _resolve_content_mode(content_override)
163+
164+
metrics_override = kwargs.get("capture_metrics")
165+
if metrics_override is None:
166+
metrics_override = os.getenv(_CAPTURE_METRICS_ENV)
167+
metrics_enabled = _resolve_bool(metrics_override, default=True)
89168

90-
processor = _OpenAIAgentsSpanProcessor(
169+
processor = GenAISemanticProcessor(
91170
tracer=tracer,
92-
system=system,
93-
agent_name_override=agent_name_override,
171+
event_logger=event_logger,
172+
system_name=system,
173+
include_sensitive_data=content_mode
174+
!= ContentCaptureMode.NO_CONTENT,
175+
content_mode=content_mode,
176+
metrics_enabled=metrics_enabled,
177+
agent_name=kwargs.get("agent_name"),
178+
agent_id=kwargs.get("agent_id"),
179+
agent_description=kwargs.get("agent_description"),
180+
base_url=kwargs.get("base_url"),
181+
server_address=kwargs.get("server_address"),
182+
server_port=kwargs.get("server_port"),
94183
)
95184

185+
tracing = _load_tracing_module()
96186
provider = tracing.get_trace_provider()
97187
existing = _get_registered_processors(provider)
98188
provider.set_processors([*existing, processor])
@@ -102,13 +192,16 @@ def _uninstrument(self, **kwargs) -> None:
102192
if self._processor is None:
103193
return
104194

195+
tracing = _load_tracing_module()
105196
provider = tracing.get_trace_provider()
106197
current = _get_registered_processors(provider)
107198
filtered = [proc for proc in current if proc is not self._processor]
108199
provider.set_processors(filtered)
109200

110-
self._processor.shutdown()
111-
self._processor = None
201+
try:
202+
self._processor.shutdown()
203+
finally:
204+
self._processor = None
112205

113206
def instrumentation_dependencies(self) -> Collection[str]:
114207
return _instruments
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
"""Centralized semantic convention constants for GenAI instrumentation.
2+
3+
Consolidates provider names, operation names, tool types, output types,
4+
evaluation attributes, and helper maps so other modules can import from
5+
one place. Keeping strings in one module reduces drift as the spec evolves.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
11+
# Provider names (superset for forward compatibility)
12+
class GenAIProvider:
    """Provider name strings, kept as a superset for forward compatibility."""

    OPENAI = "openai"
    GCP_GEN_AI = "gcp.gen_ai"
    GCP_VERTEX_AI = "gcp.vertex_ai"
    GCP_GEMINI = "gcp.gemini"
    ANTHROPIC = "anthropic"
    COHERE = "cohere"
    AZURE_AI_INFERENCE = "azure.ai.inference"
    AZURE_AI_OPENAI = "azure.ai.openai"
    IBM_WATSONX_AI = "ibm.watsonx.ai"
    AWS_BEDROCK = "aws.bedrock"
    PERPLEXITY = "perplexity"
    X_AI = "x_ai"
    DEEPSEEK = "deepseek"
    GROQ = "groq"
    MISTRAL_AI = "mistral_ai"

    # Every provider name above. Frozen so that code importing this shared
    # class attribute cannot add or remove entries for everyone else.
    ALL = frozenset(
        {
            OPENAI,
            GCP_GEN_AI,
            GCP_VERTEX_AI,
            GCP_GEMINI,
            ANTHROPIC,
            COHERE,
            AZURE_AI_INFERENCE,
            AZURE_AI_OPENAI,
            IBM_WATSONX_AI,
            AWS_BEDROCK,
            PERPLEXITY,
            X_AI,
            DEEPSEEK,
            GROQ,
            MISTRAL_AI,
        }
    )
46+
47+
48+
class GenAIOperationName:
    """Operation name strings, plus a span-class fallback table."""

    CHAT = "chat"
    GENERATE_CONTENT = "generate_content"
    TEXT_COMPLETION = "text_completion"
    EMBEDDINGS = "embeddings"
    CREATE_AGENT = "create_agent"
    INVOKE_AGENT = "invoke_agent"
    EXECUTE_TOOL = "execute_tool"
    TRANSCRIPTION = "transcription"
    SPEECH = "speech_generation"
    GUARDRAIL = "guardrail_check"
    HANDOFF = "agent_handoff"
    RESPONSE = "response"  # internal aggregator in current processor

    # Mapping of span data class (lower) to default op (heuristic).
    # Keys are lower-cased class names; values are operation names above.
    CLASS_FALLBACK = {
        "generationspan": CHAT,
        "responsespan": RESPONSE,
        "functionspan": EXECUTE_TOOL,
        "agentspan": INVOKE_AGENT,
    }
69+
70+
71+
class GenAIOutputType:
    """Output type strings."""

    TEXT = "text"
    JSON = "json"
    IMAGE = "image"
    SPEECH = "speech"
    # NOTE(review): a trailing comment here used to read "existing custom
    # inference types retained for backward compatibility", but no such
    # values are defined in this class - confirm whether any are missing.

    # Every output type above; frozen because it is shared by all importers.
    ALL = frozenset({TEXT, JSON, IMAGE, SPEECH})
77+
78+
79+
class GenAIToolType:
    """Tool type strings."""

    FUNCTION = "function"
    EXTENSION = "extension"
    DATASTORE = "datastore"

    # Every tool type above. Frozen so that code importing this shared class
    # attribute cannot add or remove entries for everyone else.
    ALL = frozenset({FUNCTION, EXTENSION, DATASTORE})
85+
86+
87+
class GenAIEvaluationAttributes:
    """Attribute keys under the ``gen_ai.evaluation`` namespace."""

    NAME = "gen_ai.evaluation.name"
    SCORE_VALUE = "gen_ai.evaluation.score.value"
    SCORE_LABEL = "gen_ai.evaluation.score.label"
    EXPLANATION = "gen_ai.evaluation.explanation"
92+
93+
94+
# GenAI attribute keys available to this instrumentation.
# NOTE(review): this header previously described the list as the complete set
# of GenAI semantic convention keys; that cannot be confirmed from this module
# alone - verify against the semantic-conventions release in use.
GEN_AI_PROVIDER_NAME = "gen_ai.provider.name"
GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"
GEN_AI_REQUEST_TOP_K = "gen_ai.request.top_k"
GEN_AI_REQUEST_FREQUENCY_PENALTY = "gen_ai.request.frequency_penalty"
GEN_AI_REQUEST_PRESENCE_PENALTY = "gen_ai.request.presence_penalty"
GEN_AI_REQUEST_CHOICE_COUNT = "gen_ai.request.choice.count"
GEN_AI_REQUEST_STOP_SEQUENCES = "gen_ai.request.stop_sequences"
GEN_AI_REQUEST_ENCODING_FORMATS = "gen_ai.request.encoding_formats"
GEN_AI_REQUEST_SEED = "gen_ai.request.seed"
GEN_AI_RESPONSE_ID = "gen_ai.response.id"
GEN_AI_RESPONSE_MODEL = "gen_ai.response.model"
GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
GEN_AI_USAGE_TOTAL_TOKENS = "gen_ai.usage.total_tokens"
GEN_AI_CONVERSATION_ID = "gen_ai.conversation.id"
GEN_AI_AGENT_ID = "gen_ai.agent.id"
GEN_AI_AGENT_NAME = "gen_ai.agent.name"
GEN_AI_AGENT_DESCRIPTION = "gen_ai.agent.description"
GEN_AI_TOOL_NAME = "gen_ai.tool.name"
GEN_AI_TOOL_TYPE = "gen_ai.tool.type"
GEN_AI_TOOL_CALL_ID = "gen_ai.tool.call.id"
GEN_AI_TOOL_DESCRIPTION = "gen_ai.tool.description"
GEN_AI_TOOL_CALL_ARGUMENTS = "gen_ai.tool.call.arguments"
GEN_AI_TOOL_CALL_RESULT = "gen_ai.tool.call.result"
GEN_AI_TOOL_DEFINITIONS = "gen_ai.tool.definitions"
GEN_AI_ORCHESTRATOR_AGENT_DEFINITIONS = "gen_ai.orchestrator.agent.definitions"
GEN_AI_OUTPUT_TYPE = "gen_ai.output.type"
GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
GEN_AI_INPUT_MESSAGES = "gen_ai.input.messages"
GEN_AI_OUTPUT_MESSAGES = "gen_ai.output.messages"
GEN_AI_GUARDRAIL_NAME = "gen_ai.guardrail.name"
GEN_AI_GUARDRAIL_TRIGGERED = "gen_ai.guardrail.triggered"
GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent"
GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent"
GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count"
GEN_AI_DATA_SOURCE_ID = "gen_ai.data_source.id"

0 commit comments

Comments
 (0)