Skip to content

Commit 353fdb0

Browse files
committed
GenAI: emit system_instructions as typed text objects; add redaction mode returning [{"type":"text","content":"readacted"}] when sensitive capture disabled; add TODO to restore processors on uninstrument
1 parent 746c934 commit 353fdb0

File tree

2 files changed

+65
-8
lines changed

2 files changed

+65
-8
lines changed

instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def _instrument(self, **kwargs):
115115
def _uninstrument(self, **kwargs):
116116
"""Uninstruments the OpenAI library for agent frameworks."""
117117
# No-op: optional processor registry may not be present.
118+
# TODO: maintain the old list of processors and restore it upon _uninstrument.
118119
return
119120

120121
def instrumentation_dependencies(self) -> Collection[str]:

instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/genai_semantic_processor.py

Lines changed: 64 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -357,21 +357,67 @@ def _record_metrics(
357357

358358
def _collect_system_instructions(
359359
self, messages: Sequence[Any] | None
360-
) -> list[str]:
361-
"""Return list of system/ai role message contents."""
360+
) -> list[dict[str, str]]:
361+
"""Return system/ai role instructions as typed text objects.
362+
363+
Enforces format: [{"type": "text", "content": "..."}].
364+
Handles message content that may be a string, list of parts,
365+
or a dict with text/content fields.
366+
"""
362367
if not messages:
363368
return []
364-
out: list[str] = []
369+
out: list[dict[str, str]] = []
365370
for m in messages:
366371
if not isinstance(m, dict):
367372
continue
368373
role = m.get("role")
369374
if role in {"system", "ai"}:
370375
content = m.get("content")
371-
if content is not None:
372-
out.append(str(content))
376+
out.extend(self._normalize_to_text_parts(content))
373377
return out
374378

379+
def _normalize_to_text_parts(self, content: Any) -> list[dict[str, str]]:
380+
"""Normalize arbitrary content into typed text parts.
381+
382+
- String -> [{type: text, content: <string>}]
383+
- List/Tuple -> map each item to a text part (string/dict supported)
384+
- Dict -> use 'text' or 'content' field when available; else str(dict)
385+
- Other -> str(value)
386+
"""
387+
parts: list[dict[str, str]] = []
388+
if content is None:
389+
return parts
390+
if isinstance(content, str):
391+
parts.append({"type": "text", "content": content})
392+
return parts
393+
if isinstance(content, (list, tuple)):
394+
for item in content:
395+
if isinstance(item, str):
396+
parts.append({"type": "text", "content": item})
397+
elif isinstance(item, dict):
398+
txt = item.get("text") or item.get("content")
399+
if isinstance(txt, str) and txt:
400+
parts.append({"type": "text", "content": txt})
401+
else:
402+
parts.append({"type": "text", "content": str(item)})
403+
else:
404+
parts.append({"type": "text", "content": str(item)})
405+
return parts
406+
if isinstance(content, dict):
407+
txt = content.get("text") or content.get("content")
408+
if isinstance(txt, str) and txt:
409+
parts.append({"type": "text", "content": txt})
410+
else:
411+
parts.append({"type": "text", "content": str(content)})
412+
return parts
413+
# Fallback for other types
414+
parts.append({"type": "text", "content": str(content)})
415+
return parts
416+
417+
def _redacted_text_parts(self) -> list[dict[str, str]]:
418+
"""Return a single redacted text part for system instructions."""
419+
return [{"type": "text", "content": "readacted"}]
420+
375421
def _infer_output_type(self, span_data: Any) -> str:
376422
"""Infer gen_ai.output.type for multiple span kinds."""
377423
if isinstance(span_data, FunctionSpanData):
@@ -746,7 +792,12 @@ def _get_attributes_from_generation_span_data(
746792

747793
# System instructions
748794
if self._capture_system_instructions and span_data.input:
749-
sys_instr = self._collect_system_instructions(span_data.input)
795+
if self.include_sensitive_data:
796+
sys_instr = self._collect_system_instructions(
797+
span_data.input
798+
)
799+
else:
800+
sys_instr = self._redacted_text_parts()
750801
if sys_instr:
751802
yield (
752803
GEN_AI_SYSTEM_INSTRUCTIONS,
@@ -808,7 +859,7 @@ def _get_attributes_from_agent_span_data(
808859
try:
809860
defs = span_data.agent_definitions
810861
if isinstance(defs, (list, tuple)):
811-
collected: list[str] = []
862+
collected: list[dict[str, str]] = []
812863
for d in defs:
813864
if isinstance(d, dict):
814865
msgs = d.get("messages") or d.get(
@@ -955,7 +1006,12 @@ def _get_attributes_from_response_span_data(
9551006

9561007
# System instructions
9571008
if self._capture_system_instructions and span_data.input:
958-
sys_instr = self._collect_system_instructions(span_data.input)
1009+
if self.include_sensitive_data:
1010+
sys_instr = self._collect_system_instructions(
1011+
span_data.input
1012+
)
1013+
else:
1014+
sys_instr = self._redacted_text_parts()
9591015
if sys_instr:
9601016
yield (
9611017
GEN_AI_SYSTEM_INSTRUCTIONS,

0 commit comments

Comments
 (0)