Skip to content

Commit 8a90db1

Browse files
committed
Apply ruff-format after normalization changes
1 parent 353fdb0 commit 8a90db1

File tree

1 file changed

+185
-43
lines changed
  • instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents

1 file changed

+185
-43
lines changed

instrumentation-genai/opentelemetry-instrumentation-openai-agents/src/opentelemetry/instrumentation/openai_agents/genai_semantic_processor.py

Lines changed: 185 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,7 @@
105105
if TYPE_CHECKING:
106106
pass
107107

108-
# Legacy attribute names for backward compatibility
109-
GEN_AI_SYSTEM_LEGACY = "gen_ai.system"
110-
GEN_AI_PROMPT_LEGACY = "gen_ai.prompt"
111-
GEN_AI_COMPLETION_LEGACY = "gen_ai.completion"
112-
GEN_AI_TOOL_INPUT_LEGACY = "gen_ai.tool.input"
113-
GEN_AI_TOOL_OUTPUT_LEGACY = "gen_ai.tool.output"
108+
# Legacy attributes removed
114109

115110
logger = logging.getLogger(__name__)
116111

@@ -221,7 +216,8 @@ def __init__(
221216
self.system_name = normalize_provider(system_name) or system_name
222217
self.include_sensitive_data = include_sensitive_data
223218
self.base_url = base_url
224-
self.emit_legacy = emit_legacy
219+
# Legacy emission removed; parameter retained for compatibility but unused
220+
self.emit_legacy = False
225221

226222
# Agent information - use init parameters or defaults
227223
self.agent_name = agent_name or "agent"
@@ -418,6 +414,171 @@ def _redacted_text_parts(self) -> list[dict[str, str]]:
418414
"""Return a single redacted text part for system instructions."""
419415
return [{"type": "text", "content": "readacted"}]
420416

417+
def _normalize_messages_to_role_parts(
    self, messages: Sequence[Any] | None
) -> list[dict[str, Any]]:
    """Normalize input messages to the enforced role+parts schema.

    Each message becomes ``{"role": <role>, "parts": [{"type": ..., ...}]}``.
    When ``self.include_sensitive_data`` is False, text content, tool_call
    arguments, and tool_call_response results are replaced with the
    placeholder string ``"readacted"``.

    NOTE(review): the placeholder literal "readacted" looks like a typo of
    "redacted". It is deliberately kept byte-identical here because the rest
    of the file (e.g. ``_redacted_text_parts``) emits the same string; fix it
    file-wide in a single change if the correction is intended.

    Args:
        messages: Incoming messages; dicts are interpreted (OpenAI-style
            ``content``/``tool_calls`` or pre-structured ``parts``), anything
            else is coerced to a user text message.

    Returns:
        A list of ``{"role": ..., "parts": [...]}`` dicts; empty list when
        *messages* is falsy.
    """
    if not messages:
        return []

    def redact(value: Any) -> Any:
        # Central redaction gate: pass the value through only when
        # sensitive-data capture is enabled.
        return value if self.include_sensitive_data else "readacted"

    normalized: list[dict[str, Any]] = []
    for message in messages:
        if not isinstance(message, dict):
            # Fallback: anything that is not a mapping is treated as
            # user-authored text.
            normalized.append(
                {
                    "role": "user",
                    "parts": [{"type": "text", "content": redact(str(message))}],
                }
            )
            continue

        role = message.get("role") or "user"
        parts: list[dict[str, Any]] = []

        # 1) Pre-structured "parts" arrays are normalized part-by-part.
        raw_parts = message.get("parts")
        if isinstance(raw_parts, (list, tuple)):
            for part in raw_parts:
                if not isinstance(part, dict):
                    parts.append({"type": "text", "content": redact(str(part))})
                    continue
                ptype = part.get("type") or "text"
                newp: dict[str, Any] = {"type": ptype}
                if ptype == "text":
                    txt = part.get("content") or part.get("text")
                    # Fall back to the whole part's repr when the text field
                    # is missing or not a string (mirrors original behavior).
                    newp["content"] = redact(
                        txt if isinstance(txt, str) else str(part)
                    )
                elif ptype == "tool_call":
                    newp["id"] = part.get("id")
                    newp["name"] = part.get("name")
                    newp["arguments"] = redact(part.get("arguments"))
                elif ptype == "tool_call_response":
                    newp["id"] = part.get("id") or message.get("tool_call_id")
                    newp["result"] = redact(
                        part.get("result") or part.get("content")
                    )
                else:
                    # Unknown part types degrade to their string repr.
                    newp["content"] = redact(str(part))
                parts.append(newp)

        # 2) OpenAI-style "content": a plain string or a list of items.
        content = message.get("content")
        if isinstance(content, str):
            parts.append({"type": "text", "content": redact(content)})
        elif isinstance(content, (list, tuple)):
            for item in content:
                if (
                    isinstance(item, dict)
                    and (item.get("type") or "text") == "text"
                ):
                    txt = item.get("text") or item.get("content")
                    parts.append(
                        {
                            "type": "text",
                            "content": redact(
                                txt if isinstance(txt, str) else str(item)
                            ),
                        }
                    )
                else:
                    # Non-dict items and non-text part types degrade to
                    # their string repr.
                    parts.append(
                        {"type": "text", "content": redact(str(item))}
                    )

        # 3) Assistant tool invocations become tool_call parts.
        if role == "assistant" and isinstance(
            message.get("tool_calls"), (list, tuple)
        ):
            for tc in message["tool_calls"]:
                if not isinstance(tc, dict):
                    continue
                call_part: dict[str, Any] = {
                    "type": "tool_call",
                    "id": tc.get("id"),
                }
                fn = tc.get("function") or {}
                if isinstance(fn, dict):
                    call_part["name"] = fn.get("name")
                    call_part["arguments"] = redact(fn.get("arguments"))
                parts.append(call_part)

        # 4) Tool/function role messages become tool_call_response parts.
        #    NOTE(review): when such a message also carries string "content",
        #    step 2 has already emitted a text part, so both parts appear —
        #    confirm this duplication is intended.
        if role in {"tool", "function"}:
            parts.append(
                {
                    "type": "tool_call_response",
                    "id": message.get("tool_call_id") or message.get("id"),
                    "result": redact(
                        message.get("result") or message.get("content")
                    ),
                }
            )

        # Guarantee at least one part so the schema is never empty.
        normalized.append(
            {"role": role, "parts": parts or self._redacted_text_parts()}
        )

    return normalized
581+
421582
def _infer_output_type(self, span_data: Any) -> str:
422583
"""Infer gen_ai.output.type for multiple span kinds."""
423584
if isinstance(span_data, FunctionSpanData):
@@ -483,8 +644,7 @@ def on_trace_start(self, trace: Trace) -> None:
483644
attributes = {
484645
GEN_AI_PROVIDER_NAME: self.system_name,
485646
}
486-
if self.emit_legacy:
487-
attributes[GEN_AI_SYSTEM_LEGACY] = self.system_name
647+
# Legacy emission removed
488648

489649
# Add configured agent and server attributes
490650
if self.agent_name:
@@ -544,8 +704,7 @@ def on_span_start(self, span: Span[Any]) -> None:
544704
GEN_AI_PROVIDER_NAME: self.system_name,
545705
GEN_AI_OPERATION_NAME: operation_name,
546706
}
547-
if self.emit_legacy:
548-
attributes[GEN_AI_SYSTEM_LEGACY] = self.system_name
707+
# Legacy emission removed
549708

550709
# Add configured agent and server attributes
551710
if self.agent_name:
@@ -682,8 +841,7 @@ def _extract_genai_attributes(
682841

683842
# Base attributes
684843
yield GEN_AI_PROVIDER_NAME, self.system_name
685-
if self.emit_legacy:
686-
yield GEN_AI_SYSTEM_LEGACY, self.system_name
844+
# Legacy emission removed
687845

688846
# Add configured agent attributes (always include when set)
689847
if self.agent_name:
@@ -781,14 +939,12 @@ def _get_attributes_from_generation_span_data(
781939

782940
# Sensitive data capture
783941
if self.include_sensitive_data:
784-
# Input messages
942+
# Input messages (normalized to role+parts)
785943
if self._capture_messages and span_data.input:
786-
yield GEN_AI_INPUT_MESSAGES, safe_json_dumps(span_data.input)
787-
if self.emit_legacy:
788-
yield (
789-
GEN_AI_PROMPT_LEGACY,
790-
safe_json_dumps(span_data.input),
791-
)
944+
normalized_in = self._normalize_messages_to_role_parts(
945+
span_data.input
946+
)
947+
yield GEN_AI_INPUT_MESSAGES, safe_json_dumps(normalized_in)
792948

793949
# System instructions
794950
if self._capture_system_instructions and span_data.input:
@@ -804,14 +960,9 @@ def _get_attributes_from_generation_span_data(
804960
safe_json_dumps(sys_instr),
805961
)
806962

807-
# Output messages
963+
# Output messages (leave as-is; not normalized here)
808964
if self._capture_messages and span_data.output:
809965
yield GEN_AI_OUTPUT_MESSAGES, safe_json_dumps(span_data.output)
810-
if self.emit_legacy:
811-
yield (
812-
GEN_AI_COMPLETION_LEGACY,
813-
safe_json_dumps(span_data.output),
814-
)
815966

816967
# Output type
817968
yield (
@@ -920,8 +1071,7 @@ def _get_attributes_from_function_span_data(
9201071
else str(span_data.input)
9211072
)
9221073
yield GEN_AI_TOOL_CALL_ARGUMENTS, arg_val
923-
if self.emit_legacy:
924-
yield GEN_AI_TOOL_INPUT_LEGACY, arg_val
1074+
# Legacy emission removed
9251075

9261076
if span_data.output is not None:
9271077
res_val = (
@@ -930,8 +1080,7 @@ def _get_attributes_from_function_span_data(
9301080
else str(span_data.output)
9311081
)
9321082
yield GEN_AI_TOOL_CALL_RESULT, res_val
933-
if self.emit_legacy:
934-
yield GEN_AI_TOOL_OUTPUT_LEGACY, res_val
1083+
# Legacy emission removed
9351084

9361085
yield (
9371086
GEN_AI_OUTPUT_TYPE,
@@ -995,14 +1144,12 @@ def _get_attributes_from_response_span_data(
9951144

9961145
# Input/output messages
9971146
if self.include_sensitive_data:
998-
# Input messages
1147+
# Input messages (normalized to role+parts)
9991148
if self._capture_messages and span_data.input:
1000-
yield GEN_AI_INPUT_MESSAGES, safe_json_dumps(span_data.input)
1001-
if self.emit_legacy:
1002-
yield (
1003-
GEN_AI_PROMPT_LEGACY,
1004-
safe_json_dumps(span_data.input),
1005-
)
1149+
normalized_in = self._normalize_messages_to_role_parts(
1150+
span_data.input
1151+
)
1152+
yield GEN_AI_INPUT_MESSAGES, safe_json_dumps(normalized_in)
10061153

10071154
# System instructions
10081155
if self._capture_system_instructions and span_data.input:
@@ -1018,7 +1165,7 @@ def _get_attributes_from_response_span_data(
10181165
safe_json_dumps(sys_instr),
10191166
)
10201167

1021-
# Output messages
1168+
# Output messages (leave as-is; not normalized here)
10221169
if self._capture_messages:
10231170
output_messages = getattr(
10241171
getattr(span_data, "response", None), "output", None
@@ -1039,11 +1186,6 @@ def _get_attributes_from_response_span_data(
10391186
GEN_AI_OUTPUT_MESSAGES,
10401187
safe_json_dumps(collected),
10411188
)
1042-
if self.emit_legacy:
1043-
yield (
1044-
GEN_AI_COMPLETION_LEGACY,
1045-
safe_json_dumps(collected),
1046-
)
10471189

10481190
yield (
10491191
GEN_AI_OUTPUT_TYPE,

0 commit comments

Comments
 (0)