105
105
if TYPE_CHECKING :
106
106
pass
107
107
108
- # Legacy attribute names for backward compatibility
109
- GEN_AI_SYSTEM_LEGACY = "gen_ai.system"
110
- GEN_AI_PROMPT_LEGACY = "gen_ai.prompt"
111
- GEN_AI_COMPLETION_LEGACY = "gen_ai.completion"
112
- GEN_AI_TOOL_INPUT_LEGACY = "gen_ai.tool.input"
113
- GEN_AI_TOOL_OUTPUT_LEGACY = "gen_ai.tool.output"
108
+ # Legacy attributes removed
114
109
115
110
logger = logging .getLogger (__name__ )
116
111
@@ -221,7 +216,8 @@ def __init__(
221
216
self .system_name = normalize_provider (system_name ) or system_name
222
217
self .include_sensitive_data = include_sensitive_data
223
218
self .base_url = base_url
224
- self .emit_legacy = emit_legacy
219
+ # Legacy emission removed; parameter retained for compatibility but unused
220
+ self .emit_legacy = False
225
221
226
222
# Agent information - use init parameters or defaults
227
223
self .agent_name = agent_name or "agent"
@@ -418,6 +414,171 @@ def _redacted_text_parts(self) -> list[dict[str, str]]:
418
414
"""Return a single redacted text part for system instructions."""
419
415
return [{"type" : "text" , "content" : "readacted" }]
420
416
417
+ def _normalize_messages_to_role_parts (
418
+ self , messages : Sequence [Any ] | None
419
+ ) -> list [dict [str , Any ]]:
420
+ """Normalize input messages to enforced role+parts schema.
421
+
422
+ Each message becomes: {"role": <role>, "parts": [ {"type": ..., ...} ]}
423
+ Redaction: when include_sensitive_data is False, replace text content,
424
+ tool_call arguments, and tool_call_response result with "readacted".
425
+ """
426
+ if not messages :
427
+ return []
428
+ normalized : list [dict [str , Any ]] = []
429
+ for m in messages :
430
+ if not isinstance (m , dict ):
431
+ # Fallback: treat as user text
432
+ normalized .append (
433
+ {
434
+ "role" : "user" ,
435
+ "parts" : [
436
+ {
437
+ "type" : "text" ,
438
+ "content" : "readacted"
439
+ if not self .include_sensitive_data
440
+ else str (m ),
441
+ }
442
+ ],
443
+ }
444
+ )
445
+ continue
446
+
447
+ role = m .get ("role" ) or "user"
448
+ parts : list [dict [str , Any ]] = []
449
+
450
+ # Existing parts array
451
+ if isinstance (m .get ("parts" ), (list , tuple )):
452
+ for p in m ["parts" ]:
453
+ if isinstance (p , dict ):
454
+ ptype = p .get ("type" ) or "text"
455
+ newp : dict [str , Any ] = {"type" : ptype }
456
+ if ptype == "text" :
457
+ txt = p .get ("content" ) or p .get ("text" )
458
+ newp ["content" ] = (
459
+ "readacted"
460
+ if not self .include_sensitive_data
461
+ else (txt if isinstance (txt , str ) else str (p ))
462
+ )
463
+ elif ptype == "tool_call" :
464
+ newp ["id" ] = p .get ("id" )
465
+ newp ["name" ] = p .get ("name" )
466
+ args = p .get ("arguments" )
467
+ newp ["arguments" ] = (
468
+ "readacted"
469
+ if not self .include_sensitive_data
470
+ else args
471
+ )
472
+ elif ptype == "tool_call_response" :
473
+ newp ["id" ] = p .get ("id" ) or m .get ("tool_call_id" )
474
+ result = p .get ("result" ) or p .get ("content" )
475
+ newp ["result" ] = (
476
+ "readacted"
477
+ if not self .include_sensitive_data
478
+ else result
479
+ )
480
+ else :
481
+ newp ["content" ] = (
482
+ "readacted"
483
+ if not self .include_sensitive_data
484
+ else str (p )
485
+ )
486
+ parts .append (newp )
487
+ else :
488
+ parts .append (
489
+ {
490
+ "type" : "text" ,
491
+ "content" : "readacted"
492
+ if not self .include_sensitive_data
493
+ else str (p ),
494
+ }
495
+ )
496
+
497
+ # OpenAI content
498
+ content = m .get ("content" )
499
+ if isinstance (content , str ):
500
+ parts .append (
501
+ {
502
+ "type" : "text" ,
503
+ "content" : "readacted"
504
+ if not self .include_sensitive_data
505
+ else content ,
506
+ }
507
+ )
508
+ elif isinstance (content , (list , tuple )):
509
+ for item in content :
510
+ if isinstance (item , dict ):
511
+ itype = item .get ("type" ) or "text"
512
+ if itype == "text" :
513
+ txt = item .get ("text" ) or item .get ("content" )
514
+ parts .append (
515
+ {
516
+ "type" : "text" ,
517
+ "content" : "readacted"
518
+ if not self .include_sensitive_data
519
+ else (
520
+ txt
521
+ if isinstance (txt , str )
522
+ else str (item )
523
+ ),
524
+ }
525
+ )
526
+ else :
527
+ # Fallback for other part types
528
+ parts .append (
529
+ {
530
+ "type" : "text" ,
531
+ "content" : "readacted"
532
+ if not self .include_sensitive_data
533
+ else str (item ),
534
+ }
535
+ )
536
+ else :
537
+ parts .append (
538
+ {
539
+ "type" : "text" ,
540
+ "content" : "readacted"
541
+ if not self .include_sensitive_data
542
+ else str (item ),
543
+ }
544
+ )
545
+
546
+ # Assistant tool_calls
547
+ if role == "assistant" and isinstance (
548
+ m .get ("tool_calls" ), (list , tuple )
549
+ ):
550
+ for tc in m ["tool_calls" ]:
551
+ if not isinstance (tc , dict ):
552
+ continue
553
+ p = {"type" : "tool_call" }
554
+ p ["id" ] = tc .get ("id" )
555
+ fn = tc .get ("function" ) or {}
556
+ if isinstance (fn , dict ):
557
+ p ["name" ] = fn .get ("name" )
558
+ args = fn .get ("arguments" )
559
+ p ["arguments" ] = (
560
+ "readacted"
561
+ if not self .include_sensitive_data
562
+ else args
563
+ )
564
+ parts .append (p )
565
+
566
+ # Tool call response
567
+ if role in {"tool" , "function" }:
568
+ p = {"type" : "tool_call_response" }
569
+ p ["id" ] = m .get ("tool_call_id" ) or m .get ("id" )
570
+ result = m .get ("result" ) or m .get ("content" )
571
+ p ["result" ] = (
572
+ "readacted" if not self .include_sensitive_data else result
573
+ )
574
+ parts .append (p )
575
+
576
+ normalized .append (
577
+ {"role" : role , "parts" : parts or self ._redacted_text_parts ()}
578
+ )
579
+
580
+ return normalized
581
+
421
582
def _infer_output_type (self , span_data : Any ) -> str :
422
583
"""Infer gen_ai.output.type for multiple span kinds."""
423
584
if isinstance (span_data , FunctionSpanData ):
@@ -483,8 +644,7 @@ def on_trace_start(self, trace: Trace) -> None:
483
644
attributes = {
484
645
GEN_AI_PROVIDER_NAME : self .system_name ,
485
646
}
486
- if self .emit_legacy :
487
- attributes [GEN_AI_SYSTEM_LEGACY ] = self .system_name
647
+ # Legacy emission removed
488
648
489
649
# Add configured agent and server attributes
490
650
if self .agent_name :
@@ -544,8 +704,7 @@ def on_span_start(self, span: Span[Any]) -> None:
544
704
GEN_AI_PROVIDER_NAME : self .system_name ,
545
705
GEN_AI_OPERATION_NAME : operation_name ,
546
706
}
547
- if self .emit_legacy :
548
- attributes [GEN_AI_SYSTEM_LEGACY ] = self .system_name
707
+ # Legacy emission removed
549
708
550
709
# Add configured agent and server attributes
551
710
if self .agent_name :
@@ -682,8 +841,7 @@ def _extract_genai_attributes(
682
841
683
842
# Base attributes
684
843
yield GEN_AI_PROVIDER_NAME , self .system_name
685
- if self .emit_legacy :
686
- yield GEN_AI_SYSTEM_LEGACY , self .system_name
844
+ # Legacy emission removed
687
845
688
846
# Add configured agent attributes (always include when set)
689
847
if self .agent_name :
@@ -781,14 +939,12 @@ def _get_attributes_from_generation_span_data(
781
939
782
940
# Sensitive data capture
783
941
if self .include_sensitive_data :
784
- # Input messages
942
+ # Input messages (normalized to role+parts)
785
943
if self ._capture_messages and span_data .input :
786
- yield GEN_AI_INPUT_MESSAGES , safe_json_dumps (span_data .input )
787
- if self .emit_legacy :
788
- yield (
789
- GEN_AI_PROMPT_LEGACY ,
790
- safe_json_dumps (span_data .input ),
791
- )
944
+ normalized_in = self ._normalize_messages_to_role_parts (
945
+ span_data .input
946
+ )
947
+ yield GEN_AI_INPUT_MESSAGES , safe_json_dumps (normalized_in )
792
948
793
949
# System instructions
794
950
if self ._capture_system_instructions and span_data .input :
@@ -804,14 +960,9 @@ def _get_attributes_from_generation_span_data(
804
960
safe_json_dumps (sys_instr ),
805
961
)
806
962
807
- # Output messages
963
+ # Output messages (leave as-is; not normalized here)
808
964
if self ._capture_messages and span_data .output :
809
965
yield GEN_AI_OUTPUT_MESSAGES , safe_json_dumps (span_data .output )
810
- if self .emit_legacy :
811
- yield (
812
- GEN_AI_COMPLETION_LEGACY ,
813
- safe_json_dumps (span_data .output ),
814
- )
815
966
816
967
# Output type
817
968
yield (
@@ -920,8 +1071,7 @@ def _get_attributes_from_function_span_data(
920
1071
else str (span_data .input )
921
1072
)
922
1073
yield GEN_AI_TOOL_CALL_ARGUMENTS , arg_val
923
- if self .emit_legacy :
924
- yield GEN_AI_TOOL_INPUT_LEGACY , arg_val
1074
+ # Legacy emission removed
925
1075
926
1076
if span_data .output is not None :
927
1077
res_val = (
@@ -930,8 +1080,7 @@ def _get_attributes_from_function_span_data(
930
1080
else str (span_data .output )
931
1081
)
932
1082
yield GEN_AI_TOOL_CALL_RESULT , res_val
933
- if self .emit_legacy :
934
- yield GEN_AI_TOOL_OUTPUT_LEGACY , res_val
1083
+ # Legacy emission removed
935
1084
936
1085
yield (
937
1086
GEN_AI_OUTPUT_TYPE ,
@@ -995,14 +1144,12 @@ def _get_attributes_from_response_span_data(
995
1144
996
1145
# Input/output messages
997
1146
if self .include_sensitive_data :
998
- # Input messages
1147
+ # Input messages (normalized to role+parts)
999
1148
if self ._capture_messages and span_data .input :
1000
- yield GEN_AI_INPUT_MESSAGES , safe_json_dumps (span_data .input )
1001
- if self .emit_legacy :
1002
- yield (
1003
- GEN_AI_PROMPT_LEGACY ,
1004
- safe_json_dumps (span_data .input ),
1005
- )
1149
+ normalized_in = self ._normalize_messages_to_role_parts (
1150
+ span_data .input
1151
+ )
1152
+ yield GEN_AI_INPUT_MESSAGES , safe_json_dumps (normalized_in )
1006
1153
1007
1154
# System instructions
1008
1155
if self ._capture_system_instructions and span_data .input :
@@ -1018,7 +1165,7 @@ def _get_attributes_from_response_span_data(
1018
1165
safe_json_dumps (sys_instr ),
1019
1166
)
1020
1167
1021
- # Output messages
1168
+ # Output messages (leave as-is; not normalized here)
1022
1169
if self ._capture_messages :
1023
1170
output_messages = getattr (
1024
1171
getattr (span_data , "response" , None ), "output" , None
@@ -1039,11 +1186,6 @@ def _get_attributes_from_response_span_data(
1039
1186
GEN_AI_OUTPUT_MESSAGES ,
1040
1187
safe_json_dumps (collected ),
1041
1188
)
1042
- if self .emit_legacy :
1043
- yield (
1044
- GEN_AI_COMPLETION_LEGACY ,
1045
- safe_json_dumps (collected ),
1046
- )
1047
1189
1048
1190
yield (
1049
1191
GEN_AI_OUTPUT_TYPE ,
0 commit comments