tracing: add operation name to create agent span and token usage to streaming response spans (#44037)

M-Hietala · web-flow · commit 0d85fc612768 · 2025-11-14T13:02:45.000-06:00
* tracing: add operation name to the create agent span and token counts to streaming response spans

* updating assets.json

* fixed changelog and removed commented-out code per review comments
diff --git a/sdk/ai/azure-ai-projects/CHANGELOG.md b/sdk/ai/azure-ai-projects/CHANGELOG.md
@@ -7,9 +7,10 @@
 ### Breaking changes
 
 * `get_openai_client()` method on the asynchronous AIProjectClient is no longer an "async" method.
-* tracing: tool call output event content format updated to be in line with other events
+* Tracing: tool call output event content format updated to be in line with other events
 
 ### Bugs Fixed
+* Tracing: operation name attribute added to create agent span, token usage added to streaming response generation span
 
 ### Sample updates
 * Added `finetuning` samples for operations create, retrieve, list, list_events, list_checkpoints, cancel, pause and resume. These samples also include various finetuning techniques such as Supervised (SFT), Reinforcement (RFT) and Direct Preference Optimization (DPO).
diff --git a/sdk/ai/azure-ai-projects/assets.json b/sdk/ai/azure-ai-projects/assets.json
@@ -2,5 +2,5 @@
   "AssetsRepo": "Azure/azure-sdk-assets",
   "AssetsRepoPrefixPath": "python",
   "TagPrefix": "python/ai/azure-ai-projects",
-  "Tag": "python/ai/azure-ai-projects_7e1b7f222f"
+  "Tag": "python/ai/azure-ai-projects_8ddbfaaa38"
 }
diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_ai_project_instrumentor.py
@@ -29,6 +29,7 @@
     GEN_AI_EVENT_CONTENT,
     GEN_AI_MESSAGE_ID,
     GEN_AI_MESSAGE_STATUS,
+    GEN_AI_OPERATION_NAME,
     GEN_AI_SYSTEM,
     GEN_AI_SYSTEM_MESSAGE,
     GEN_AI_THREAD_ID,
@@ -504,6 +505,7 @@ def start_create_agent_span(  # pylint: disable=too-many-locals
             gen_ai_system=AZ_AI_AGENT_SYSTEM,
         )
         if span and span.span_instance.is_recording:
+            span.add_attribute(GEN_AI_OPERATION_NAME, OperationName.CREATE_AGENT.value)
             if name:
                 span.add_attribute(GEN_AI_AGENT_NAME, name)
             if description:
diff --git a/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py b/sdk/ai/azure-ai-projects/azure/ai/projects/telemetry/_responses_instrumentor.py
@@ -1965,6 +1965,18 @@ def process_chunk(self, chunk):
                         self.response_id = chunk.response.id
                     if not self.response_model:
                         self.response_model = getattr(chunk.response, "model", None)
+                    # Extract usage from the completed response
+                    if hasattr(chunk.response, "usage"):
+                        response_usage = chunk.response.usage
+                        if hasattr(response_usage, "input_tokens") and response_usage.input_tokens:
+                            self.input_tokens = response_usage.input_tokens
+                        if hasattr(response_usage, "output_tokens") and response_usage.output_tokens:
+                            self.output_tokens = response_usage.output_tokens
+                        # Also handle standard token field names for compatibility
+                        if hasattr(response_usage, "prompt_tokens") and response_usage.prompt_tokens:
+                            self.input_tokens = response_usage.prompt_tokens
+                        if hasattr(response_usage, "completion_tokens") and response_usage.completion_tokens:
+                            self.output_tokens = response_usage.completion_tokens
 
                 # Only append TEXT content from delta events (not function call arguments or other deltas)
                 # Text deltas can come as:
@@ -2041,6 +2053,7 @@ def cleanup(self):
                             self.instrumentor._set_span_attribute_safe(
                                 self.span, "gen_ai.response.model", self.response_model
                             )
+
                         if self.service_tier:
                             self.instrumentor._set_span_attribute_safe(
                                 self.span, "gen_ai.openai.response.service_tier", self.service_tier
@@ -2049,11 +2062,11 @@ def cleanup(self):
                         # Set token usage span attributes
                         if self.input_tokens > 0:
                             self.instrumentor._set_span_attribute_safe(
-                                self.span, "gen_ai.usage.prompt_tokens", self.input_tokens
+                                self.span, "gen_ai.usage.input_tokens", self.input_tokens
                             )
                         if self.output_tokens > 0:
                             self.instrumentor._set_span_attribute_safe(
-                                self.span, "gen_ai.usage.completion_tokens", self.output_tokens
+                                self.span, "gen_ai.usage.output_tokens", self.output_tokens
                             )
 
                     # Record metrics using accumulated data
@@ -2373,6 +2386,18 @@ def process_chunk(self, chunk):
                         self.response_id = chunk.response.id
                     if not self.response_model:
                         self.response_model = getattr(chunk.response, "model", None)
+                    # Extract usage from the completed response
+                    if hasattr(chunk.response, "usage"):
+                        response_usage = chunk.response.usage
+                        if hasattr(response_usage, "input_tokens") and response_usage.input_tokens:
+                            self.input_tokens = response_usage.input_tokens
+                        if hasattr(response_usage, "output_tokens") and response_usage.output_tokens:
+                            self.output_tokens = response_usage.output_tokens
+                        # Also handle standard token field names for compatibility
+                        if hasattr(response_usage, "prompt_tokens") and response_usage.prompt_tokens:
+                            self.input_tokens = response_usage.prompt_tokens
+                        if hasattr(response_usage, "completion_tokens") and response_usage.completion_tokens:
+                            self.output_tokens = response_usage.completion_tokens
 
                 # Only append TEXT content from delta events (not function call arguments or other deltas)
                 # Text deltas can come as:
@@ -2449,6 +2474,7 @@ def cleanup(self):
                             self.instrumentor._set_span_attribute_safe(
                                 self.span, "gen_ai.response.model", self.response_model
                             )
+
                         if self.service_tier:
                             self.instrumentor._set_span_attribute_safe(
                                 self.span, "gen_ai.openai.response.service_tier", self.service_tier
@@ -2457,11 +2483,11 @@ def cleanup(self):
                         # Set token usage span attributes
                         if self.input_tokens > 0:
                             self.instrumentor._set_span_attribute_safe(
-                                self.span, "gen_ai.usage.prompt_tokens", self.input_tokens
+                                self.span, "gen_ai.usage.input_tokens", self.input_tokens
                             )
                         if self.output_tokens > 0:
                             self.instrumentor._set_span_attribute_safe(
-                                self.span, "gen_ai.usage.completion_tokens", self.output_tokens
+                                self.span, "gen_ai.usage.output_tokens", self.output_tokens
                             )
 
                     # Record metrics using accumulated data
diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/gen_ai_trace_verifier.py
@@ -18,6 +18,15 @@ def check_span_attributes(self, span, attributes):
         attribute_dict = dict(attributes)
         attribute_dict["az.namespace"] = "Microsoft.CognitiveServices"
 
+        # First, check that all expected attributes are present in the span
+        for expected_attribute_name in attribute_dict.keys():
+            if expected_attribute_name not in span.attributes:
+                raise AssertionError(
+                    f"Expected attribute '{expected_attribute_name}' not found in span. "
+                    f"Span has: {list(span.attributes.keys())}"
+                )
+
+        # Then, check that all attributes in the span are expected and have correct values
         for attribute_name in span.attributes.keys():
             # Check if the attribute name exists in the input attributes
             if attribute_name not in attribute_dict:
diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor.py
@@ -338,7 +338,6 @@ def test_agent_creation_with_tracing_content_recording_disabled(self, **kwargs):
         events_match = GenAiTraceVerifier().check_span_events(span, expected_events)
         assert events_match == True
 
-    @pytest.mark.skip(reason="recordings to be added")
     @pytest.mark.usefixtures("instrument_with_content")
     @servicePreparer()
     @recorded_by_proxy
@@ -409,7 +408,6 @@ def test_workflow_agent_creation_with_tracing_content_recording_enabled(self, **
         assert "workflow" in event_content["content"][0]
         assert "kind: workflow" in event_content["content"][0]["workflow"]
 
-    @pytest.mark.skip(reason="recordings to be added")
     @pytest.mark.usefixtures("instrument_without_content")
     @servicePreparer()
     @recorded_by_proxy
diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_ai_agents_instrumentor_async.py
@@ -216,7 +216,6 @@ async def test_agent_creation_with_tracing_content_recording_disabled(self, **kw
         events_match = GenAiTraceVerifier().check_span_events(span, expected_events)
         assert events_match == True
 
-    @pytest.mark.skip(reason="recordings to be added")
     @pytest.mark.usefixtures("instrument_with_content")
     @servicePreparer()
     @recorded_by_proxy_async
@@ -287,7 +286,6 @@ async def test_workflow_agent_creation_with_tracing_content_recording_enabled(se
         assert "workflow" in event_content["content"][0]
         assert "kind: workflow" in event_content["content"][0]["workflow"]
 
-    @pytest.mark.skip(reason="recordings to be added")
     @pytest.mark.usefixtures("instrument_without_content")
     @servicePreparer()
     @recorded_by_proxy_async
diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor.py
@@ -756,7 +756,6 @@ def test_sync_function_tool_with_content_recording_non_streaming(self, **kwargs)
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -796,7 +795,6 @@ def test_sync_function_tool_with_content_recording_non_streaming(self, **kwargs)
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -952,7 +950,6 @@ def test_sync_function_tool_with_content_recording_streaming(self, **kwargs):
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -992,7 +989,6 @@ def test_sync_function_tool_with_content_recording_streaming(self, **kwargs):
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -1124,7 +1120,6 @@ def test_sync_function_tool_without_content_recording_non_streaming(self, **kwar
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -1164,7 +1159,6 @@ def test_sync_function_tool_without_content_recording_non_streaming(self, **kwar
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -1314,7 +1308,6 @@ def test_sync_function_tool_without_content_recording_streaming(self, **kwargs):
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -1354,7 +1347,6 @@ def test_sync_function_tool_without_content_recording_streaming(self, **kwargs):
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
diff --git a/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py b/sdk/ai/azure-ai-projects/tests/agents/telemetry/test_responses_instrumentor_async.py
@@ -412,7 +412,6 @@ async def test_async_function_tool_with_content_recording_streaming(self, **kwar
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -452,7 +451,6 @@ async def test_async_function_tool_with_content_recording_streaming(self, **kwar
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -604,7 +602,6 @@ async def test_async_function_tool_without_content_recording_streaming(self, **k
         expected_attributes_1 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),
@@ -644,7 +641,6 @@ async def test_async_function_tool_without_content_recording_streaming(self, **k
         expected_attributes_2 = [
             ("az.namespace", "Microsoft.CognitiveServices"),
             ("gen_ai.operation.name", "responses"),
-            ("gen_ai.request.model", deployment_name),
             ("gen_ai.request.assistant_name", agent.name),
             ("gen_ai.provider.name", "azure.openai"),
             ("server.address", ""),

Original file line number	Diff line number	Diff line change
`@@ -2,5 +2,5 @@`
`2`	`2`	`"AssetsRepo": "Azure/azure-sdk-assets",`
`3`	`3`	`"AssetsRepoPrefixPath": "python",`
`4`	`4`	`"TagPrefix": "python/ai/azure-ai-projects",`
`5`		`- "Tag": "python/ai/azure-ai-projects_7e1b7f222f"`
	`5`	`+ "Tag": "python/ai/azure-ai-projects_8ddbfaaa38"`
`6`	`6`	`}`