refine(tracing): remove global instance and add context to root span (#173)

yaozheng-fang · web-flow · commit db3f46013f13 · 2025-09-17T18:03:14.000+08:00
diff --git a/veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py b/veadk/tracing/telemetry/attributes/extractors/llm_attributes_extractors.py
@@ -252,12 +252,14 @@ def llm_gen_ai_messages(params: LLMAttributesParams) -> ExtractorResponse:
                                                 part.inline_data.display_name.split(
                                                     "/"
                                                 )[-1]
-                                                if part.inline_data.display_name
+                                                if part.inline_data
+                                                and part.inline_data.display_name
                                                 else "<unknown_image_name>"
                                             ),
                                             "parts.0.image_url.url": (
                                                 part.inline_data.display_name
-                                                if part.inline_data.display_name
+                                                if part.inline_data
+                                                and part.inline_data.display_name
                                                 else "<unknown_image_url>"
                                             ),
                                         }
diff --git a/veadk/tracing/telemetry/exporters/apmplus_exporter.py b/veadk/tracing/telemetry/exporters/apmplus_exporter.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import time
 from dataclasses import dataclass
 from typing import Any
 
+from google.adk.agents.invocation_context import InvocationContext
 from google.adk.models.llm_request import LlmRequest
 from google.adk.models.llm_response import LlmResponse
-from opentelemetry import metrics
+from opentelemetry import metrics, trace
 from opentelemetry import metrics as metrics_api
 from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
@@ -193,7 +195,13 @@ def __init__(
             explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
         )
 
-    def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
+    def record(
+        self,
+        invocation_context: InvocationContext,
+        event_id: str,
+        llm_request: LlmRequest,
+        llm_response: LlmResponse,
+    ) -> None:
         attributes = {
             "gen_ai_system": "volcengine",
             "gen_ai_response_model": llm_request.model,
@@ -217,10 +225,18 @@ def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
                 token_attributes = {**attributes, "gen_ai_token_type": "output"}
                 self.token_usage.record(output_token, attributes=token_attributes)
 
-            # TODO: Get llm duration
-            # duration = 5.0
-            # if self.duration_histogram:
-            #     self.duration_histogram.record(duration, attributes=attributes)
+            # Get llm duration
+            span = trace.get_current_span()
+            if span and hasattr(span, "start_time") and self.duration_histogram:
+                # We use span start time as the llm request start time
+                tik = span.start_time  # type: ignore
+                # We use current time as the llm request end time
+                tok = time.time_ns()
+                # Calculate duration in seconds
+                duration = (tok - tik) / 1e9
+                self.duration_histogram.record(
+                    duration, attributes=attributes
+                )  # unit in seconds
 
             # Get model request error
             if llm_response.error_code and self.chat_exception_counter:
@@ -269,6 +285,8 @@ class APMPlusExporter(BaseExporter):
     config: APMPlusExporterConfig = Field(default_factory=APMPlusExporterConfig)
 
     def model_post_init(self, context: Any) -> None:
+        logger.info(f"APMPlusExporter sevice name: {self.config.service_name}")
+
         headers = {
             "x-byteapm-appkey": self.config.app_key,
         }
diff --git a/veadk/tracing/telemetry/exporters/cozeloop_exporter.py b/veadk/tracing/telemetry/exporters/cozeloop_exporter.py
@@ -42,6 +42,8 @@ class CozeloopExporter(BaseExporter):
     config: CozeloopExporterConfig = Field(default_factory=CozeloopExporterConfig)
 
     def model_post_init(self, context: Any) -> None:
+        logger.info(f"CozeloopExporter space ID: {self.config.space_id}")
+
         headers = {
             "cozeloop-workspace-id": self.config.space_id,
             "authorization": f"Bearer {self.config.token}",
diff --git a/veadk/tracing/telemetry/exporters/inmemory_exporter.py b/veadk/tracing/telemetry/exporters/inmemory_exporter.py
@@ -75,11 +75,22 @@ def clear(self):
 class _InMemorySpanProcessor(export.SimpleSpanProcessor):
     def __init__(self, exporter: _InMemoryExporter) -> None:
         super().__init__(exporter)
-        self.spans = []
 
     def on_start(self, span, parent_context) -> None:
-        if span.context:
-            self.spans.append(span)
+        if span.name.startswith("invocation"):
+            span.set_attribute("gen_ai.operation.name", "chain")
+            span.set_attribute("gen_ai.usage.total_tokens", 0)
+
+            ctx = set_value("invocation_span_instance", span, context=parent_context)
+            token = attach(ctx)  # attach a context that carries Google ADK's `invocation` root span
+            setattr(span, "_invocation_token", token)  # for later detach
+
+        if span.name.startswith("agent_run"):
+            span.set_attribute("gen_ai.operation.name", "agent")
+
+            ctx = set_value("agent_run_span_instance", span, context=parent_context)
+            token = attach(ctx)
+            setattr(span, "_agent_run_token", token)  # for later detach
 
     def on_end(self, span: ReadableSpan) -> None:
         if span.context:
@@ -92,8 +103,14 @@ def on_end(self, span: ReadableSpan) -> None:
             except Exception:
                 logger.exception("Exception while exporting Span.")
             detach(token)
-            if span in self.spans:
-                self.spans.remove(span)
+
+            token = getattr(span, "_invocation_token", None)
+            if token:
+                detach(token)
+
+            token = getattr(span, "_agent_run_token", None)
+            if token:
+                detach(token)
 
 
 class InMemoryExporter(BaseExporter):
@@ -106,6 +123,3 @@ def __init__(self, name: str = "inmemory_exporter") -> None:
 
         self._exporter = _InMemoryExporter()
         self.processor = _InMemorySpanProcessor(self._exporter)
-
-
-_INMEMORY_EXPORTER_INSTANCE = InMemoryExporter()
diff --git a/veadk/tracing/telemetry/exporters/tls_exporter.py b/veadk/tracing/telemetry/exporters/tls_exporter.py
@@ -44,6 +44,8 @@ class TLSExporter(BaseExporter):
     config: TLSExporterConfig = Field(default_factory=TLSExporterConfig)
 
     def model_post_init(self, context: Any) -> None:
+        logger.info(f"TLSExporter topic ID: {self.config.topic_id}")
+
         headers = {
             "x-tls-otel-tracetopic": self.config.topic_id,
             "x-tls-otel-ak": self.config.access_key,
diff --git a/veadk/tracing/telemetry/opentelemetry_tracer.py b/veadk/tracing/telemetry/opentelemetry_tracer.py
@@ -30,10 +30,7 @@
 from veadk.tracing.base_tracer import BaseTracer
 from veadk.tracing.telemetry.exporters.apmplus_exporter import APMPlusExporter
 from veadk.tracing.telemetry.exporters.base_exporter import BaseExporter
-from veadk.tracing.telemetry.exporters.inmemory_exporter import (
-    _INMEMORY_EXPORTER_INSTANCE,
-    InMemoryExporter,
-)
+from veadk.tracing.telemetry.exporters.inmemory_exporter import InMemoryExporter
 from veadk.utils.logger import get_logger
 from veadk.utils.patches import patch_google_adk_telemetry
 
@@ -119,7 +116,7 @@ def _init_global_tracer_provider(self) -> None:
                     f"Add span processor for exporter `{exporter.__class__.__name__}` to OpentelemetryTracer failed."
                 )
 
-        self._inmemory_exporter = _INMEMORY_EXPORTER_INSTANCE
+        self._inmemory_exporter = InMemoryExporter()
         if self._inmemory_exporter.processor:
             # make sure the in memory exporter processor is added at index 0
             # because we use this to record all spans
diff --git a/veadk/tracing/telemetry/telemetry.py b/veadk/tracing/telemetry/telemetry.py
@@ -20,24 +20,23 @@
 from google.adk.models.llm_response import LlmResponse
 from google.adk.tools import BaseTool
 from opentelemetry import trace
-from opentelemetry.sdk.trace import _Span
+from opentelemetry.context import get_value
+from opentelemetry.sdk.trace import Span, _Span
 
 from veadk.tracing.telemetry.attributes.attributes import ATTRIBUTES
 from veadk.tracing.telemetry.attributes.extractors.types import (
     ExtractorResponse,
     LLMAttributesParams,
     ToolAttributesParams,
 )
-from veadk.tracing.telemetry.exporters.inmemory_exporter import (
-    _INMEMORY_EXPORTER_INSTANCE,
-)
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
 
 
-def upload_metrics(
+def _upload_metrics(
     invocation_context: InvocationContext,
+    event_id: str,
     llm_request: LlmRequest,
     llm_response: LlmResponse,
 ) -> None:
@@ -48,11 +47,13 @@ def upload_metrics(
         for tracer in tracers:
             for exporter in getattr(tracer, "exporters", []):
                 if getattr(exporter, "meter_uploader", None):
-                    exporter.meter_uploader.record(llm_request, llm_response)
+                    exporter.meter_uploader.record(
+                        invocation_context, event_id, llm_request, llm_response
+                    )
 
 
 def _set_agent_input_attribute(
-    span: _Span, invocation_context: InvocationContext
+    span: Span, invocation_context: InvocationContext
 ) -> None:
     # We only save the original user input as the agent input
     # hence once the `agent.input` has been set, we don't overwrite it
@@ -106,7 +107,7 @@ def _set_agent_input_attribute(
                 )
 
 
-def _set_agent_output_attribute(span: _Span, llm_response: LlmResponse) -> None:
+def _set_agent_output_attribute(span: Span, llm_response: LlmResponse) -> None:
     content = llm_response.content
     if content and content.parts:
         for idx, part in enumerate(content.parts):
@@ -126,67 +127,64 @@ def set_common_attributes_on_model_span(
     current_span: _Span,
     **kwargs,
 ) -> None:
-    if current_span.context:
-        current_span_id = current_span.context.trace_id
-    else:
-        logger.warning(
-            "Current span context is missing, failed to get `trace_id` to set common attributes."
-        )
-        return
-
+    common_attributes = ATTRIBUTES.get("common", {})
     try:
-        spans = _INMEMORY_EXPORTER_INSTANCE.processor.spans  # type: ignore
-
-        spans_in_current_trace = [
-            span
-            for span in spans
-            if span.context and span.context.trace_id == current_span_id
-        ]
-
-        common_attributes = ATTRIBUTES.get("common", {})
-        for span in spans_in_current_trace:
-            if span.is_recording():
-                if span.name.startswith("invocation"):
-                    span.set_attribute("gen_ai.operation.name", "chain")
-                    _set_agent_input_attribute(span, invocation_context)
-                    _set_agent_output_attribute(span, llm_response)
-                elif span.name.startswith("agent_run"):
-                    span.set_attribute("gen_ai.operation.name", "agent")
-                    _set_agent_input_attribute(span, invocation_context)
-                    _set_agent_output_attribute(span, llm_response)
-                for attr_name, attr_extractor in common_attributes.items():
-                    value = attr_extractor(**kwargs)
-                    span.set_attribute(attr_name, value)
+        invocation_span: Span = get_value("invocation_span_instance")  # type: ignore
+        agent_run_span: Span = get_value("agent_run_span_instance")  # type: ignore
+
+        if invocation_span and invocation_span.name.startswith("invocation"):
+            _set_agent_input_attribute(invocation_span, invocation_context)
+            _set_agent_output_attribute(invocation_span, llm_response)
+            for attr_name, attr_extractor in common_attributes.items():
+                value = attr_extractor(**kwargs)
+                invocation_span.set_attribute(attr_name, value)
+
+            # Calculate the token usage for the whole invocation span
+            current_step_token_usage = (
+                llm_response.usage_metadata.total_token_count
+                if llm_response.usage_metadata
+                and llm_response.usage_metadata.total_token_count
+                else 0
+            )
+            prev_total_token_usage = (
+                invocation_span.attributes["gen_ai.usage.total_tokens"]
+                if invocation_span.attributes
+                else 0
+            )
+            accumulated_total_token_usage = (
+                current_step_token_usage + int(prev_total_token_usage)  # type: ignore
+            )  # the type warning can be ignored because we set this attribute to an int ourselves earlier
+            invocation_span.set_attribute(
+                "gen_ai.usage.total_tokens", accumulated_total_token_usage
+            )
+
+        if agent_run_span and agent_run_span.name.startswith("agent_run"):
+            _set_agent_input_attribute(agent_run_span, invocation_context)
+            _set_agent_output_attribute(agent_run_span, llm_response)
+            for attr_name, attr_extractor in common_attributes.items():
+                value = attr_extractor(**kwargs)
+                agent_run_span.set_attribute(attr_name, value)
+
+        for attr_name, attr_extractor in common_attributes.items():
+            value = attr_extractor(**kwargs)
+            current_span.set_attribute(attr_name, value)
     except Exception as e:
         logger.error(f"Failed to set common attributes for spans: {e}")
 
 
 def set_common_attributes_on_tool_span(current_span: _Span) -> None:
-    # find parent span (generally a llm span)
-    if not current_span.context:
-        logger.warning(
-            f"Get tool span's context failed. Skip setting common attributes for span {current_span.name}"
-        )
-        return
-
-    if not current_span.parent:
-        logger.warning(
-            f"Get tool span's parent failed. Skip setting common attributes for span {current_span.name}"
-        )
-        return
-
-    parent_span_id = current_span.parent.span_id
-    for span in _INMEMORY_EXPORTER_INSTANCE.processor.spans:  # type: ignore
-        if span.context.span_id == parent_span_id:
-            common_attributes = ATTRIBUTES.get("common", {})
-            for attr_name in common_attributes.keys():
-                if hasattr(span.attributes, attr_name):
-                    current_span.set_attribute(attr_name, span.attributes[attr_name])
-                else:
-                    logger.error(f"Parent span does not have attribute {attr_name}")
+    common_attributes = ATTRIBUTES.get("common", {})
 
+    invocation_span: Span = get_value("invocation_span_instance")  # type: ignore
 
-def trace_send_data(): ...
+    for attr_name in common_attributes.keys():
+        if (
+            invocation_span
+            and invocation_span.name.startswith("invocation")
+            and invocation_span.attributes
+            and attr_name in invocation_span.attributes
+        ):
+            current_span.set_attribute(attr_name, invocation_span.attributes[attr_name])
 
 
 def trace_tool_call(
@@ -212,7 +210,7 @@ def trace_call_llm(
     llm_request: LlmRequest,
     llm_response: LlmResponse,
 ) -> None:
-    span = trace.get_current_span()
+    span: Span = trace.get_current_span()  # type: ignore
 
     from veadk.agent import Agent
 
@@ -234,6 +232,7 @@ def trace_call_llm(
             span.context.trace_state.get("call_type", "")
             if (
                 hasattr(span, "context")
+                and span.context
                 and hasattr(span.context, "trace_state")
                 and hasattr(span.context.trace_state, "get")
             )
@@ -253,4 +252,8 @@ def trace_call_llm(
         response: ExtractorResponse = attr_extractor(params)
         ExtractorResponse.update_span(span, attr_name, response)
 
-    upload_metrics(invocation_context, llm_request, llm_response)
+    _upload_metrics(invocation_context, event_id, llm_request, llm_response)
+
+
+# Do not modify this function
+def trace_send_data(): ...