Skip to content

Commit 1c7cc51

Browse files
authored
feat(tracing): add APMPlus metrics (#210)
* feat: add apmplus metrics
* feat: add tool metrics
* feat: add tool metrics
* feat: remove unused code
* feat: format
1 parent 54e27f3 commit 1c7cc51

File tree

3 files changed

+86
-1
lines changed

3 files changed

+86
-1
lines changed

veadk/agent.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ def model_post_init(self, __context: Any) -> None:
141141
if self.long_term_memory is not None:
142142
from google.adk.tools import load_memory
143143

144+
if not load_memory.custom_metadata:
145+
load_memory.custom_metadata = {}
146+
load_memory.custom_metadata["backend"] = self.long_term_memory.backend
144147
self.tools.append(load_memory)
145148

146149
logger.info(f"VeADK version: {VERSION}")

veadk/tools/load_knowledgebase_tool.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626
from veadk.knowledgebase import KnowledgeBase
2727
from veadk.knowledgebase.entry import KnowledgebaseEntry
28+
from veadk.utils.logger import get_logger
29+
30+
logger = get_logger(__name__)
2831

2932
if TYPE_CHECKING:
3033
from google.adk.models.llm_request import LlmRequest
@@ -96,6 +99,15 @@ class LoadKnowledgebaseTool(FunctionTool):
9699

97100
def __init__(self):
98101
super().__init__(load_knowledgebase)
102+
global knowledgebase
103+
if knowledgebase is None:
104+
logger.info(
105+
"Get global knowledgebase instance failed, failed to set knowledgebase tool backend."
106+
)
107+
else:
108+
if not self.custom_metadata:
109+
self.custom_metadata = {}
110+
self.custom_metadata["backend"] = knowledgebase.backend
99111

100112
@override
101113
def _get_declaration(self) -> types.FunctionDeclaration | None:

veadk/tracing/telemetry/exporters/apmplus_exporter.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ class Meters:
126126
"gen_ai.chat_completions.streaming_time_per_output_token"
127127
)
128128

129+
# apmplus metrics
130+
# span duration
131+
APMPLUS_SPAN_LATENCY = "apmplus_span_latency"
132+
# tool token usage
133+
APMPLUS_TOOL_TOKEN_USAGE = "apmplus_tool_token_usage"
134+
129135

130136
class MeterUploader:
131137
def __init__(
@@ -197,6 +203,20 @@ def __init__(
197203
explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
198204
)
199205

206+
# apmplus metrics for veadk dashboard
207+
self.apmplus_span_latency = self.meter.create_histogram(
208+
name=Meters.APMPLUS_SPAN_LATENCY,
209+
description="Latency of span",
210+
unit="s",
211+
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
212+
)
213+
self.apmplus_tool_token_usage = self.meter.create_histogram(
214+
name=Meters.APMPLUS_TOOL_TOKEN_USAGE,
215+
description="Token consumption of APMPlus tool token",
216+
unit="count",
217+
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
218+
)
219+
200220
def record_call_llm(
201221
self,
202222
invocation_context: InvocationContext,
@@ -207,7 +227,8 @@ def record_call_llm(
207227
attributes = {
208228
"gen_ai_system": "volcengine",
209229
"gen_ai_response_model": llm_request.model,
210-
"gen_ai_operation_name": "chat_completions",
230+
"gen_ai_operation_name": "chat",
231+
"gen_ai_operation_type": "llm",
211232
"stream": "false",
212233
"server_address": "api.volcengine.com",
213234
} # required by Volcengine APMPlus
@@ -269,13 +290,62 @@ def record_call_llm(
269290
# time_per_output_token, attributes=attributes
270291
# )
271292

293+
# add span name attribute
294+
span = trace.get_current_span()
295+
if not span:
296+
return
297+
298+
# record span latency
299+
if hasattr(span, "start_time") and self.apmplus_span_latency:
300+
# span 耗时
301+
duration = (time.time_ns() - span.start_time) / 1e9 # type: ignore
302+
self.apmplus_span_latency.record(duration, attributes=attributes)
303+
272304
def record_tool_call(
273305
self,
274306
tool: BaseTool,
275307
args: dict[str, Any],
276308
function_response_event: Event,
277309
):
278310
logger.debug(f"Record tool call work in progress. Tool: {tool.name}")
311+
span = trace.get_current_span()
312+
if not span:
313+
return
314+
operation_type = "tool"
315+
operation_name = tool.name
316+
operation_backend = ""
317+
if tool.custom_metadata:
318+
operation_backend = tool.custom_metadata.get("backend", "")
319+
320+
attributes = {
321+
"gen_ai_operation_name": operation_name,
322+
"gen_ai_operation_type": operation_type,
323+
"gen_ai_operation_backend": operation_backend,
324+
}
325+
326+
if hasattr(span, "start_time") and self.apmplus_span_latency:
327+
# span 耗时
328+
duration = (time.time_ns() - span.start_time) / 1e9 # type: ignore
329+
self.apmplus_span_latency.record(duration, attributes=attributes)
330+
331+
if self.apmplus_tool_token_usage and hasattr(span, "attributes"):
332+
tool_input = span.attributes["gen_ai.tool.input"]
333+
tool_token_usage_input = (
334+
len(tool_input) / 4
335+
) # tool token 数量,使用文本长度/4
336+
input_tool_token_attributes = {**attributes, "token_type": "input"}
337+
self.apmplus_tool_token_usage.record(
338+
tool_token_usage_input, attributes=input_tool_token_attributes
339+
)
340+
341+
tool_output = span.attributes["gen_ai.tool.output"]
342+
tool_token_usage_output = (
343+
len(tool_output) / 4
344+
) # tool token 数量,使用文本长度/4
345+
output_tool_token_attributes = {**attributes, "token_type": "output"}
346+
self.apmplus_tool_token_usage.record(
347+
tool_token_usage_output, attributes=output_tool_token_attributes
348+
)
279349

280350

281351
class APMPlusExporterConfig(BaseModel):

0 commit comments

Comments (0)