3030
3131from veadk .config import getenv
3232from veadk .tracing .telemetry .exporters .base_exporter import BaseExporter
33+ from veadk .tracing .telemetry .metrics .buckets import (
34+ _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS ,
35+ _GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS ,
36+ _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS ,
37+ _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS ,
38+ )
39+ from veadk .tracing .telemetry .metrics import Meters
3340from veadk .utils .logger import get_logger
3441
3542logger = get_logger (__name__ )
@@ -73,6 +80,37 @@ def __init__(
7380 name = "gen_ai.client.token.usage" ,
7481 description = "Token consumption of LLM invocations" ,
7582 unit = "count" ,
83+ explicit_bucket_boundaries_advisory = _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
84+ )
85+ self .duration_histogram = self .meter .create_histogram (
86+ name = Meters .LLM_OPERATION_DURATION ,
87+ unit = "s" ,
88+ description = "GenAI operation duration" ,
89+ explicit_bucket_boundaries_advisory = _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
90+ )
91+ # TODO: count exceptions — assess whether they are readily obtainable at this layer
92+ self .chat_exception_counter = self .meter .create_counter (
93+ name = Meters .LLM_COMPLETIONS_EXCEPTIONS ,
94+ unit = "time" ,
95+ description = "Number of exceptions occurred during chat completions" ,
96+ )
97+ self .streaming_time_to_first_token = self .meter .create_histogram (
98+ name = Meters .LLM_STREAMING_TIME_TO_FIRST_TOKEN ,
99+ unit = "s" ,
100+ description = "Time to first token in streaming chat completions" ,
101+ explicit_bucket_boundaries_advisory = _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS
102+ )
103+ self .streaming_time_to_generate = self .meter .create_histogram (
104+ name = Meters .LLM_STREAMING_TIME_TO_GENERATE ,
105+ unit = "s" ,
106+ description = "Time between first token and completion in streaming chat completions" ,
107+ explicit_bucket_boundaries_advisory = _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
108+ )
109+ self .streaming_time_per_output_token = self .meter .create_histogram (
110+ name = Meters .LLM_STREAMING_TIME_PER_OUTPUT_TOKEN ,
111+ unit = "s" ,
112+ description = "Time per output token in streaming chat completions" ,
113+ explicit_bucket_boundaries_advisory = _GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS
76114 )
77115
78116 def record (self , llm_request : LlmRequest , llm_response : LlmResponse ) -> None :
@@ -99,6 +137,31 @@ def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
99137 token_attributes = {** attributes , "gen_ai_token_type" : "output" }
100138 self .token_usage .record (output_token , attributes = token_attributes )
101139
140+ # TODO: get llm duration
141+ duration = 5.0
142+ if self .duration_histogram :
143+ self .duration_histogram .record (duration , attributes = attributes )
144+ # TODO: get streaming time to first token
145+ time_to_frist_token = 0.1
146+ if self .streaming_time_to_first_token :
147+ self .streaming_time_to_first_token .record (time_to_frist_token , attributes = attributes )
148+ # TODO: get streaming time to generate
149+ time_to_generate = 1.0
150+ if self .streaming_time_to_generate :
151+ self .streaming_time_to_generate .record (time_to_generate , attributes = attributes )
152+ # TODO: get streaming time per output token
153+ time_per_output_token = 0.01
154+ if self .streaming_time_per_output_token :
155+ self .streaming_time_per_output_token .record (time_per_output_token , attributes = attributes )
156+
157+ # TODO: catch exception
158+ e = Exception ("test" )
159+ exception_attributes = {** attributes , "error_type" : e .__class__ .__name__ }
160+
161+ if self .chat_exception_counter :
162+ self .chat_exception_counter .add (1 , exception_attributes )
163+
164+
102165
103166class APMPlusExporterConfig (BaseModel ):
104167 endpoint : str = Field (
0 commit comments