from typing import Any

+from attr import dataclass
from google.adk.models.llm_request import LlmRequest
from google.adk.models.llm_response import LlmResponse
from opentelemetry import metrics

logger = get_logger(__name__)

+_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+]
+
+_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS = [
+    0.01,
+    0.025,
+    0.05,
+    0.075,
+    0.1,
+    0.15,
+    0.2,
+    0.3,
+    0.4,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+]
+
+_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS = [
+    0.001,
+    0.005,
+    0.01,
+    0.02,
+    0.04,
+    0.06,
+    0.08,
+    0.1,
+    0.25,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+    5.0,
+    7.5,
+    10.0,
+]
+
+_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
+    1,
+    4,
+    16,
+    64,
+    256,
+    1024,
+    4096,
+    16384,
+    65536,
+    262144,
+    1048576,
+    4194304,
+    16777216,
+    67108864,
+]
+
+
+@dataclass
+class Meters:
+    LLM_CHAT_COUNT = "gen_ai.chat.count"
+    LLM_TOKEN_USAGE = "gen_ai.client.token.usage"
+    LLM_OPERATION_DURATION = "gen_ai.client.operation.duration"
+    LLM_COMPLETIONS_EXCEPTIONS = "gen_ai.chat_completions.exceptions"
+    LLM_STREAMING_TIME_TO_FIRST_TOKEN = (
+        "gen_ai.chat_completions.streaming_time_to_first_token"
+    )
+    LLM_STREAMING_TIME_TO_GENERATE = (
+        "gen_ai.chat_completions.streaming_time_to_generate"
+    )
+    LLM_STREAMING_TIME_PER_OUTPUT_TOKEN = (
+        "gen_ai.chat_completions.streaming_time_per_output_token"
+    )
+
+
class MeterUploader:
    def __init__(
        self, name: str, endpoint: str, headers: dict, resource_attributes: dict
@@ -65,14 +153,44 @@ def __init__(

        # create meter attributes
        self.llm_invoke_counter = self.meter.create_counter(
-            name="gen_ai.chat.count",
+            name=Meters.LLM_CHAT_COUNT,
            description="Number of LLM invocations",
            unit="count",
        )
        self.token_usage = self.meter.create_histogram(
-            name="gen_ai.client.token.usage",
+            name=Meters.LLM_TOKEN_USAGE,
            description="Token consumption of LLM invocations",
            unit="count",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
+        )
+        self.duration_histogram = self.meter.create_histogram(
+            name=Meters.LLM_OPERATION_DURATION,
+            unit="s",
+            description="GenAI operation duration",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+        )
+        self.chat_exception_counter = self.meter.create_counter(
+            name=Meters.LLM_COMPLETIONS_EXCEPTIONS,
+            unit="time",
+            description="Number of exceptions that occurred during chat completions",
+        )
+        self.streaming_time_to_first_token = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_TO_FIRST_TOKEN,
+            unit="s",
+            description="Time to first token in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS,
+        )
+        self.streaming_time_to_generate = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_TO_GENERATE,
+            unit="s",
+            description="Time between first token and completion in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+        )
+        self.streaming_time_per_output_token = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_PER_OUTPUT_TOKEN,
+            unit="s",
+            description="Time per output token in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
        )

    def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
@@ -99,6 +217,40 @@ def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
        token_attributes = {**attributes, "gen_ai_token_type": "output"}
        self.token_usage.record(output_token, attributes=token_attributes)

+        # TODO: Get LLM operation duration
+        # duration = 5.0
+        # if self.duration_histogram:
+        #     self.duration_histogram.record(duration, attributes=attributes)
+
+        # Get model request error
+        if llm_response.error_code and self.chat_exception_counter:
+            exception_attributes = {
+                **attributes,
+                "error_type": llm_response.error_message,
+            }
+            self.chat_exception_counter.add(1, exception_attributes)
+
+        # TODO: Get streaming time to first token
+        # time_to_first_token = 0.1
+        # if self.streaming_time_to_first_token:
+        #     self.streaming_time_to_first_token.record(
+        #         time_to_first_token, attributes=attributes
+        #     )
+
+        # TODO: Get streaming time to generate
+        # time_to_generate = 1.0
+        # if self.streaming_time_to_generate:
+        #     self.streaming_time_to_generate.record(
+        #         time_to_generate, attributes=attributes
+        #     )
+
+        # TODO: Get streaming time per output token
+        # time_per_output_token = 0.01
+        # if self.streaming_time_per_output_token:
+        #     self.streaming_time_per_output_token.record(
+        #         time_per_output_token, attributes=attributes
+        #     )
+

class APMPlusExporterConfig(BaseModel):
    endpoint: str = Field(
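A note on the new `explicit_bucket_boundaries_advisory` arguments: a minimal, standalone sketch of how an OpenTelemetry SDK can pick up these advisory boundaries when aggregating one of the histograms above. The meter-provider wiring, the bucket values, and the `gen_ai_model` attribute here are illustrative only and assume a recent opentelemetry-python release that supports instrument advisory parameters; they are not part of this change.

```python
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

# Illustrative wiring: an in-memory reader so the recorded point can be inspected.
reader = InMemoryMetricReader()
meter = MeterProvider(metric_readers=[reader]).get_meter("gen_ai.demo")

duration_histogram = meter.create_histogram(
    name="gen_ai.client.operation.duration",
    unit="s",
    description="GenAI operation duration",
    # Advisory boundaries: the SDK may use these instead of its default buckets.
    explicit_bucket_boundaries_advisory=[0.01, 0.02, 0.04, 0.08, 0.16, 0.32],
)
duration_histogram.record(0.12, attributes={"gen_ai_model": "example-model"})
print(reader.get_metrics_data())
```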
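For the streaming TODOs left in record(), one plausible way to derive the three values is from monotonic timestamps captured around the streaming call. The sketch below is only an assumption about how that could look; the helper name, timestamps, and token count are hypothetical and do not appear in this PR.

```python
import time


def streaming_timings(
    start: float, first_token_at: float, end: float, output_tokens: int
) -> tuple[float, float, float]:
    """Derive the three streaming metrics from monotonic timestamps (sketch)."""
    time_to_first_token = first_token_at - start
    time_to_generate = end - first_token_at
    time_per_output_token = (
        time_to_generate / output_tokens if output_tokens else 0.0
    )
    return time_to_first_token, time_to_generate, time_per_output_token


# Example with made-up timings: 80 ms to first token, 1.2 s more to finish 60 tokens.
start = time.monotonic()
ttft, ttg, tpot = streaming_timings(start, start + 0.08, start + 1.28, 60)
```

The resulting values would then feed `streaming_time_to_first_token.record(...)`, `streaming_time_to_generate.record(...)`, and `streaming_time_per_output_token.record(...)` with the same attribute set used for token usage.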