Commit 93d89b7

feat: add metrics
1 parent ecc27d7 commit 93d89b7

File tree

3 files changed: +148 -2 lines changed

veadk/tracing/telemetry/exporters/apmplus_exporter.py

Lines changed: 65 additions & 2 deletions
@@ -30,6 +30,13 @@
 
 from veadk.config import getenv
 from veadk.tracing.telemetry.exporters.base_exporter import BaseExporter
+from veadk.tracing.telemetry.metrics.buckets import (
+    _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+    _GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
+    _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS,
+    _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
+)
+from veadk.tracing.telemetry.metrics import Meters
 from veadk.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -65,14 +72,45 @@ def __init__(
 
         # create meter attributes
         self.llm_invoke_counter = self.meter.create_counter(
-            name="gen_ai.chat.count",
+            name=Meters.LLM_CHAT_COUNT,
             description="Number of LLM invocations",
             unit="count",
         )
         self.token_usage = self.meter.create_histogram(
-            name="gen_ai.client.token.usage",
+            name=Meters.LLM_TOKEN_USAGE,
             description="Token consumption of LLM invocations",
             unit="count",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
+        )
+        self.duration_histogram = self.meter.create_histogram(
+            name=Meters.LLM_OPERATION_DURATION,
+            unit="s",
+            description="GenAI operation duration",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+        )
+        # Count exceptions; to be evaluated whether these are practical to capture here.
+        self.chat_exception_counter = self.meter.create_counter(
+            name=Meters.LLM_COMPLETIONS_EXCEPTIONS,
+            unit="time",
+            description="Number of exceptions raised during chat completions",
+        )
+        self.streaming_time_to_first_token = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_TO_FIRST_TOKEN,
+            unit="s",
+            description="Time to first token in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS,
+        )
+        self.streaming_time_to_generate = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_TO_GENERATE,
+            unit="s",
+            description="Time between first token and completion in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
+        )
+        self.streaming_time_per_output_token = self.meter.create_histogram(
+            name=Meters.LLM_STREAMING_TIME_PER_OUTPUT_TOKEN,
+            unit="s",
+            description="Time per output token in streaming chat completions",
+            explicit_bucket_boundaries_advisory=_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS,
         )
 
     def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
@@ -99,6 +137,31 @@ def record(self, llm_request: LlmRequest, llm_response: LlmResponse) -> None:
         token_attributes = {**attributes, "gen_ai_token_type": "output"}
         self.token_usage.record(output_token, attributes=token_attributes)
 
+        # TODO: measure the real LLM invocation duration (placeholder for now)
+        duration = 5.0
+        if self.duration_histogram:
+            self.duration_histogram.record(duration, attributes=attributes)
+        # TODO: measure streaming time to first token (placeholder for now)
+        time_to_first_token = 0.1
+        if self.streaming_time_to_first_token:
+            self.streaming_time_to_first_token.record(time_to_first_token, attributes=attributes)
+        # TODO: measure streaming time to generate (placeholder for now)
+        time_to_generate = 1.0
+        if self.streaming_time_to_generate:
+            self.streaming_time_to_generate.record(time_to_generate, attributes=attributes)
+        # TODO: measure streaming time per output token (placeholder for now)
+        time_per_output_token = 0.01
+        if self.streaming_time_per_output_token:
+            self.streaming_time_per_output_token.record(time_per_output_token, attributes=attributes)
+
+        # TODO: catch real exceptions (test placeholder for now)
+        e = Exception("test")
+        exception_attributes = {**attributes, "error_type": e.__class__.__name__}
+
+        if self.chat_exception_counter:
+            self.chat_exception_counter.add(1, exception_attributes)
+
+
 
 class APMPlusExporterConfig(BaseModel):
     endpoint: str = Field(
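
The record() additions above still write hard-coded placeholder values (duration = 5.0, time_to_first_token = 0.1, and so on), as the TODOs note. One way to replace them with real measurements is to timestamp the stream as it progresses; the _StreamTimer helper below is a hypothetical sketch, not part of this commit, assuming the caller can observe stream start, first token, and completion:

    import time


    class _StreamTimer:
        """Collects monotonic timestamps for one streaming LLM invocation."""

        def __init__(self) -> None:
            self.start = time.monotonic()
            self.first_token: float | None = None

        def on_first_token(self) -> None:
            # Call once, when the first streamed chunk arrives.
            if self.first_token is None:
                self.first_token = time.monotonic()

        def finish(self, output_tokens: int) -> dict[str, float]:
            # Call on stream completion; yields the four values that
            # record() currently hard-codes.
            end = time.monotonic()
            first = self.first_token if self.first_token is not None else end
            generate = end - first
            return {
                "duration": end - self.start,
                "time_to_first_token": first - self.start,
                "time_to_generate": generate,
                "time_per_output_token": generate / output_tokens if output_tokens else 0.0,
            }

The exception counter would move out of record() in the same spirit: wrap the model call in try/except and add to chat_exception_counter with the real error type rather than the Exception("test") placeholder.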
veadk/tracing/telemetry/metrics/__init__.py

Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+class Meters:
+    LLM_CHAT_COUNT = "gen_ai.chat.count"
+    LLM_TOKEN_USAGE = "gen_ai.client.token.usage"
+    LLM_OPERATION_DURATION = "gen_ai.client.operation.duration"
+    LLM_COMPLETIONS_EXCEPTIONS = "gen_ai.chat_completions.exceptions"
+    LLM_STREAMING_TIME_TO_FIRST_TOKEN = (
+        "gen_ai.chat_completions.streaming_time_to_first_token"
+    )
+    LLM_STREAMING_TIME_TO_GENERATE = (
+        "gen_ai.chat_completions.streaming_time_to_generate"
+    )
+    LLM_STREAMING_TIME_PER_OUTPUT_TOKEN = (
+        "gen_ai.chat_completions.streaming_time_per_output_token"
+    )
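
Two of these names (gen_ai.client.token.usage, gen_ai.client.operation.duration) match the OpenTelemetry GenAI semantic-convention metrics; the remaining gen_ai.chat_completions.* names are project-specific. A quick smoke test that the constants flow through the SDK unchanged, sketched with the in-memory reader from opentelemetry-sdk (illustrative only, not part of this commit):

    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import InMemoryMetricReader

    from veadk.tracing.telemetry.metrics import Meters

    reader = InMemoryMetricReader()
    meter = MeterProvider(metric_readers=[reader]).get_meter("meters-smoke-test")

    # Record a single data point under the chat-count name.
    meter.create_counter(Meters.LLM_CHAT_COUNT, unit="count").add(1)

    # The exported metric carries the constant's value as its name.
    names = [
        metric.name
        for rm in reader.get_metrics_data().resource_metrics
        for sm in rm.scope_metrics
        for metric in sm.metrics
    ]
    assert Meters.LLM_CHAT_COUNT in names  # "gen_ai.chat.count"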
veadk/tracing/telemetry/metrics/buckets.py

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
+_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+]
+
+_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS = [
+    0.01,
+    0.025,
+    0.05,
+    0.075,
+    0.1,
+    0.15,
+    0.2,
+    0.3,
+    0.4,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+]
+
+_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS = [
+    0.001,
+    0.005,
+    0.01,
+    0.02,
+    0.04,
+    0.06,
+    0.08,
+    0.1,
+    0.25,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+    5.0,
+    7.5,
+    10.0,
+]
+
+_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
+    1,
+    4,
+    16,
+    64,
+    256,
+    1024,
+    4096,
+    16384,
+    65536,
+    262144,
+    1048576,
+    4194304,
+    16777216,
+    67108864,
+]
+
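
Both client-side lists are geometric series: the operation-duration buckets double at every step (10 ms up to 81.92 s) and the token-usage buckets grow 4x per step (1 up to ~67M), covering several orders of magnitude with few buckets. A small sketch verifying the progressions (illustrative; keeping the literals in the source, as the commit does, is easier to audit):

    from veadk.tracing.telemetry.metrics.buckets import (
        _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
        _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
    )

    # Powers of four: 1, 4, 16, ..., 67108864 (exact integer match).
    assert _GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS == [4**k for k in range(14)]

    # Doubling from 0.01 s; compare with a tolerance to sidestep float
    # representation quirks.
    expected = [0.01 * 2**k for k in range(14)]
    assert all(
        abs(a - b) < 1e-9
        for a, b in zip(expected, _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS)
    )

These lists reach the SDK through the explicit_bucket_boundaries_advisory argument used in apmplus_exporter.py, which is advisory by design: the SDK applies the boundaries to the histogram's default aggregation unless a View configures different ones.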
