Commit 1fe6257

fix(langchain): report token usage histogram (#3059)
Co-authored-by: Nir Gazit <[email protected]>
Parent: b7403d2
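The histogram this commit reports into is an OpenTelemetry metrics instrument. As a minimal sketch of the API the diffs below record against — the meter name, metric name, and attribute keys here are illustrative assumptions, not taken from this commit:

from opentelemetry import metrics

# Hypothetical setup; the real instrument is created elsewhere in the
# instrumentation package, not in this commit.
meter = metrics.get_meter("example-meter")  # assumed meter name
token_histogram = meter.create_histogram(
    name="gen_ai.client.token.usage",  # assumed metric name
    unit="token",
    description="Number of tokens used per LLM call",
)

# Each measurement carries attributes identifying system, token type, and model:
token_histogram.record(
    42,
    attributes={
        "gen_ai.system": "Langchain",
        "gen_ai.token.type": "input",
        "gen_ai.response.model": "gpt-3.5-turbo",
    },
)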

File tree

4 files changed: +211 −5 lines


packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/callback_handler.py

Lines changed: 4 additions & 2 deletions
@@ -34,6 +34,7 @@
 from opentelemetry.instrumentation.langchain.span_utils import (
     SpanHolder,
     _set_span_attribute,
+    extract_model_name_from_response_metadata,
     set_chat_request,
     set_chat_response,
     set_chat_response_usage,
@@ -446,7 +447,8 @@ def on_llm_end(
             id = response.llm_output.get("id")
             if id is not None and id != "":
                 _set_span_attribute(span, GEN_AI_RESPONSE_ID, id)
-
+        if model_name is None:
+            model_name = extract_model_name_from_response_metadata(response)
         token_usage = (response.llm_output or {}).get("token_usage") or (
             response.llm_output or {}
         ).get("usage")
@@ -495,7 +497,7 @@ def on_llm_end(
                 SpanAttributes.LLM_RESPONSE_MODEL: model_name or "unknown",
             },
         )
-        set_chat_response_usage(span, response)
+        set_chat_response_usage(span, response, self.token_histogram, token_usage is None, model_name)
         if should_emit_events():
             self._emit_llm_end_events(response)
         else:
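The `token_usage is None` argument is worth calling out: when `llm_output` already carries a usage dict, the handler's pre-existing metrics path appears to record the histogram from that dict, so passing `token_usage is None` as the new `record_token_usage` flag makes `set_chat_response_usage` record only when no such dict was present, avoiding double counting. A hypothetical, condensed view of the flow (names mirror the diff; everything else is elided):

def on_llm_end_flow(span, response, token_histogram, model_name):
    # Condensed, hypothetical sketch -- not the actual handler body.
    token_usage = (response.llm_output or {}).get("token_usage") or (
        response.llm_output or {}
    ).get("usage")
    if token_usage is not None:
        pass  # pre-existing path: span attributes + histogram from llm_output
    # New in this commit: fall back to per-generation usage_metadata, and
    # record the histogram only if llm_output did not provide usage above.
    set_chat_response_usage(
        span, response, token_histogram, token_usage is None, model_name
    )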

packages/opentelemetry-instrumentation-langchain/opentelemetry/instrumentation/langchain/span_utils.py

Lines changed: 40 additions & 3 deletions
@@ -15,6 +15,7 @@
     CallbackFilteredJSONEncoder,
     should_send_prompts,
 )
+from opentelemetry.metrics import Histogram
 from opentelemetry.semconv_ai import (
     SpanAttributes,
 )
@@ -271,13 +272,18 @@ def set_chat_response(span: Span, response: LLMResult) -> None:
         i += 1


-def set_chat_response_usage(span: Span, response: LLMResult):
+def set_chat_response_usage(
+    span: Span,
+    response: LLMResult,
+    token_histogram: Histogram,
+    record_token_usage: bool,
+    model_name: str
+) -> None:
     input_tokens = 0
     output_tokens = 0
     total_tokens = 0
     cache_read_tokens = 0

-    i = 0
     for generations in response.generations:
         for generation in generations:
             if (
@@ -302,7 +308,6 @@ def set_chat_response_usage(span: Span, response: LLMResult):
                     "input_token_details", {}
                 )
                 cache_read_tokens += input_token_details.get("cache_read", 0)
-            i += 1

     if (
         input_tokens > 0
@@ -330,6 +335,38 @@ def set_chat_response_usage(span: Span, response: LLMResult):
             SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
             cache_read_tokens,
         )
+    if record_token_usage:
+        if input_tokens > 0:
+            token_histogram.record(
+                input_tokens,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Langchain",
+                    SpanAttributes.LLM_TOKEN_TYPE: "input",
+                    SpanAttributes.LLM_RESPONSE_MODEL: model_name,
+                },
+            )
+
+        if output_tokens > 0:
+            token_histogram.record(
+                output_tokens,
+                attributes={
+                    SpanAttributes.LLM_SYSTEM: "Langchain",
+                    SpanAttributes.LLM_TOKEN_TYPE: "output",
+                    SpanAttributes.LLM_RESPONSE_MODEL: model_name,
+                },
+            )
+
+
+def extract_model_name_from_response_metadata(response: LLMResult) -> str:
+    for generations in response.generations:
+        for generation in generations:
+            if (
+                getattr(generation, "message", None)
+                and getattr(generation.message, "response_metadata", None)
+                and (model_name := generation.message.response_metadata.get("model_name"))
+            ):
+                return model_name
+    return "unknown"


 def _set_chat_tool_calls(
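A quick illustrative check of the new helper, using `langchain_core` output types — the message content and model string are made up, and this assumes a `langchain_core` version where `AIMessage` carries `response_metadata`:

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, LLMResult

# A response whose llm_output is None but whose message metadata names the
# model, i.e. the edge case this commit handles.
response = LLMResult(
    generations=[[
        ChatGeneration(
            message=AIMessage(
                content="Socktastic!",
                response_metadata={"model_name": "gpt-3.5-turbo-0125"},
            )
        )
    ]],
    llm_output=None,
)

assert extract_model_name_from_response_metadata(response) == "gpt-3.5-turbo-0125"
# With no metadata anywhere, the helper falls back to "unknown".
assert extract_model_name_from_response_metadata(LLMResult(generations=[])) == "unknown"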

New VCR cassette (YAML) recorded for test_llm_chain_metrics_with_none_llm_output

Lines changed: 103 additions & 0 deletions

@@ -0,0 +1,103 @@
+interactions:
+- request:
+    body: '{"messages": [{"content": "What is a good name for a company that makes
+      colorful socks?", "role": "user"}], "model": "gpt-3.5-turbo", "n": 1, "stream":
+      false, "temperature": 0.0}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '178'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      traceparent:
+      - 00-d77972e6e3b0d96f4a65edea48fc5e34-92f98f134382afae-01
+      user-agent:
+      - OpenAI/Python 1.45.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.45.1
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.1
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAA2xQPU/DMBTc8yssz02VDwI0WydExVAB6gBCkeO8JgbHtuyX8lH1vyOHtElVFg93
+        vnt3tw8IoaKiOaG8YchbI8Pl/c/u6mm92m1eeJpmD5s1cyq7W8VLI1s68wpdvgPHo2rOdWskoNDq
+        j+YWGIJ3jW+SRbSIk9vrnmh1BdLLaoNhOs9C7GypwyhOskHZaMHB0Zy8BoQQsu9fn1FV8EVzEs2O
+        SAvOsRpofvpECLVaeoQy54RDppDORpJrhaD62I9MqFJ/kufGAqvc9JeFbeeYT6k6KQf8cDordW2s
+        Lt3An/CtUMI1hQXmtPInHGpDe/YQEPLW1+vOElNjdWuwQP0ByhsmQzs6DjqS6cChRiYnmiN+ZlZU
+        gExIN1mHcsYbqEZlFEyaXZ78z+KvnVD1hUswOFH37RDaYitUDdZY0e/dL3kIfgEAAP//AwDTDjnS
+        bgIAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8d38f65e8c5807d6-ATL
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 16 Oct 2024 15:08:07 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=OSPGzzsIkijMjLSfTJjIkSiCxrDnWuQJoJfatCLq_os-1729091287-1.0.1.1-VsHqFTiK6ZfwZayLMdlW8YQ1RAnQm5rP76aV3S2QPBtjrbPWclwVCrqNC0SMuxcZxXGHj2IP1r8OafsbY_cwXA;
+        path=/; expires=Wed, 16-Oct-24 15:38:07 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=kMO6YhIaggKVe4UZFi5l9RM0fjUpZYSwQRUPEBZNJr8-1729091287006-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - traceloop
+      openai-processing-ms:
+      - '160'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '5000'
+      x-ratelimit-limit-tokens:
+      - '4000000'
+      x-ratelimit-remaining-requests:
+      - '4999'
+      x-ratelimit-remaining-tokens:
+      - '3999968'
+      x-ratelimit-reset-requests:
+      - 12ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_1f22ead323fed2094d4f3f5c0187205d
+    status:
+      code: 200
+      message: OK
+version: 1
packages/opentelemetry-instrumentation-langchain/tests/metrics/test_langchain_metrics.py

Lines changed: 64 additions & 0 deletions
@@ -1,3 +1,4 @@
+from unittest.mock import patch
 import pytest
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -114,3 +115,66 @@ def test_llm_chain_streaming_metrics(instrument_legacy, reader, llm):

     assert found_token_metric is True
     assert found_duration_metric is True
+
+
+def verify_token_metrics(data_points):
+    for data_point in data_points:
+        assert data_point.attributes[SpanAttributes.LLM_TOKEN_TYPE] in [
+            "output",
+            "input",
+        ]
+        assert data_point.sum > 0
+        assert data_point.attributes[SpanAttributes.LLM_SYSTEM] == "Langchain"
+
+
+def verify_duration_metrics(data_points):
+    assert any(data_point.count > 0 for data_point in data_points)
+    assert any(data_point.sum > 0 for data_point in data_points)
+    for data_point in data_points:
+        assert data_point.attributes[SpanAttributes.LLM_SYSTEM] == "Langchain"
+
+
+def verify_langchain_metrics(reader):
+    metrics_data = reader.get_metrics_data()
+    resource_metrics = metrics_data.resource_metrics
+    assert len(resource_metrics) > 0
+
+    found_token_metric = False
+    found_duration_metric = False
+
+    for rm in resource_metrics:
+        for sm in rm.scope_metrics:
+            for metric in sm.metrics:
+                if metric.name == Meters.LLM_TOKEN_USAGE:
+                    found_token_metric = True
+                    verify_token_metrics(metric.data.data_points)
+
+                if metric.name == Meters.LLM_OPERATION_DURATION:
+                    found_duration_metric = True
+                    verify_duration_metrics(metric.data.data_points)
+
+    return found_token_metric, found_duration_metric
+
+
+@pytest.mark.vcr
+def test_llm_chain_metrics_with_none_llm_output(instrument_legacy, reader, chain, llm):
+    """
+    This test verifies that the metrics system correctly handles edge cases where the
+    LLM response contains a None value in the llm_output field, ensuring that token
+    usage and operation duration metrics are still properly recorded.
+    """
+    original_generate = llm._generate
+
+    # Create a patched version that returns results with None llm_output
+    def patched_generate(*args, **kwargs):
+        result = original_generate(*args, **kwargs)
+        result.llm_output = None
+        return result
+
+    with patch.object(llm, '_generate', side_effect=patched_generate):
+        chain.run(product="colorful socks")
+
+    found_token_metric, found_duration_metric = verify_langchain_metrics(reader)
+
+    assert found_token_metric is True, "Token usage metrics not found"
+    assert found_duration_metric is True, "Operation duration metrics not found"
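For reference, the `reader` fixture these tests poll is presumably an in-memory metric reader; a sketch of an equivalent standalone setup with the OpenTelemetry SDK follows (the fixture's actual wiring is not part of this commit):

from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import InMemoryMetricReader

# An in-memory reader attached to the MeterProvider lets tests read back
# every histogram recorded by the instrumentation.
reader = InMemoryMetricReader()
metrics.set_meter_provider(MeterProvider(metric_readers=[reader]))

# ... run instrumented LangChain code here ...

data = reader.get_metrics_data()  # may be None if nothing was recorded
for rm in data.resource_metrics if data else []:
    for sm in rm.scope_metrics:
        for metric in sm.metrics:
            points = [getattr(dp, "sum", None) for dp in metric.data.data_points]
            print(metric.name, points)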
